- import java.io.FileReader;
- import java.io.IOException;
- import java.io.InputStreamReader;
- import java.io.Reader;
- import java.net.URL;
- import java.net.URLConnection;
- import java.io.*;
- import java.net.*;
- import javax.swing.text.*;
- import javax.swing.text.html.*;
/**
 * Reads an HTML document from a URL or a local file and prints its plain
 * text content (markup removed) to standard output.
 *
 * <p>Parsing is delegated to Swing's {@link HTMLEditorKit}; the text is then
 * taken from the resulting {@link Document}.
 */
public class ReadTextFromHTML {

    /**
     * Fetches a fixed blog page, parses it as HTML and prints the extracted text.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the page cannot be fetched, read or parsed
     */
    public static void main(String[] args) throws Exception {
        EditorKit kit = new HTMLEditorKit();
        Document doc = kit.createDefaultDocument();
        // The reader below already decodes the bytes, so ask the parser to
        // ignore any charset directive embedded in the page itself.
        doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

        // try-with-resources guarantees the connection/file is released even
        // when parsing fails part-way through.
        try (Reader rd = getReader(
                "http://theprogrammersfirst.blogspot.com/2016/06/javalangexception-error-getting-all.html")) {
            // Parse the HTML into the document, starting at offset 0.
            kit.read(rd, doc, 0);
        }

        // The HTML text is now stored in the document; collapse each
        // "space followed by newline(s)" run into a single space.
        String s = doc.getText(0, doc.getLength()).trim().replaceAll(" \n+", " ");
        System.out.println(s);
    }

    /**
     * Opens a character stream on the given location.
     *
     * <p>Locations starting with {@code http:} or {@code https:} are fetched
     * over the network (and echoed to standard output); anything else is
     * treated as a path to a local file. Content is decoded as UTF-8 in both
     * cases so the result does not depend on the platform default charset.
     *
     * @param uri an HTTP(S) URL or a local file path
     * @return a reader over the content; the caller is responsible for closing it
     * @throws IOException if the URL cannot be opened or the file cannot be read
     */
    static Reader getReader(String uri)
            throws IOException {
        final InputStream in;
        if (uri.startsWith("http:") || uri.startsWith("https:")) {
            // Retrieve from Internet.
            System.out.println(uri);
            URLConnection conn = new URL(uri).openConnection();
            in = conn.getInputStream();
        } else {
            // Retrieve from file.
            in = new FileInputStream(uri);
        }
        return new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8);
    }
}
2017-05-07
Java program to read text from an HTML file
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment