Skip to main content

HTMLEditorKit Parsing only getting 1 meta tag

No replies
samcogan
Offline
Joined: 2008-01-18
Points: 0

Hi, I'm using the HTMLEditorKit to parse an HTML document. My aim is to pull all the cookies out of the http-equiv meta tags. However, when I run the parser, it gets to the first meta tag, displays its attributes, then seems to skip all the other meta tags and move on to the rest of the page. I need it to display all the meta tags. Any thoughts?

Code is below

package parsers;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;

import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Downloads an HTML document and prints every tag reported by the Swing HTML
 * parser, together with the tag's attributes, to standard output.
 */
public class Parser {
    /** Callback used by the most recent call to {@link #parse(URL)}. */
    private CallbackHandler zHandler;

    /**
     * Parses a fixed URL and prints its tags.
     *
     * @param args command-line arguments (unused)
     * @throws MalformedURLException if the hard-coded address is not a valid URL
     * @throws IOException if the document cannot be read
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        String URL = "http://bbc.co.uk";

        // The constructor performs the parse; the instance itself is not needed.
        new Parser(URL);
    }

    /**
     * Creates a parser and immediately parses the document at the given address.
     *
     * @param URL textual address of the document to parse
     * @throws MalformedURLException if {@code URL} is not a valid URL
     * @throws IOException if the document cannot be read
     */
    public Parser(String URL) throws MalformedURLException, IOException {
        URL sURL = new URL(URL);
        parse(sURL);
    }

    /**
     * Reads the document at {@code sURL} and feeds it to a {@link ParserDelegator},
     * reporting tags through a fresh {@link CallbackHandler}.
     *
     * <p>The stream is decoded with the platform default charset, and the parser is
     * told to ignore charset declarations inside the document.
     *
     * @param sURL location of the document to parse
     * @throws IOException if the document cannot be opened or read
     */
    public void parse(URL sURL) throws IOException {
        zHandler = new CallbackHandler();
        BufferedReader zReader = new BufferedReader(
                new InputStreamReader(sURL.openStream()));
        try {
            new ParserDelegator().parse(zReader, zHandler, true);
        } finally {
            // Always release the underlying connection, even if parsing fails.
            zReader.close();
        }
    }

    /**
     * Parser callback that prints each tag name, followed by its attributes where
     * the parser supplies them.
     */
    public class CallbackHandler extends HTMLEditorKit.ParserCallback {

        /** Prints a tag that has no separate end tag, then its attributes. */
        @Override
        public void handleSimpleTag(HTML.Tag zTag, MutableAttributeSet zAttributes, int iPosition) {
            System.out.println(zTag.toString());
            System.out.println(zAttributes.toString());
        }

        /** Prints an opening tag, then its attributes. */
        @Override
        public void handleStartTag(HTML.Tag zTag, MutableAttributeSet zAttributes, int iPosition) {
            System.out.println(zTag.toString());
            System.out.println(zAttributes.toString());
        }

        /**
         * Prints a closing tag. This is the signature the parser actually invokes;
         * end tags carry no attributes.
         */
        @Override
        public void handleEndTag(HTML.Tag zTag, int iPosition) {
            System.out.println(zTag.toString());
        }

        /**
         * Prints a tag and the supplied attributes.
         *
         * <p>This three-argument form does not override anything in
         * {@link HTMLEditorKit.ParserCallback}, so the parser never calls it. It is
         * kept only so that existing direct callers continue to compile.
         */
        public void handleEndTag(HTML.Tag zTag, MutableAttributeSet zAttributes, int iPosition) {
            System.out.println(zTag.toString());
            System.out.println(zAttributes.toString());
        }
    }
}