Loading...
I have started an interesting project: a link crawler written in JavaFX Script. There is no particular reason for choosing JavaFX other than to improve my own knowledge of JavaFX scripting.
Here is some preliminary code that I have already written:
Import statement and package declaration –
———————————————————
package com.we4tech.linkcrawler.fx.script;
import java.lang.*;
import java.io.*;
import java.net.*;
import java.util.*;
import org.xml.sax.SAXException;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.NamedNodeMap;
import com.sun.org.apache.xerces.internal.parsers.DOMParser;
Sample class skeleton –
————————————————————–
/**
* Define structure for Crawler class: the crawl target, a debug switch,
* the collected links, and the operations/functions implemented outside
* the class body.
* @author hasan (hasan -AT- somewherein.net)
*/
public class Crawler {
/**
* Define the target url. start() passes this value to followupLink().
*/
public attribute url: String;
/**
* When true, debug() prints its messages; otherwise they are suppressed.
*/
public attribute debug: Boolean;
/**
* The href values inserted by followupLink() while crawling.
*/
private attribute foundLinks: String*;
/**
* Start crawling process.
*/
public operation start();
/**
* Parse the document at the given link and collect the href values of
* its anchor elements.
*/
private operation followupLink(link: String);
/**
* Return the links collected so far (the foundLinks attribute).
*/
public function getLinks(): String*;
/**
* Print out debug output with a [DEBUG] prefix and a timestamp, but only
* when the debug attribute is true.
*/
private operation debug(message: String);
/**
* Print out error output with an [ERROR] prefix and a timestamp.
*/
private operation error(message: String);
}
Implementation –
————————————————————————-
/**
* Entry point of the crawl: logs the configured url through debug() and
* then hands that url to followupLink().
*/
operation Crawler.start() {
    // String literals use plain ASCII double quotes; the typographic quotes
    // introduced by blog formatting are not valid string delimiters.
    debug("Initiating crawler process..");
    debug("URL is set to - {url}");
    debug("Crawling just initiated.");
    followupLink(url);
}
/**
* Parse the document at the given link, record the href value of every
* "a" element in foundLinks, and follow each href that has not been
* recorded before.
*
* An empty or null link is reported through error() and ignored. Any
* exception raised while parsing is caught and reported through error().
*
* @param link the URL (system id) handed to the DOM parser
*/
operation Crawler.followupLink(link: String) {
    debug("Follow up link - {link}");
    // Guard clause: nothing to crawl without a URL.
    if (link == null or link.length() == 0) {
        error("URL is empty");
        return;
    }
    try {
        // NOTE(review): the `do` block is intended to keep the parsing work
        // off the event dispatch thread - confirm against the JavaFX Script
        // `do` statement semantics of the runtime in use.
        do {
            var parse = new DOMParser();
            parse.parse(link);
            var document = parse.getDocument();
            var nodes = document.getElementsByTagName("a");
            // Explicit index loop bounded by i < count: the original range
            // [0..getLength()] was inclusive and asked for one item too many.
            var count = nodes.getLength();
            var i = 0;
            while (i < count) {
                var node = nodes.item(i);
                var attributes = node.getAttributes();
                if (attributes <> null) {
                    // Direct lookup replaces the manual scan over all attributes.
                    var attr = attributes.getNamedItem("href");
                    if (attr <> null) {
                        var nodeName = attr.getNodeName();
                        var nodeValue = attr.getNodeValue();
                        debug("{nodeName} link - {nodeValue}");
                        // Look the link up BEFORE inserting it, so that pages
                        // linking to each other do not recurse forever.
                        var seenBefore = foundLinks[. == nodeValue];
                        insert nodeValue into foundLinks;
                        if (sizeof seenBefore == 0) {
                            // start new follow up process.
                            followupLink(nodeValue);
                        }
                    }
                }
                i = i + 1;
            }
        }
    } catch (e) {
        error("Error found during opening up new URLConnection - {e}");
    }
}
/**
* Print the message to standard output, prefixed with [DEBUG] and the
* current date, but only when the debug attribute is true.
*
* @param message the text to print
*/
operation Crawler.debug(message: String) {
    if (debug) {
        // Plain ASCII quotes: the typographic quotes from the blog formatting
        // are not valid string delimiters.
        System.out.println("[DEBUG] - [{new Date()}] - {message}");
    }
}
/**
* Print the message to standard output, prefixed with [ERROR] and the
* current date. Unlike debug(), this is not gated by the debug attribute.
*
* @param message the text to print
*/
operation Crawler.error(message: String) {
    // Plain ASCII quotes: the typographic quotes from the blog formatting
    // are not valid string delimiters.
    System.out.println("[ERROR] - [{new Date()}] - {message}");
}
/**
* Return the foundLinks attribute, i.e. the href values that
* followupLink() has inserted so far.
*/
function Crawler.getLinks() {
return foundLinks;
}
NOTE: I have published this code in the hope that it is useful to people who have just started learning JavaFX, and that it helps others get to know the language better.
Best wishes,







| www.flickr.com |
Leave a reply