public class HtmlScraper extends Object
Modifier and Type | Field and Description |
---|---|
private org.jsoup.nodes.Document |
doc |
private String |
label |
private LinkedHashSet<String> |
listIterableObjects |
private static org.slf4j.Logger |
LOGGER |
private Map<String,List<org.apache.jena.graph.Triple>> |
selectedMap |
private Map<String,List<org.apache.jena.graph.Triple>> |
staticMap |
static Comparator<org.apache.jena.graph.Triple> |
tripleComparator |
private LinkedHashSet<org.apache.jena.graph.Node> |
updatedObjects |
private String |
uri |
private Map<String,YamlFile> |
yamlFiles |
Constructor and Description |
---|
HtmlScraper() |
HtmlScraper(File file) |
Modifier and Type | Method and Description |
---|---|
private List<org.apache.jena.graph.Node> |
jsoupQuery(String cssQ) |
private String |
replaceCommands(String s) |
List<org.apache.jena.graph.Triple> |
scrape(String uri,
File filetToScrape) |
private Set<org.apache.jena.graph.Triple> |
scrapeDownloadLink(Map<String,Object> resources,
File htmlFile) |
private Set<org.apache.jena.graph.Triple> |
scrapeTree(Map<String,Object> mapEntry,
Set<org.apache.jena.graph.Triple> triples,
Stack<org.apache.jena.graph.Node> stackNode) |
private List<org.apache.jena.graph.Triple> |
updateRelationship(List<org.apache.jena.graph.Triple> listTriples)
Updates the triples with nested objects.
|
private static final org.slf4j.Logger LOGGER
private LinkedHashSet<org.apache.jena.graph.Node> updatedObjects
private LinkedHashSet<String> listIterableObjects
private String uri
private String label
private org.jsoup.nodes.Document doc
public static Comparator<org.apache.jena.graph.Triple> tripleComparator
public HtmlScraper(File file)
public HtmlScraper()
public List<org.apache.jena.graph.Triple> scrape(String uri, File filetToScrape) throws Exception
Throws: Exception
private List<org.apache.jena.graph.Triple> updateRelationship(List<org.apache.jena.graph.Triple> listTriples)
Parameters: listTriples
private Set<org.apache.jena.graph.Triple> scrapeDownloadLink(Map<String,Object> resources, File htmlFile) throws Exception
Throws: Exception
private Set<org.apache.jena.graph.Triple> scrapeTree(Map<String,Object> mapEntry, Set<org.apache.jena.graph.Triple> triples, Stack<org.apache.jena.graph.Node> stackNode) throws Exception
Parameters: mapEntry, triples, stackNode
Throws: MalformedURLException, Exception
Copyright © 2017–2020. All rights reserved.