@MetaInfServices(value=org.apache.any23.extractor.Extractor.class) public class HTMLScraperExtractor extends Object implements Extractor.ContentExtractor
HTMLScraperPluginExtractor.BlindExtractor, Extractor.ContentExtractor, Extractor.TagSoupDOMExtractor

| Modifier and Type | Field and Description |
|---|---|
| static org.openrdf.model.URI | PAGE_CONTENT_AE_PROPERTY |
| static org.openrdf.model.URI | PAGE_CONTENT_CE_PROPERTY |
| static org.openrdf.model.URI | PAGE_CONTENT_DE_PROPERTY |
| static org.openrdf.model.URI | PAGE_CONTENT_LCE_PROPERTY |

| Constructor and Description |
|---|
| HTMLScraperExtractor() |

| Modifier and Type | Method and Description |
|---|---|
| void | addTextExtractor(String name, org.openrdf.model.URI property, de.l3s.boilerpipe.BoilerpipeExtractor extractor) |
| ExtractorDescription | getDescription() |
| String[] | getTextExtractors() |
| void | run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, InputStream inputStream, ExtractionResult extractionResult) |
| void | setStopAtFirstError(boolean b) |

public static final org.openrdf.model.URI PAGE_CONTENT_DE_PROPERTY
public static final org.openrdf.model.URI PAGE_CONTENT_AE_PROPERTY
public static final org.openrdf.model.URI PAGE_CONTENT_LCE_PROPERTY
public static final org.openrdf.model.URI PAGE_CONTENT_CE_PROPERTY
public void addTextExtractor(String name, org.openrdf.model.URI property, de.l3s.boilerpipe.BoilerpipeExtractor extractor)
public String[] getTextExtractors()
public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, InputStream inputStream, ExtractionResult extractionResult) throws IOException, ExtractionException
Specified by: run in interface Extractor<InputStream>
Throws: IOException, ExtractionException
public ExtractorDescription getDescription()
Specified by: getDescription in interface Extractor<InputStream>
public void setStopAtFirstError(boolean b)
Specified by: setStopAtFirstError in interface Extractor.ContentExtractor

Copyright © 2010-2013 The Apache Software Foundation. All Rights Reserved.