| Package | Description |
|---|---|
| us.codecraft.webmagic | Main class "Spider" and models. |
| us.codecraft.webmagic.selector | Selectors for page extraction. |
| Modifier and Type | Method and Description |
|---|---|
Selectable |
Page.getUrl()
get the URL of the current page
|
| Modifier and Type | Method and Description |
|---|---|
void |
Page.setUrl(Selectable url) |
| Modifier and Type | Class and Description |
|---|---|
class |
AbstractSelectable |
class |
Html
Selectable html.
|
class |
HtmlNode |
class |
Json
parse JSON
|
class |
PlainText
Selectable plain text.
Cannot be selected by XPath or CSS selector. |
| Modifier and Type | Method and Description |
|---|---|
Selectable |
PlainText.$(java.lang.String selector) |
Selectable |
Selectable.$(java.lang.String selector)
select list with css selector
|
Selectable |
HtmlNode.$(java.lang.String selector) |
Selectable |
PlainText.$(java.lang.String selector,
java.lang.String attrName) |
Selectable |
Selectable.$(java.lang.String selector,
java.lang.String attrName)
select list with css selector
|
Selectable |
HtmlNode.$(java.lang.String selector,
java.lang.String attrName) |
Selectable |
Selectable.css(java.lang.String selector)
select list with css selector
|
Selectable |
AbstractSelectable.css(java.lang.String selector) |
Selectable |
Selectable.css(java.lang.String selector,
java.lang.String attrName)
select list with css selector
|
Selectable |
AbstractSelectable.css(java.lang.String selector,
java.lang.String attrName) |
Selectable |
Selectable.jsonPath(java.lang.String jsonPath)
extract by JSON Path expression
|
Selectable |
Json.jsonPath(java.lang.String jsonPath) |
Selectable |
AbstractSelectable.jsonPath(java.lang.String jsonPath) |
Selectable |
PlainText.links() |
Selectable |
Selectable.links()
select all links
|
Selectable |
HtmlNode.links() |
Selectable |
Selectable.regex(java.lang.String regex)
select list with regex, default group is group 1
|
Selectable |
AbstractSelectable.regex(java.lang.String regex) |
Selectable |
Selectable.regex(java.lang.String regex,
int group)
select list with regex
|
Selectable |
AbstractSelectable.regex(java.lang.String regex,
int group) |
Selectable |
Selectable.replace(java.lang.String regex,
java.lang.String replacement)
replace with regex
|
Selectable |
AbstractSelectable.replace(java.lang.String regex,
java.lang.String replacement) |
Selectable |
Selectable.select(Selector selector)
extract by custom selector
|
Selectable |
HtmlNode.select(Selector selector) |
Selectable |
AbstractSelectable.select(Selector selector) |
protected Selectable |
AbstractSelectable.select(Selector selector,
java.util.List<java.lang.String> strings) |
protected Selectable |
HtmlNode.selectElements(BaseElementSelector elementSelector)
select elements
|
Selectable |
Selectable.selectList(Selector selector)
extract by custom selector
|
Selectable |
HtmlNode.selectList(Selector selector) |
Selectable |
AbstractSelectable.selectList(Selector selector) |
protected Selectable |
AbstractSelectable.selectList(Selector selector,
java.util.List<java.lang.String> strings) |
Selectable |
PlainText.smartContent() |
Selectable |
Selectable.smartContent()
select smart content with the Readability algorithm
|
Selectable |
HtmlNode.smartContent() |
Selectable |
PlainText.xpath(java.lang.String xpath) |
Selectable |
Selectable.xpath(java.lang.String xpath)
select list with xpath
|
Selectable |
HtmlNode.xpath(java.lang.String xpath) |
| Modifier and Type | Method and Description |
|---|---|
java.util.List<Selectable> |
PlainText.nodes() |
java.util.List<Selectable> |
Selectable.nodes()
get all nodes
|
java.util.List<Selectable> |
HtmlNode.nodes() |
Copyright © 2020. All rights reserved.