"))
+ || ((text.startsWith("") || text.startsWith(" | "))) {
+ text = "";
+ }
+ return text;
+ }
+
+}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/SiteTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/SiteTest.java
new file mode 100644
index 000000000..783b82ddc
--- /dev/null
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/SiteTest.java
@@ -0,0 +1,17 @@
+package us.codecraft.webmagic;
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.charset.StandardCharsets;
+
+import org.junit.Test;
+
+public class SiteTest {
+
+    /** Checks that the charset name passed to setDefaultCharset is what getDefaultCharset returns. */
+    @Test
+    public void test() {
+        final String utf8 = StandardCharsets.UTF_8.name();
+        assertEquals(utf8, Site.me().setDefaultCharset(utf8).getDefaultCharset());
+    }
+}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/SimpleProxyProviderTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/SimpleProxyProviderTest.java
index 6495b16bf..e9325a7a7 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/SimpleProxyProviderTest.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/SimpleProxyProviderTest.java
@@ -1,6 +1,9 @@
package us.codecraft.webmagic.proxy;
import org.junit.Test;
+import org.mockito.Mockito;
+
+import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
@@ -20,11 +23,12 @@ public void test_get_proxy() throws Exception {
Proxy originProxy1 = new Proxy("127.0.0.1", 1087);
Proxy originProxy2 = new Proxy("127.0.0.1", 1088);
SimpleProxyProvider proxyProvider = SimpleProxyProvider.from(originProxy1, originProxy2);
- Proxy proxy = proxyProvider.getProxy(TASK);
+ Request request = Mockito.mock(Request.class);
+ Proxy proxy = proxyProvider.getProxy(request, TASK);
assertThat(proxy).isEqualTo(originProxy1);
- proxy = proxyProvider.getProxy(TASK);
+ proxy = proxyProvider.getProxy(request, TASK);
assertThat(proxy).isEqualTo(originProxy2);
- proxy = proxyProvider.getProxy(TASK);
+ proxy = proxyProvider.getProxy(request, TASK);
assertThat(proxy).isEqualTo(originProxy1);
}
}
diff --git a/webmagic-coverage/pom.xml b/webmagic-coverage/pom.xml
index e2c0f741c..a0a5ffb48 100644
--- a/webmagic-coverage/pom.xml
+++ b/webmagic-coverage/pom.xml
@@ -8,7 +8,7 @@
us.codecraft
webmagic-parent
- 0.8.0
+ 0.9.0
webmagic-coverage
diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml
index 05d6100a6..7cf0aa617 100644
--- a/webmagic-extension/pom.xml
+++ b/webmagic-extension/pom.xml
@@ -3,7 +3,7 @@
us.codecraft
webmagic-parent
- 0.8.0
+ 0.9.0
4.0.0
@@ -14,6 +14,11 @@
redis.clients
jedis
+
+ org.assertj
+ assertj-core
+ test
+
com.google.guava
guava
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
index b213dda94..50dbcaf1a 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
@@ -1,21 +1,25 @@
package us.codecraft.webmagic.monitor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import us.codecraft.webmagic.Request;
-import us.codecraft.webmagic.Spider;
-import us.codecraft.webmagic.SpiderListener;
-import us.codecraft.webmagic.utils.Experimental;
-import us.codecraft.webmagic.utils.UrlUtils;
-
-import javax.management.*;
import java.lang.management.ManagementFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
+import javax.management.InstanceAlreadyExistsException;
+import javax.management.JMException;
+import javax.management.MBeanRegistrationException;
+import javax.management.MBeanServer;
+import javax.management.MalformedObjectNameException;
+import javax.management.NotCompliantMBeanException;
+import javax.management.ObjectName;
+
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.SpiderListener;
+import us.codecraft.webmagic.utils.Experimental;
+import us.codecraft.webmagic.utils.UrlUtils;
+
/**
* @author code4crafer@gmail.com
* @since 0.5.0
@@ -23,17 +27,13 @@
@Experimental
public class SpiderMonitor {
- private static SpiderMonitor INSTANCE = new SpiderMonitor();
-
- private AtomicBoolean started = new AtomicBoolean(false);
-
- private Logger logger = LoggerFactory.getLogger(getClass());
+ private static final SpiderMonitor INSTANCE = new SpiderMonitor();
private MBeanServer mbeanServer;
private String jmxServerName;
- private List spiderStatuses = new ArrayList();
+ private List spiderStatuses = new ArrayList<>();
protected SpiderMonitor() {
jmxServerName = "WebMagic";
@@ -51,7 +51,7 @@ public synchronized SpiderMonitor register(Spider... spiders) throws JMException
for (Spider spider : spiders) {
MonitorSpiderListener monitorSpiderListener = new MonitorSpiderListener();
if (spider.getSpiderListeners() == null) {
- List spiderListeners = new ArrayList();
+ List spiderListeners = new ArrayList<>();
spiderListeners.add(monitorSpiderListener);
spider.setSpiderListeners(spiderListeners);
} else {
@@ -90,7 +90,7 @@ public void onSuccess(Request request) {
}
@Override
- public void onError(Request request) {
+ public void onError(Request request, Exception e) {
errorUrls.add(request.getUrl());
errorCount.incrementAndGet();
}
@@ -109,7 +109,6 @@ public List getErrorUrls() {
}
protected void registerMBean(SpiderStatusMXBean spiderStatus) throws MalformedObjectNameException, InstanceAlreadyExistsException, MBeanRegistrationException, NotCompliantMBeanException {
-// ObjectName objName = new ObjectName(jmxServerName + ":name=" + spiderStatus.getName());
ObjectName objName = new ObjectName(jmxServerName + ":name=" + UrlUtils.removePort(spiderStatus.getName()));
mbeanServer.registerMBean(spiderStatus, objName);
}
diff --git a/webmagic-samples/pom.xml b/webmagic-samples/pom.xml
index 449fcf243..e42e1fcd8 100644
--- a/webmagic-samples/pom.xml
+++ b/webmagic-samples/pom.xml
@@ -3,7 +3,7 @@
webmagic-parent
us.codecraft
- 0.8.0
+ 0.9.0
4.0.0
@@ -27,22 +27,22 @@
org.mapdb
mapdb
- 3.0.8
+ 3.0.9
com.fasterxml.jackson.core
jackson-core
- 2.13.0-rc1
+ 2.15.2
com.fasterxml.jackson.core
jackson-annotations
- 2.13.0-rc1
+ 2.15.2
com.fasterxml.jackson.core
jackson-databind
- 2.13.4.2
+ 2.15.2
diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml
index b73f6fd27..c5238760b 100644
--- a/webmagic-saxon/pom.xml
+++ b/webmagic-saxon/pom.xml
@@ -3,7 +3,7 @@
webmagic-parent
us.codecraft
- 0.8.0
+ 0.9.0
4.0.0
diff --git a/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/JaxpSelectorUtils.java b/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/JaxpSelectorUtils.java
new file mode 100644
index 000000000..b03f3a2ab
--- /dev/null
+++ b/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/JaxpSelectorUtils.java
@@ -0,0 +1,61 @@
+package us.codecraft.webmagic.selector;
+
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+import java.io.StringWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * @author hooy
+ */
+public final class JaxpSelectorUtils {
+    /** Static helpers only: the constructor always throws, so no instance can be created. */
+    private JaxpSelectorUtils() {
+        throw new RuntimeException("The util class cannot be instanced");
+    }
+
+    /** Copies every item of {@code nodes}, in order, into a new {@link ArrayList}. */
+    public static List<Node> NodeListToArrayList(NodeList nodes) {
+        List<Node> list = new ArrayList<>(nodes.getLength());
+        for (int i = 0; i < nodes.getLength(); i++) {
+            list.add(nodes.item(i));
+        }
+        return list;
+    }
+
+    /** Converts one node via {@link #nodesToStrings(List)}; returns {@code null} if that yields no result. */
+    public static String nodeToString(Node node) throws TransformerException {
+        List<String> converted = nodesToStrings(Collections.singletonList(node));
+        return converted.isEmpty() ? null : converted.get(0);
+    }
+
+    /**
+     * Converts each node to text: attribute and text nodes yield {@code getTextContent()};
+     * any other node is serialized by a {@link Transformer} with the XML declaration omitted.
+     */
+    public static List<String> nodesToStrings(List<Node> nodes) throws TransformerException {
+        List<String> results = new ArrayList<>(nodes.size());
+        Transformer transformer = TransformerFactory.newInstance().newTransformer();
+        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
+        for (Node node : nodes) {
+            short type = node.getNodeType();
+            if (type == Node.ATTRIBUTE_NODE || type == Node.TEXT_NODE) {
+                results.add(node.getTextContent());
+            } else {
+                StringWriter out = new StringWriter();
+                transformer.transform(new DOMSource(node), new StreamResult(out));
+                results.add(out.toString());
+            }
+        }
+        return results;
+    }
+}
diff --git a/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/NodeSelector.java b/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/NodeSelector.java
new file mode 100644
index 000000000..3e6339dda
--- /dev/null
+++ b/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/NodeSelector.java
@@ -0,0 +1,32 @@
+package us.codecraft.webmagic.selector;
+
+import org.w3c.dom.Node;
+
+import java.util.List;
+
+/**
+ * Selector(extractor) for html node.
+ *
+ * @author hooy
+ * @since 0.8.0
+ */
+public interface NodeSelector {
+
+    /**
+     * Extracts a single result, as text, from the given node.
+     * If the selector matches more than one result, only the first is returned.
+     *
+     * @param node the DOM node to evaluate the selector against
+     * @return the extracted text
+     */
+    String select(Node node);
+
+    /**
+     * Extracts all results, as text, from the given node.
+     *
+     * @param node the DOM node to evaluate the selector against
+     * @return the extracted texts, one element per result
+     */
+    List<String> selectList(Node node);
+
+}
diff --git a/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java b/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
index 9d5eef9b0..6c5d7b332 100644
--- a/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
+++ b/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
@@ -1,18 +1,10 @@
package us.codecraft.webmagic.selector;
-import java.io.StringWriter;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import javax.xml.namespace.NamespaceContext;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
+import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
@@ -29,21 +21,24 @@
import net.sf.saxon.lib.NamespaceConstant;
import net.sf.saxon.xpath.XPathEvaluator;
+import us.codecraft.webmagic.utils.BaseSelectorUtils;
+
+import static us.codecraft.webmagic.selector.JaxpSelectorUtils.*;
/**
* 支持xpath2.0的选择器。包装了HtmlCleaner和Saxon HE。
*
- * @author code4crafter@gmail.com
- * Date: 13-4-21
- * Time: 上午9:39
+ * @author code4crafter@gmail.com, hooy
+ * Date: 13-4-21
+ * Time: 上午9:39
*/
-public class Xpath2Selector implements Selector {
+public class Xpath2Selector implements Selector, NodeSelector {
- private String xpathStr;
+ private final String xpathStr;
private XPathExpression xPathExpression;
- private Logger logger = LoggerFactory.getLogger(getClass());
+ private final Logger logger = LoggerFactory.getLogger(getClass());
public Xpath2Selector(String xpathStr) {
this.xpathStr = xpathStr;
@@ -54,25 +49,25 @@ public Xpath2Selector(String xpathStr) {
}
}
+ public static Xpath2Selector newInstance(String xpathStr) {
+ return new Xpath2Selector(xpathStr); // static-factory shorthand for the public constructor
+ }
+
enum XPath2NamespaceContext implements NamespaceContext {
INSTANCE;
- private final Map prefix2NamespaceMap = new ConcurrentHashMap();
+ private final Map prefix2NamespaceMap = new ConcurrentHashMap<>();
- private final Map> namespace2PrefixMap = new ConcurrentHashMap>();
+ private final Map> namespace2PrefixMap = new ConcurrentHashMap<>();
private void put(String prefix, String namespaceURI) {
prefix2NamespaceMap.put(prefix, namespaceURI);
- List prefixes = namespace2PrefixMap.get(namespaceURI);
- if (prefixes == null) {
- prefixes = new ArrayList();
- namespace2PrefixMap.put(namespaceURI, prefixes);
- }
+ List prefixes = namespace2PrefixMap.computeIfAbsent(namespaceURI, k -> new ArrayList<>());
prefixes.add(prefix);
}
- private XPath2NamespaceContext() {
+ XPath2NamespaceContext() {
put("fn", NamespaceConstant.FN);
put("xslt", NamespaceConstant.XSLT);
put("xhtml", NamespaceConstant.XHTML);
@@ -111,32 +106,18 @@ private void init() throws XPathExpressionException {
@Override
public String select(String text) {
try {
- HtmlCleaner htmlCleaner = new HtmlCleaner();
- TagNode tagNode = htmlCleaner.clean(text);
- Document document = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
- Object result;
- try {
- result = xPathExpression.evaluate(document, XPathConstants.NODESET);
- } catch (XPathExpressionException e) {
- result = xPathExpression.evaluate(document, XPathConstants.STRING);
- }
- if (result instanceof NodeList) {
- NodeList nodeList = (NodeList) result;
- if (nodeList.getLength() == 0) {
- return null;
- }
- Node item = nodeList.item(0);
- if (item.getNodeType() == Node.ATTRIBUTE_NODE || item.getNodeType() == Node.TEXT_NODE) {
- return item.getTextContent();
- } else {
- StreamResult xmlOutput = new StreamResult(new StringWriter());
- Transformer transformer = TransformerFactory.newInstance().newTransformer();
- transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
- transformer.transform(new DOMSource(item), xmlOutput);
- return xmlOutput.getWriter().toString();
- }
- }
- return result.toString();
+ Document doc = parse(text);
+ return select(doc);
+ } catch (Exception e) {
+ logger.error("select text error! " + xpathStr, e);
+ }
+ return null;
+ }
+
+ @Override
+ public String select(Node node) {
+ try {
+ return (String) xPathExpression.evaluate(node, XPathConstants.STRING);
} catch (Exception e) {
logger.error("select text error! " + xpathStr, e);
}
@@ -145,38 +126,72 @@ public String select(String text) {
@Override
public List selectList(String text) {
- List results = new ArrayList();
try {
- HtmlCleaner htmlCleaner = new HtmlCleaner();
- TagNode tagNode = htmlCleaner.clean(text);
- Document document = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
- Object result;
- try {
- result = xPathExpression.evaluate(document, XPathConstants.NODESET);
- } catch (XPathExpressionException e) {
- result = xPathExpression.evaluate(document, XPathConstants.STRING);
- }
- if (result instanceof NodeList) {
- NodeList nodeList = (NodeList) result;
- Transformer transformer = TransformerFactory.newInstance().newTransformer();
- StreamResult xmlOutput = new StreamResult();
- transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
- for (int i = 0; i < nodeList.getLength(); i++) {
- Node item = nodeList.item(i);
- if (item.getNodeType() == Node.ATTRIBUTE_NODE || item.getNodeType() == Node.TEXT_NODE) {
- results.add(item.getTextContent());
- } else {
- xmlOutput.setWriter(new StringWriter());
- transformer.transform(new DOMSource(item), xmlOutput);
- results.add(xmlOutput.getWriter().toString());
- }
- }
- } else {
- results.add(result.toString());
- }
+ Document doc = parse(text);
+ return selectList(doc);
+ } catch (Exception e) {
+ logger.error("select text error! " + xpathStr, e);
+ }
+ return null;
+ }
+
+ @Override
+ public List selectList(Node node) {
+ try {
+ NodeList result = (NodeList) xPathExpression.evaluate(node, XPathConstants.NODESET);
+ List nodes = NodeListToArrayList(result);
+ return nodesToStrings(nodes);
} catch (Exception e) {
logger.error("select text error! " + xpathStr, e);
}
- return results;
+ return null;
}
+
+    /** Parses {@code text} and delegates to {@link #selectNode(Node)}; logs and returns {@code null} on any exception. */
+    public Node selectNode(String text) {
+        try {
+            return selectNode(parse(text));
+        } catch (Exception e) {
+            logger.error("select text error! " + xpathStr, e);
+            return null;
+        }
+    }
+
+    /** Evaluates the compiled XPath on {@code node} as a single node; logs and returns {@code null} on any exception. */
+    public Node selectNode(Node node) {
+        try {
+            return (Node) xPathExpression.evaluate(node, XPathConstants.NODE);
+        } catch (Exception e) {
+            logger.error("select text error! " + xpathStr, e);
+            return null;
+        }
+    }
+
+    /** Parses {@code text} and delegates to {@link #selectNodes(Node)}; logs and returns {@code null} on any exception. */
+    public List<Node> selectNodes(String text) {
+        try {
+            return selectNodes(parse(text));
+        } catch (Exception e) {
+            logger.error("select text error! " + xpathStr, e);
+            return null;
+        }
+    }
+
+    /** Evaluates the compiled XPath on {@code node} as a node set and copies it to a list; logs and returns {@code null} on any exception. */
+    public List<Node> selectNodes(Node node) {
+        try {
+            return NodeListToArrayList((NodeList) xPathExpression.evaluate(node, XPathConstants.NODESET));
+        } catch (Exception e) {
+            logger.error("select text error! " + xpathStr, e);
+            return null;
+        }
+    }
+
+    /** Pre-processes {@code text}, cleans it with HtmlCleaner and serializes the result to a DOM {@link Document}. */
+    protected static Document parse(String text) throws ParserConfigurationException {
+        // HtmlCleaner cannot parse some bare fragments directly (presumably <tr>/<td>; tag names reconstructed - confirm against preParse)
+        String prepared = BaseSelectorUtils.preParse(text);
+        TagNode root = new HtmlCleaner().clean(prepared);
+        return new DomSerializer(new CleanerProperties()).createDOM(root);
+    }
+
}
diff --git a/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java b/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
index 166188361..4033fcfbd 100644
--- a/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
+++ b/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
@@ -11,9 +11,15 @@
import org.junit.Ignore;
import org.junit.Test;
+import org.w3c.dom.Node;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.xsoup.XPathEvaluator;
import us.codecraft.xsoup.Xsoup;
+import javax.xml.transform.TransformerException;
+
/**
* @author code4crafter@gmail.com Date: 13-4-21 Time: 上午10:06
*/
@@ -1389,31 +1395,31 @@ public void testXpath2Selector() {
@Test
public void performanceTest() {
Xpath2Selector xpath2Selector = new Xpath2Selector("//a");
- long time =System.currentTimeMillis();
+ long time = System.currentTimeMillis();
for (int i = 0; i < 1000; i++) {
xpath2Selector.selectList(html);
}
- System.out.println(System.currentTimeMillis()-time);
+ System.out.println(System.currentTimeMillis() - time);
XpathSelector xpathSelector = new XpathSelector("//a");
- time =System.currentTimeMillis();
+ time = System.currentTimeMillis();
for (int i = 0; i < 1000; i++) {
xpathSelector.selectList(html);
}
- System.out.println(System.currentTimeMillis()-time);
+ System.out.println(System.currentTimeMillis() - time);
- time =System.currentTimeMillis();
+ time = System.currentTimeMillis();
for (int i = 0; i < 1000; i++) {
xpath2Selector.selectList(html);
}
System.out.println(System.currentTimeMillis() - time);
CssSelector cssSelector = new CssSelector("a");
- time =System.currentTimeMillis();
+ time = System.currentTimeMillis();
for (int i = 0; i < 1000; i++) {
cssSelector.selectList(html);
}
- System.out.println("css "+(System.currentTimeMillis()-time));
+ System.out.println("css " + (System.currentTimeMillis() - time));
}
@Ignore("take long time")
@@ -1425,55 +1431,92 @@ public void parserPerformanceTest() throws XPatherException {
TagNode tagNode = htmlCleaner.clean(html);
Document document = Jsoup.parse(html);
- long time =System.currentTimeMillis();
+ long time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
htmlCleaner.clean(html);
}
- System.out.println(System.currentTimeMillis()-time);
+ System.out.println(System.currentTimeMillis() - time);
- time =System.currentTimeMillis();
+ time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
tagNode.evaluateXPath("//a");
}
- System.out.println(System.currentTimeMillis()-time);
+ System.out.println(System.currentTimeMillis() - time);
System.out.println("=============");
- time =System.currentTimeMillis();
+ time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
Jsoup.parse(html);
}
- System.out.println(System.currentTimeMillis()-time);
+ System.out.println(System.currentTimeMillis() - time);
- time =System.currentTimeMillis();
+ time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
document.select("a");
}
- System.out.println(System.currentTimeMillis()-time);
+ System.out.println(System.currentTimeMillis() - time);
System.out.println("=============");
- time =System.currentTimeMillis();
+ time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
htmlCleaner.clean(html);
}
- System.out.println(System.currentTimeMillis()-time);
+ System.out.println(System.currentTimeMillis() - time);
- time =System.currentTimeMillis();
+ time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
tagNode.evaluateXPath("//a");
}
- System.out.println(System.currentTimeMillis()-time);
+ System.out.println(System.currentTimeMillis() - time);
System.out.println("=============");
XPathEvaluator compile = Xsoup.compile("//a");
- time =System.currentTimeMillis();
+ time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
compile.evaluate(document);
}
- System.out.println(System.currentTimeMillis()-time);
+ System.out.println(System.currentTimeMillis() - time);
}
+ /**
+ * New api test
+ *
+ * @author hooy
+ * @since 0.8.0
+ */
+ private String rank = " 点击榜排名 | 分类 | 书名/最新章节 | 作者 | 推荐 | 更新时间 |
---|
1. | 现实 | | | 0 | 11-24 22:32 | 2. | 架空 | | | 1047 | 03-04 14:44 | 3. | 现实 | | | 0 | 07-20 09:06 | 4. | 豪门 | | | 0 | 12-03 09:12 | 5. | 现实 | | | 0 | 02-01 21:12 | 6. | 玄奇 | | | 3455 | 02-28 12:31 | 7. | 玄奇 | | | 20614 | 03-31 12:37 | 8. | 复仇 | | | 55 | 06-03 11:43 | 9. | 穿越 | | | 0 | 10-27 18:50 | 10. | 宫斗 | | | 320 | 10-31 13:58 | 11. | 宫斗 | | | 6268 | 07-12 20:23 | 12. | 现实 | | | 0 | 01-18 23:00 | 13. | 婚恋 | | | 0 | 12-14 20:50 | 14. | 修真 | | | 0 | 02-03 23:40 | 15. | 豪门 | | | 0 | 11-06 23:38 | 16. | 穿越 | | | 191 | 12-02 23:37 | 17. | 穿越 | | | 412 | 10-13 22:39 | 18. | 豪门 | | | 635 | 07-01 13:15 | 19. | 架空 | | | 144 | 06-18 09:35 | 20. | 宅斗 | | | 1032 | 08-15 19:03 | 21. | 宫斗 | | | 0 | 09-30 20:32 | 22. | 豪门 | | | 0 | 06-05 11:31 | 23. | 重生 | | | 80 | 11-25 19:56 | 24. | 异世 | | | 68 | 01-12 10:06 | 25. | 豪门 | | | 0 | 05-29 18:46 | 26. | 婚恋 | | | 2778 | 11-04 17:48 | 27. | 玄奇 | | | 207 | 12-06 16:57 | 28. | 穿越 | | | 260 | 01-04 23:26 | 29. | 豪门 | | | 0 | 12-07 21:39 | 30. | 架空 | | | 1127 | 06-06 17:28 | 31. | 穿越 | | | 113 | 09-13 09:06 | 32. | 架空 | | | 597 | 02-14 18:47 | 33. | 玄奇 | | | 528 | 06-04 22:04 | 34. | 穿越 | | | 328 | 06-06 22:09 | 35. | 架空 | | | 539 | 05-24 14:42 | 36. | 架空 | | | 0 | 03-05 23:27 | 37. | 穿越 | | | 3215 | 08-21 16:38 | 38. | 宫斗 | | | 905 | 08-04 20:24 | 39. | 玄奇 | | | 1328 | 07-25 10:58 | 40. | 穿越 | | | 203 | 01-27 20:53 | 41. | 宫斗 | | | 407 | 08-31 09:03 | 42. | 宅斗 | | | 16 | 05-03 17:38 | 43. | 豪门 | | | 0 | 11-10 08:00 | 44. | 婚恋 | | | 0 | 07-12 21:37 | 45. | 架空 | | | 0 | 06-23 21:02 | 46. | 玄奇 | | | 1382 | 05-31 20:36 | 47. | 重生 | | | 334 | 07-16 19:19 | 48. | 婚恋 | | | 505 | 11-01 16:42 | 49. | 婚恋 | | | 0 | 10-19 18:32 | 50. | 豪门 | | | 540 | 09-19 19:18 | 51. | 婚恋 | | | 226 | 03-18 13:09 | 52. | 穿越 | | | 1026 | 03-08 16:28 | 53. | 重生 | | | 304 | 02-19 10:25 | 54. | 玄奇 | | | 2617 | 02-15 20:57 | 55. | 穿越 | | | 199 | 09-04 19:43 | 56. | 同人 | | | 768 | 07-19 20:00 | 57. | 宅斗 | | | 0 | 02-13 18:13 | 58. | 豪门 | | | 0 | 11-12 22:23 | 59. 
| 架空 | | | 0 | 07-28 23:42 | 60. | 婚恋 | | | 0 | 02-03 23:09 | 61. | 豪门 | | | 285 | 01-07 19:21 | 62. | 重生 | | | 654 | 10-12 18:16 | 63. | 异能 | | | 617 | 06-18 20:23 | 64. | 宫斗 | | | 27 | 06-02 21:05 | 65. | 种田 | | | 206 | 08-31 19:23 | 66. | 宅斗 | | | 2444 | 08-19 15:51 | 67. | 宅斗 | | | 818 | 08-07 23:38 | 68. | 现代 | | | 0 | 12-23 17:02 | 69. | 玄奇 | | | 0 | 07-23 12:00 | 70. | 婚恋 | | | 0 | 11-01 16:43 | 71. | 豪门 | | | 0 | 09-12 00:01 | 72. | 架空 | | | 0 | 04-27 22:42 | 73. | 豪门 | | | 0 | 04-19 13:55 | 74. | 异能 | | | 62 | 07-30 00:00 | 75. | 穿越 | | | 1307 | 07-20 16:41 | 76. | 玄奇 | | | 12820 | 07-15 23:46 | 77. | 架空 | | | 828 | 06-06 17:54 | 78. | 宅斗 | | | 985 | 05-20 23:53 | 79. | 玄奇 | | | 4960 | 04-12 15:58 | 80. | 玄奇 | | | 245 | 03-02 23:11 | 81. | 宅斗 | | | 34 | 12-21 10:11 | 82. | 宅斗 | | | 1411 | 07-21 00:00 | 83. | 现代 | | | 0 | 07-31 10:10 | 84. | 玄奇 | | | 0 | 06-18 13:53 | 85. | 架空 | | | 0 | 12-03 23:41 | 86. | 玄奇 | | | 0 | 11-28 22:13 | 87. | 豪门 | | | 0 | 11-07 22:48 | 88. | 婚恋 | | | 0 | 08-29 23:15 | 89. | 种田 | | | 1831 | 08-21 16:38 | 90. | 豪门 | | | 0 | 07-11 21:25 | 91. | 豪门 | | | 0 | 06-13 15:37 | 92. | 豪门 | | | 0 | 05-07 22:10 | 93. | 豪门 | | | 0 | 02-28 00:01 | 94. | 豪门 | | | 304 | 12-16 07:30 | 95. | 婚恋 | | | 669 | 11-07 18:16 | 96. | 仙侠 | | | 54 | 09-25 19:51 | 97. | 豪门 | | | 655 | 08-31 13:02 | 98. | 现实 | | | 374 | 06-29 09:55 | 99. | 穿越 | | | 373 | 06-19 18:07 | 100. | 婚恋 | | | 159 | 06-04 21:05 |
";
+
+    @Test
+    public void testStringAPI() {
+        // selectList(String) parses the HTML and delegates to selectList(Node): one string per matched table row
+        List<String> items = new Xpath2Selector("//div[@class=\"bd\"]//tbody/tr").selectList(rank);
+        Assert.assertEquals(100, items.size()); // value equality; assertSame would compare boxed Integer identity
+        // select(String) parses the 11th row's string form and delegates to select(Node)
+        String name = new Xpath2Selector("//td[3]/div/a[1]/text()").select(items.get(10));
+        Assert.assertEquals("深宫安容传", name);
+    }
+
+    @Test
+    public void testNodeAPI() {
+        // selectNodes(String) parses the HTML and delegates to selectNodes(Node): one DOM node per matched table row
+        List<Node> items = new Xpath2Selector("//div[@class=\"bd\"]//tbody/tr").selectNodes(rank);
+        Assert.assertEquals(100, items.size()); // value equality; assertSame would compare boxed Integer identity
+        // selectNode(Node) evaluates a relative XPath against the 11th row node
+        Node item = new Xpath2Selector("./td[3]/div/a[1]").selectNode(items.get(10));
+        String name = new Xpath2Selector("./text()").select(item);
+        Assert.assertEquals("深宫安容传", name);
+    }
+
+    @Test
+    public void testUtilAPI() throws TransformerException {
+        // the XPath ends in text(), so the selected node is expected to be a text node
+        Node textNode = Xpath2Selector.newInstance("//div[@class=\"bd\"]//tbody/tr[11]/td[3]/div/a[1]/text()").selectNode(rank);
+        // nodeToString(Node) delegates to nodesToStrings(List), which returns a text node's text content
+        Assert.assertEquals("深宫安容传", JaxpSelectorUtils.nodeToString(textNode));
+    }
+
}
diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml
index 3ec15f9af..0019ea3c8 100644
--- a/webmagic-scripts/pom.xml
+++ b/webmagic-scripts/pom.xml
@@ -3,7 +3,7 @@
webmagic-parent
us.codecraft
- 0.8.0
+ 0.9.0
4.0.0
diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml
index 715d7731b..63682001f 100644
--- a/webmagic-selenium/pom.xml
+++ b/webmagic-selenium/pom.xml
@@ -3,7 +3,7 @@
webmagic-parent
us.codecraft
- 0.8.0
+ 0.9.0
4.0.0
|