pom依赖
<dependency>
<groupId>net.sourceforge.htmlunit</groupId>
<artifactId>htmlunit</artifactId>
<version>2.35.0</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
代码
// Loads the picture detail page for `num` (defined outside this snippet) in a headless
// HtmlUnit browser, lets its JavaScript run, then re-parses the rendered markup with jsoup
// and collects the <img src=...> elements found inside the element whose id is "img".
String url = "https://pic.netbian.com/tupian/" + num + ".html";
// String url = "https://blog.cool88.top";
// Silence the HtmlUnit / Apache HttpClient loggers.
// Alternative via commons-logging:
// LogFactory.getFactory().setAttribute("org.apache.commons.logging.Log","org.apache.commons.logging.impl.NoOpLog");
java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF);
java.util.logging.Logger.getLogger("org.apache.http.client").setLevel(Level.OFF);
// HtmlUnit simulates a browser.
// NOTE(review): WebClient is AutoCloseable and no close() is visible in this snippet —
// make sure it is closed once the page is no longer needed.
WebClient webClient = new WebClient(BrowserVersion.CHROME);
webClient.getOptions().setJavaScriptEnabled(true); // enable the JS interpreter (true is the default)
webClient.getOptions().setCssEnabled(false); // disable CSS support
webClient.getOptions().setThrowExceptionOnScriptError(false); // do not throw when a page script fails
webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); // do not throw on non-2xx responses
// Support AJAX: re-synchronize asynchronous calls.
webClient.setAjaxController(new NicelyResynchronizingAjaxController());
webClient.getOptions().setTimeout(5 * 1000); // timeout in milliseconds (5 seconds)
HtmlPage htmlPage = webClient.getPage(url);
Thread.sleep(1000);
webClient.waitForBackgroundJavaScript(3 * 1000); // wait up to 3 seconds for background JS to finish
// Parse the rendered page.
String pageAsXml = htmlPage.asXml();
//System.out.println(pageAsXml);
Document document = Jsoup.parse(pageAsXml);
Element element = document.getElementById("img");
// Because failing status codes are not thrown (see above), an error page may be returned that
// has no "img" element; fall back to an empty result instead of a NullPointerException.
Elements pictures = (element != null) ? element.select("img[src]") : new Elements();