CSkin博客
标题: Java实例——基于jsoup的简单爬虫添加代理示例 [打印本页]
作者: suger 时间: 2020-8-18 17:09
标题: Java实例——基于jsoup的简单爬虫添加代理示例
import java.io.IOException;
import java.net.Authenticator;
import java.net.InetSocketAddress;
import java.net.PasswordAuthentication;
import java.net.Proxy;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
public class Demo{
// 代理隧道验证信息
final static String ProxyUser = "16KASDA";
final static String ProxyPass = "1231321";
// 代理服务器
final static String ProxyHost = "t.16yun.cn";
final static Integer ProxyPort = 31111;
// 设置IP切换头
final static String ProxyHeadKey = "Proxy-Tunnel";
public static String getUrlProxyContent(String url)
{
Authenticator.setDefault(new Authenticator() {
public PasswordAuthentication getPasswordAuthentication()
{
return new PasswordAuthentication(ProxyUser, ProxyPass.toCharArray());
}
});
// 设置Proxy-Tunnel
Random random = new Random();
int tunnel = random.nextInt(10000);
String ProxyHeadVal = String.valueOf(tunnel);
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ProxyHost, ProxyPort));
try
{
// 处理异常、其他参数
Document doc = Jsoup.connect(url).timeout(3000).header(ProxyHeadKey, ProxyHeadVal).proxy(proxy).get();
if(doc != null) {
System.out.println(doc.body().html());
}
}
catch (IOException e)
{
e.printStackTrace();
}
return null;
}
public static void main(String[] args) throws Exception
{
// 要访问的目标页面
String targetUrl = "http://httpbin.org/ip";
getUrlProxyContent(targetUrl);
}}
欢迎光临 CSkin博客 (http://bbs.cskin.net/) |
Powered by Discuz! X3.2 |