转:http://www.cnblogs.com/wasp520/archive/2012/06/28/2568897.html
使用HttpClient可以模拟请求URL获取资源,但单线程请求在速度上会有一定的限制。参考Apache给出的例子,我自己做了测试,实现了多线程并发请求。以下代码需要HttpClient 4.2的包,可以在 http://hc.apache.org/downloads.cgi 下载。
1、并发请求
package generate.httpclient; import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.conn.ClientConnectionManager; import org.apache.http.conn.params.ConnManagerParams; import org.apache.http.conn.scheme.PlainSocketFactory; import org.apache.http.conn.scheme.Scheme; import org.apache.http.conn.scheme.SchemeRegistry; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.impl.conn.PoolingClientConnectionManager; import org.apache.http.params.BasicHttpParams; import org.apache.http.params.HttpConnectionParams; import org.apache.http.params.HttpParams; import org.apache.http.protocol.BasicHttpContext; import org.apache.http.protocol.HttpContext; import org.apache.http.util.EntityUtils; public class ThreadPoolHttpClient { // 线程池 private ExecutorService exe = null; // 线程池的容量 private static final int POOL_SIZE = 20; private HttpClient client = null; String[] urls=null; public ThreadPoolHttpClient(String[] urls){ this.urls=urls; } public void test() throws Exception { exe = Executors.newFixedThreadPool(POOL_SIZE); HttpParams params =new BasicHttpParams(); /* 从连接池中取连接的超时时间 */ ConnManagerParams.setTimeout(params, 1000); /* 连接超时 */ HttpConnectionParams.setConnectionTimeout(params, 2000); /* 请求超时 */ HttpConnectionParams.setSoTimeout(params, 4000); SchemeRegistry schemeRegistry = new SchemeRegistry(); schemeRegistry.register( new Scheme("http", 80, PlainSocketFactory.getSocketFactory())); //ClientConnectionManager cm = new PoolingClientConnectionManager(schemeRegistry); PoolingClientConnectionManager cm=new PoolingClientConnectionManager(schemeRegistry); cm.setMaxTotal(10); final HttpClient httpClient = new DefaultHttpClient(cm,params); // URIs to perform GETs on final String[] urisToGet = urls; /* 有多少url创建多少线程,url多时机子撑不住 // 
create a thread for each URI GetThread[] threads = new GetThread[urisToGet.length]; for (int i = 0; i < threads.length; i++) { HttpGet httpget = new HttpGet(urisToGet[i]); threads[i] = new GetThread(httpClient, httpget); } // start the threads for (int j = 0; j < threads.length; j++) { threads[j].start(); } // join the threads,等待所有请求完成 for (int j = 0; j < threads.length; j++) { threads[j].join(); } 使用线程池*/ for (int i = 0; i < urisToGet.length; i++) { final int j=i; System.out.println(j); HttpGet httpget = new HttpGet(urisToGet[i]); exe.execute( new GetThread(httpClient, httpget)); } //创建线程池,每次调用POOL_SIZE /* for (int i = 0; i < urisToGet.length; i++) { final int j=i; System.out.println(j); exe.execute(new Thread() { @Override public void run() { this.setName("threadsPoolClient"+j); try { this.sleep(100); System.out.println(j); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } HttpGet httpget = new HttpGet(urisToGet[j]); new GetThread(httpClient, httpget).get(); } }); } */ //exe.shutdown(); System.out.println("Done"); } static class GetThread extends Thread{ private final HttpClient httpClient; private final HttpContext context; private final HttpGet httpget; public GetThread(HttpClient httpClient, HttpGet httpget) { this.httpClient = httpClient; this.context = new BasicHttpContext(); this.httpget = httpget; } @Override public void run(){ this.setName("threadsPoolClient"); try { Thread.sleep(5000); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } get(); } public void get() { try { HttpResponse response = this.httpClient.execute(this.httpget, this.context); HttpEntity entity = response.getEntity(); if (entity != null) { System.out.println(this.httpget.getURI()+": status"+response.getStatusLine().toString()); } // ensure the connection gets released to the manager EntityUtils.consume(entity); } catch (Exception ex) { this.httpget.abort(); }finally{ httpget.releaseConnection(); } } } }
2、多线程异步请求
package generate.httpclient; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; import org.apache.http.HttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.concurrent.FutureCallback; import org.apache.http.impl.nio.client.DefaultHttpAsyncClient; import org.apache.http.nio.client.HttpAsyncClient; import org.apache.http.nio.reactor.IOReactorException; public class AsynClient{ /** * @param args * @throws IOReactorException * @throws InterruptedException */ private List<String> urls; private HandlerFailThread failHandler; public AsynClient(List<String> list){ failHandler=new HandlerFailThread(); urls=list; } public Map<String,String> asynGet() throws IOReactorException, InterruptedException { final HttpAsyncClient httpclient = new DefaultHttpAsyncClient(); httpclient.start(); int urlLength=urls.size(); HttpGet[] requests = new HttpGet[urlLength]; int i=0; for(String url : urls){ requests[i]=new HttpGet(url); i++; } final CountDownLatch latch = new CountDownLatch(requests.length); final Map<String, String> responseMap=new HashMap<String, String>(); try { for (final HttpGet request : requests) { httpclient.execute(request, new FutureCallback<HttpResponse>() { public void completed(final HttpResponse response) { latch.countDown(); responseMap.put(request.getURI().toString(), response.getStatusLine().toString()); try { System.out.println(request.getRequestLine() + "->" + response.getStatusLine()+"->"); //+readInputStream(response.getEntity().getContent()) } catch (IllegalStateException e) { failHandler.putFailUrl(request.getURI().toString(), response.getStatusLine().toString()); e.printStackTrace(); } catch (Exception e) { failHandler.putFailUrl(request.getURI().toString(), response.getStatusLine().toString()); e.printStackTrace(); } } public void failed(final 
Exception ex) { latch.countDown(); ex.printStackTrace(); failHandler.putFailUrl(request.getURI().toString(), ex.getMessage()); } public void cancelled() { latch.countDown(); } }); } System.out.println("Doing..."); } finally { latch.await(); httpclient.shutdown(); } System.out.println("Done"); failHandler.printFailUrl(); return responseMap; } private String readInputStream(InputStream input) throws IOException{ byte[] buffer = new byte[128]; int len = 0; ByteArrayOutputStream bytes = new ByteArrayOutputStream(); while((len = input.read(buffer)) >= 0) { bytes.write(buffer, 0, len); } return bytes.toString(); } /** * Test * @param args */ public static void main(String[] args) { List<String> urls=new ArrayList<String>(); urls.add("http://127.0.0.1/examples/servlets/"); urls.add("http://127.0.0.1/examples/servlets/"); urls.add("http://127.0.0.1/examples/servlets/"); for(int i=0;i<10;i++){ urls.addAll(urls); } System.out.println(urls.size()); AsynClient client=new AsynClient(urls); try { client.asynGet(); } catch (IOReactorException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } System.out.println("done"); } }
创建一个线程记录失败的请求
package generate.httpclient; import java.util.HashMap; import java.util.Map; public class HandlerFailThread extends Thread{ Map<String, String> failUrl=new HashMap<String, String>(); public void putFailUrl(String url,String status){ synchronized (failUrl) { failUrl.put(url,status); } } @Override public void run() { while(true){ } } public void printFailUrl(){ for(Map.Entry<String, String> m: failUrl.entrySet()){ System.out.println("****fail:url:"+m.getKey()+ " code :"+m.getValue()); } } }
异步请求,也可通过pool管理,例如
// Example fragment: an asynchronous client backed by a pooling connection manager.
ConnectingIOReactor nio=new DefaultConnectingIOReactor();
PoolingClientAsyncConnectionManager manager=new PoolingClientAsyncConnectionManager(nio);
// Pool limits: 1000 connections in total, 100 per route by default.
manager.setMaxTotal(1000);
manager.setDefaultMaxPerRoute(100);
HttpParams params=new BasicHttpParams();
/* Connection timeout */
HttpConnectionParams.setConnectionTimeout(params, 10000);
/* Request (socket) timeout */
HttpConnectionParams.setSoTimeout(params, 60*1000);
// NOTE(review): params is only passed to setDefaultHttpParams and never to the
// client constructed below — confirm that these timeouts actually take effect.
DefaultHttpAsyncClient.setDefaultHttpParams(params);
final HttpAsyncClient httpclient = new DefaultHttpAsyncClient(manager);
httpclient.start();
HttpClient相关可参看,里面有很多说明与例子
http://hc.apache.org/httpcomponents-client-ga/tutorial/html/connmgmt.html
相关推荐
4、多线程下载:使用RandomAccessFile输出流写 5、多线程下载-断点续传:使用临时文件记录当前下载的数据,下次读取文件开始下载 6、下载显示进度条-ProgressBar 7、在GitHub上面下载已经有支持断点续传功能的代码...
主要介绍了使用java的HttpClient实现多线程并发的相关资料,需要的朋友可以参考下
HttpClient下载数据 图片,string 使用get请求数据
这个是一个httpclient模拟发送请求的例子,请求对象是12306,我只写到了登录和查询从...希望有人可以在我的基础上修改成多线程,在确定买哪个车次的票的时候可以无限制抢票直到票没有。最后希望大家能开心过年,浪心
java 多线程抓取去哪儿qunar机票数据,依赖 httpclient、jsoup 等jar 包
httpclient4.3的二次封装,解决了代理问题、多线程问题、SSL问题、通用的get、post请求
很多公司使用另开线程池的方式进行异步调用来解决tomcat线程阻塞问题。但由于本系统中接口网络太不稳定,使用线程池也将导致线程池中的线程不断加大,不管使用怎样的线程池策略,最终要么线程池线程全部挂起,要么...
9. 连接管理器支持多线程应用。支持设置最大连接数,同时支持设置每个主机的最大连接数,发现并关闭过期的连接。 10. 自动处理Set-Cookie中的Cookie。 11. 插件式的自定义Cookie策略。 12. Request的输出流可以避免...
HTTP代理 C# 网页代理 web代理
Java开发基于多线程和NIO实现聊天室源码+项目说明(含服务端+客户端).zip 涉及到的技术点 - 线程池ThreadPoolExecutor - 阻塞队列BlockingQueue,生产者消费者模式 - Selector - Channel - ByteBuffer - ...
HttpClient连接池 Spring依赖注入 lombok简化POJO开发 原子指标 内置锁 竣工服务 log4j+slf4j日志 实现的功能 登录注销 单聊 群聊 客户端提交任务,下载图片并显示 上线下线公告 在线用户记录 批量下载豆瓣电影的...
网络爬虫程序设计是一个涉及多个步骤和技术的过程。以下是一个基本的网络爬虫程序设计指南...此外,对于大型网站,可能需要实现多线程或异步爬取以提高效率。最终,一个完善的C#网络爬虫应能稳定、高效地爬取目标数据。
通过HttpClient获取到请求页面的字符串 通过jsoup解析 (解析需要自己在页面上查看源代码,分析DOM结构) (通过使用jsoup的css选择器的函数,获取元素,元素集,或者文本和属性值) 每一本书的值set进书实体...
Java并发编程:03-多线程并发下载器, 支持断点下载(手写不限速的迷你版迅雷) 主要是最近学习完一些初级的并发知识, 所以想使用这些知识做一个小小工具, 巩固一下知识点, 然后就想到了多线程并发下载文件的这个小工具...
多线程与异步处理:为了提高抓取效率,可以使用多线程技术同时抓取多个网页;使用异步处理来优化网络请求和数据处理。 反爬虫策略应对:针对网站的反爬虫策略(如验证码、访问频率限制等),实现相应的应对措施,...
Apache HttpClient:一个用于发送HTTP请求的Java库,可以...5.crawler4j:一个开源的爬虫框架,提供了一些基本功能,如多线程处理和网页解析。 FlyingSpider:一个开源的爬虫框架,专注于抓取大型网站和多语言支持。
问题的阐述:Android SDK中的HttpClient和HttpUrlConnection两种请求方式用来处理网络的复杂的操作,但当应用比较复杂的时候需要我们编写大量的代码处理很多东西:图像缓存,请求的调度等等; 解决:Volley就是为...
该类中利用多线程实现了网络的异步请求,请求网络数据时在新建的工作线程中进行,从而避免由于主线程(界面线程)的阻塞而导致界面阻塞的问题。 ##使用方法 /*Get 请求方式*/ 假设有接口地址为:...
重写了多线程逻辑 代码更加易懂了 同时修复了一些线程安全问题 引入了Google Guava API 让代码更简洁 增加配置Spider setSpawnUrl false 此选项为false时 只下载给定的url 不下载任何新发现的url 可以给初始url中...
#描述这是一个用于 HTTP 客户端和服务器的简单 java Socket 程序HTTP 服务器... 此 HTTP 服务器是多线程的,因此它将同时处理多个客户端请求。 (尚未测试!但我相信如此。) ##内容HTTP服务器HTTP 客户端##参考