Python的要求期货似乎并没有为我工作
我有下面这段代码Python的要求期货似乎并没有为我工作
import concurrent.futures as cf
from requests_futures.sessions import FuturesSession
urls = ['http://www.foxnews.com/',
'http://www.cnn.com/',
'http://europe.wsj.com/',
'http://www.bbc.co.uk/',
'https://foursquare.com/'] * 500
futures = []
session = FuturesSession(executor=cf.ThreadPoolExecutor(max_workers=8))
for url in urls:
futures.append(session.get(url))
for future in cf.as_completed(futures):
result = len(future.result().content)
print(result)
但是这个代码使用像
ConnectionError的消息出现了错误:(“中止连接”, RemoteDisconnected(“远程结束时关闭不响应连接”,))
当URL列表的长度约为300它的工作原理
,但是当我有大约500网址我是回到同一个奇怪的错误
完整的堆栈跟踪就像
---------------------------------------------------------------------------
ConnectionResetError Traceback (most recent call last)
c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redi
rect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked
, **httplib_request_kw)
385 # otherwise it looks like a programming error was the cause.
--> 386 six.raise_from(e, None)
387 except (SocketTimeout, BaseSSLError, SocketError) as e:
c:\python36\lib\site-packages\requests\packages\urllib3\packages\six.py in raise_from(value, from_value)
c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked
, **httplib_request_kw)
381 try:
--> 382 httplib_response = conn.getresponse()
383 except Exception as e:
c:\python36\lib\http\client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
c:\python36\lib\http\client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
c:\python36\lib\http\client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
c:\python36\lib\socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
c:\python36\lib\ssl.py in recv_into(self, buffer, nbytes, flags)
1001 self.__class__)
-> 1002 return self.read(nbytes, buffer)
1003 else:
c:\python36\lib\ssl.py in read(self, len, buffer)
864 try:
--> 865 return self._sslobj.read(len, buffer)
866 except SSLError as x:
c:\python36\lib\ssl.py in read(self, len, buffer)
624 if buffer is not None:
--> 625 v = self._sslobj.read(len, buffer)
626 else:
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
c:\python36\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
422 retries=self.max_retries,
--> 423 timeout=timeout
424 )
c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redi
rect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
648 retries = retries.increment(method, url, error=e, _pool=self,
--> 649 _stacktrace=sys.exc_info()[2])
650 retries.sleep()
c:\python36\lib\site-packages\requests\packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stack
trace)
346 if read is False or not self._is_method_retryable(method):
--> 347 raise six.reraise(type(error), error, _stacktrace)
348 elif read is not None:
c:\python36\lib\site-packages\requests\packages\urllib3\packages\six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redi
rect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked
, **httplib_request_kw)
385 # otherwise it looks like a programming error was the cause.
--> 386 six.raise_from(e, None)
387 except (SocketTimeout, BaseSSLError, SocketError) as e:
c:\python36\lib\site-packages\requests\packages\urllib3\packages\six.py in raise_from(value, from_value)
c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked
, **httplib_request_kw)
381 try:
--> 382 httplib_response = conn.getresponse()
383 except Exception as e:
c:\python36\lib\http\client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
c:\python36\lib\http\client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
c:\python36\lib\http\client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
c:\python36\lib\socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
c:\python36\lib\ssl.py in recv_into(self, buffer, nbytes, flags)
1001 self.__class__)
-> 1002 return self.read(nbytes, buffer)
1003 else:
c:\python36\lib\ssl.py in read(self, len, buffer)
864 try:
--> 865 return self._sslobj.read(len, buffer)
866 except SSLError as x:
c:\python36\lib\ssl.py in read(self, len, buffer)
624 if buffer is not None:
--> 625 v = self._sslobj.read(len, buffer)
626 else:
ProtocolError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host',
None, 10054, None))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-144-e24ea43223c2> in <module>()
15
16 for idx,future in enumerate(cf.as_completed(futures)):
---> 17 result = len(future.result().content)
18 print(idx,result)
19
c:\python36\lib\concurrent\futures\_base.py in result(self, timeout)
396 raise CancelledError()
397 elif self._state == FINISHED:
--> 398 return self.__get_result()
399
400 self._condition.wait(timeout)
c:\python36\lib\concurrent\futures\_base.py in __get_result(self)
355 def __get_result(self):
356 if self._exception:
--> 357 raise self._exception
358 else:
359 return self._result
c:\python36\lib\concurrent\futures\thread.py in run(self)
53
54 try:
---> 55 result = self.fn(*self.args, **self.kwargs)
56 except BaseException as e:
57 self.future.set_exception(e)
c:\python36\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeou
t, allow_redirects, proxies, hooks, stream, verify, cert, json)
486 }
487 send_kwargs.update(settings)
--> 488 resp = self.send(prep, **send_kwargs)
489
490 return resp
c:\python36\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
607
608 # Send the request
--> 609 r = adapter.send(request, **kwargs)
610
611 # Total elapsed time of the request (approximately)
c:\python36\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
471
472 except (ProtocolError, socket.error) as err:
--> 473 raise ConnectionError(err, request=request)
474
475 except MaxRetryError as e:
ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host'
, None, 10054, None))
In [145]:
问题是与您的网站,它们会限制从一个IP的,因为事情像DOS攻击(不是专家)的连接数。
正如你可以在错误看到ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
有趣的Reza我所做的是删除requests_futures并使用同步请求库和并发期货线程池执行程序和该库即使慢得多发生此错误 – user3249433
@ user3249433网站限制请求的速率,而不仅限于它们的数量。通过8个工作线程,您比每个线程多8次访问每个站点。 –
@ user3249433这是因为它比较慢。您无法每秒超过每个网站的一定数量的请求。 –
是否少于2500个请求工作?将其缩小到2并完成。 –
约翰辉煌我遵循你的意见,并做了一些变化其奇怪,但它不知道为什么 – user3249433