fix proxy get
This commit is contained in:
parent
3fae0091b3
commit
8b156e18ed
|
@ -9,6 +9,7 @@ from scrapy import signals
|
||||||
from .myutils import ProxyPool
|
from .myutils import ProxyPool
|
||||||
from .settings import USERNAME, PASSWORD
|
from .settings import USERNAME, PASSWORD
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
|
from scrapy.core.downloader.handlers.http11 import TunnelError
|
||||||
|
|
||||||
|
|
||||||
class ProxyMiddleware:
|
class ProxyMiddleware:
|
||||||
|
@ -32,7 +33,7 @@ class ProxyMiddleware:
|
||||||
|
|
||||||
def process_response(self, request, response, spider):
|
def process_response(self, request, response, spider):
|
||||||
# 如果响应正常,返回响应
|
# 如果响应正常,返回响应
|
||||||
if response.status in [200, 301, 302, 460, 454]:
|
if response.status in [200, 301, 302]:
|
||||||
return response
|
return response
|
||||||
# 如果响应异常,处理失败计数
|
# 如果响应异常,处理失败计数
|
||||||
else:
|
else:
|
||||||
|
@ -44,14 +45,15 @@ class ProxyMiddleware:
|
||||||
def process_exception(self, request, exception, spider):
|
def process_exception(self, request, exception, spider):
|
||||||
# 处理发生异常的请求
|
# 处理发生异常的请求
|
||||||
self._handle_proxy_failure(request.meta['proxy'], spider)
|
self._handle_proxy_failure(request.meta['proxy'], spider)
|
||||||
spider.info(f"Changing proxy to {request.meta['proxy']} due to exception: {exception}")
|
spider.logger.info(f"Changing proxy to {request.meta['proxy']} due to exception: {exception}")
|
||||||
# 重新调度请求
|
# 重新调度请求
|
||||||
return request
|
return request
|
||||||
|
|
||||||
|
|
||||||
def _handle_proxy_failure(self, http_proxy, spider):
|
def _handle_proxy_failure(self, http_proxy, spider):
|
||||||
# 增加指定代理的失败计数
|
# 增加指定代理的失败计数
|
||||||
proxy = http_proxy.split('@')[-1][:-1]
|
proxy = http_proxy.split('@')[-1][7:]
|
||||||
|
|
||||||
self.proxy_failures[proxy] += 1
|
self.proxy_failures[proxy] += 1
|
||||||
spider.logger.error(f'Proxy {proxy} failed, failure count: {self.proxy_failures[proxy]}')
|
spider.logger.error(f'Proxy {proxy} failed, failure count: {self.proxy_failures[proxy]}')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user