From dc35f3f990589305afe7d5efaa586228bf09ff25 Mon Sep 17 00:00:00 2001
From: cooper
Date: Mon, 20 May 2024 14:21:34 +0800
Subject: [PATCH] modify class attrib selector

---
 newsspider/spiders/bjxpv.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/newsspider/spiders/bjxpv.py b/newsspider/spiders/bjxpv.py
index 059f0a2..c839399 100644
--- a/newsspider/spiders/bjxpv.py
+++ b/newsspider/spiders/bjxpv.py
@@ -14,11 +14,10 @@ class BjxpvSpider(scrapy.Spider):
             yield scrapy.Request(url, self.news_parse)
 
         next_page = response.xpath('//a[contains(text(), "下一页")]')
-        if next_page.attrib['class'] != 'disable':
+        if next_page.attrib.get('class') != 'disable':
             url = next_page.attrib['href']
             yield response.follow(url, self.parse)
 
-
     def news_parse(self, response):
         news_item = NewsItem()
         news_item['website'] = '北极星太阳能光伏网'