diff --git a/.gitignore b/.gitignore index adbecf9..fbb7d01 100644 --- a/.gitignore +++ b/.gitignore @@ -161,3 +161,5 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +# My File +.secret \ No newline at end of file diff --git a/.secret b/.secret index 46dec27..38e155a 100644 --- a/.secret +++ b/.secret @@ -1 +1 @@ -o36r0lgw71mdzm9rkwrv3wi1wn|3600|1715909100.492765|oadugxyl9fhoqsamqopc \ No newline at end of file +ontipj0py431xul5zm4kj6gzzo|3600|1715933457.543795|oadugxyl9fhoqsamqopc \ No newline at end of file diff --git a/decspider/pipelines.py b/decspider/pipelines.py index 9a523e1..bf76928 100644 --- a/decspider/pipelines.py +++ b/decspider/pipelines.py @@ -24,7 +24,9 @@ class DecspiderPipeline: # 检查表是否存在,如果不存在就创建表 self.cursor.execute(f""" CREATE TABLE IF NOT EXISTS `{self.table_name}` ( - id INT AUTO_INCREMENT PRIMARY KEY + id INT AUTO_INCREMENT PRIMARY KEY, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP ) """) @@ -35,21 +37,31 @@ class DecspiderPipeline: # 获取 NewsItem 字段信息 item_columns = set(NewsItem.fields.keys()) - # 添加 NewsItem 字段到表中 + # 添加 NewsItem 字段到表中 for column in item_columns: if column not in existing_columns: self.cursor.execute(f"ALTER TABLE `{self.table_name}` ADD COLUMN `{column}` TEXT") spider.log(f'Added column `{column}` to `{self.table_name}` table') + # 添加 created_at 和 updated_at 字段,如果它们不存在 + if 'created_at' not in existing_columns: + self.cursor.execute(f"ALTER TABLE `{self.table_name}` ADD COLUMN `created_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP") + spider.log(f'Added column `created_at` to `{self.table_name}` table') + + if 'updated_at' not in existing_columns: + self.cursor.execute(f"ALTER TABLE `{self.table_name}` ADD COLUMN `updated_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP") + spider.log(f'Added column `updated_at` to `{self.table_name}` table') + # 删除表中不存在于 NewsItem 中的字段 for column in existing_columns: - if column not in item_columns and column != 'id': + if column not in item_columns and column not in {'id', 'created_at', 'updated_at'}: self.cursor.execute(f"ALTER TABLE `{self.table_name}` DROP COLUMN `{column}`") spider.log(f'Dropped column `{column}` from `{self.table_name}` table') self.conn.commit() + def close_spider(self, spider): self.conn.close()