From c54f5a2a18b7e86f996a7849fe39634587efa3cf Mon Sep 17 00:00:00 2001
From: Sheyiyuan <2125107118@qq.com>
Date: Sun, 23 Mar 2025 22:32:02 +0800
Subject: [PATCH] add:comments

---
 .gitignore                      |   3 +-
 main.py                         | 110 ++++++++++++++++++--------------
 require.txt => requirements.txt |   0
 targets.txt                     |   2 +-
 4 files changed, 63 insertions(+), 52 deletions(-)
 rename require.txt => requirements.txt (100%)

diff --git a/.gitignore b/.gitignore
index 4301cf5..87a8b5a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
 ./venv/
 .idea/
 data/
-*/.DS_Store
-targets.txt
\ No newline at end of file
+*/.DS_Store
\ No newline at end of file
diff --git a/main.py b/main.py
index c4fc634..f8535c8 100644
--- a/main.py
+++ b/main.py
@@ -5,8 +5,36 @@ from bs4 import BeautifulSoup
 from time import sleep
 from random import uniform
 from fake_useragent import UserAgent
+import datetime
 import time
 
+ONEBOT_HOST = "http://sheyiyuan.cn:63000"  # OneBot service address
+USER_ID = "1040843522"
+
+def send_notification(error_msg):
+    try:
+        # Push an alert to the QQ account via the OneBot private-message API
+        url = f"{ONEBOT_HOST}/send_private_msg"
+        data = {
+            "user_id": USER_ID,
+            "message": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + f"[SMA-bilibili监控告警]\n{error_msg}"
+        }
+        requests.post(url, json=data, timeout=5)
+    except Exception as e:
+        print(f"QQ通知发送失败: {e}")
+
+def send_info_log(log_msg):
+    try:
+        # Push an info-level log line via the OneBot private-message API
+        url = f"{ONEBOT_HOST}/send_private_msg"
+        data = {
+            "user_id": USER_ID,
+            "message": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + f"[SMA-bilibili监控日志]\n{log_msg}"
+        }
+        requests.post(url, json=data, timeout=5)
+    except Exception as e:
+        print(f"QQ通知发送失败: {e}")
+
 def load_targets():
     """从targets.txt加载BV号列表"""
     try:
@@ -175,14 +203,16 @@ class BiliWebCrawler:
             }
         except Exception as e:
             print(f"解析UP主数据失败: {str(e)}")
+            send_notification(f"解析UP主数据失败: {str(e)}")
             return None
         try:
             # 获取投稿列表
             archive_url = f'https://api.bilibili.com/x/space/arc/search?mid={mid}&ps=30'
+            sleep(5)  # throttle before hitting the archive API
             archive_resp = self._safe_request(archive_url)
             if archive_resp and archive_resp.status_code == 200:
                 archive_data = archive_resp.json()
-                print(archive_data)
+                # print(archive_data)
                 videos = archive_data.get('data', {}).get('list', {}).get('vlist', [])
 
                 # 计算30天前的时间戳
@@ -192,12 +222,14 @@ class BiliWebCrawler:
                 up_info['近一个月投稿数'] = recent_count
         except Exception as e:
             print(f"获取投稿数据失败: {str(e)}")
+            send_notification(f"获取投稿数据失败: {str(e)}")
 
         return up_info
 
     def get_danmaku(self):
         """获取弹幕数据"""
         if not self.bvid:
+            send_notification("未找到 BVID,无法获取弹幕数据。")
             print("未找到 BVID,无法获取弹幕数据。")
             return []
 
@@ -209,6 +241,7 @@ class BiliWebCrawler:
             data = resp.json()
             cid = data.get('data', {}).get('cid')
             if not cid:
+                send_notification("未找到 cid,无法获取弹幕数据。")
                 print("未找到 cid,无法获取弹幕数据。")
                 return []
 
@@ -231,8 +264,9 @@ class BiliWebCrawler:
         return danmaku
 
     def get_comments(self, max_pages=1000):
-        """获取热门评论,包含二级评论"""
+        """获取热门评论,仅保留一级评论"""
         if not self.aid:
+            send_notification("未找到视频 ID,无法获取评论数据。")
             print("未找到视频 ID,无法获取评论数据。")
             return []
         comments = []
@@ -256,49 +290,19 @@ class BiliWebCrawler:
                         '回复时间': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(comment['ctime']))
                     }
                     comments.append(comment_info)
-                    replies = self.fetch_comment_replies(comment['rpid'], comment['member']['uname'])
-                    comments.extend(replies)
+                    # Second-level replies are no longer collected
                 else:
                     # 当当前页没有评论时,跳出循环
                     break
             except requests.RequestException as e:
+                send_notification(f"评论请求出错: {e}")
                 print(f"请求出错: {e}")
                 break
             # 适当调整请求间隔
-            sleep(1)
+            sleep(5)
         return comments
 
-    def fetch_comment_replies(self, comment_id, parent_user_name, max_pages=1000):
-        replies = []
-        for page in range(1, max_pages + 1):
-            url = f'https://api.bilibili.com/x/v2/reply/reply?oid={self.aid}&type=1&root={comment_id}&ps=10&pn={page}'
-            try:
-                response = self._safe_request(url)
-                if response and response.status_code == 200:
-                    response.encoding = 'utf-8'
-                    data = response.json()
-                    if data and data.get('data') and data['data'].get('replies'):
-                        for reply in data['data']['replies']:
-                            reply_info = {
-                                '用户昵称': reply['member']['uname'],
-                                '评论内容': reply['content']['message'],
-                                '被回复用户': parent_user_name,
-                                '评论层级': '二级评论',
-                                '性别': reply['member']['sex'],
-                                '用户当前等级': reply['member']['level_info']['current_level'],
-                                '点赞数量': reply['like'],
-                                '回复时间': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(reply['ctime']))
-                            }
-                            replies.append(reply_info)
-                    else:
-                        # 当当前页没有二级评论时,跳出循环
-                        break
-            except requests.RequestException as e:
-                print(f"请求二级评论出错: {e}")
-                break
-            # 适当调整请求间隔
-            sleep(1)
-        return replies
+    # fetch_comment_replies() removed: the method is no longer needed
 
     def _parse_count(self, text):
         """统一处理数量文本"""
@@ -338,24 +342,30 @@ class BiliWebCrawler:
             video_info_filename = os.path.join(video_dir, f'{self.bvid}_{play_count}_info.csv')
             self.save_to_csv([video_info], video_info_filename)
 
+            sleep(5)  # pause between crawl stages to ease request pressure
+
             # 新增弹幕抓取
             print("正在抓取弹幕数据...")
-            #danmaku = self.get_danmaku()
-            #danmaku_filename = os.path.join(video_dir, f'{self.bvid}_{len(danmaku)}_danmaku.csv')
-            #self.save_to_csv(danmaku, danmaku_filename)
+            danmaku = self.get_danmaku()
+            danmaku_filename = os.path.join(video_dir, f'{self.bvid}_{len(danmaku)}_danmaku.csv')
+            self.save_to_csv(danmaku, danmaku_filename)
+
+            sleep(5)
 
             # 新增评论抓取
             print("正在抓取评论数据...")
-            #comments = self.get_comments()
-            #comments_filename = os.path.join(video_dir, f'{self.bvid}_{len(comments)}_comments.csv')
-            #self.save_to_csv(comments, comments_filename)
+            comments = self.get_comments()
+            comments_filename = os.path.join(video_dir, f'{self.bvid}_{len(comments)}_comments.csv')
+            self.save_to_csv(comments, comments_filename)
+
+            sleep(5)
 
             # 新增UP主信息记录
             print("正在获取UP主信息...")
-            #up_info = self.get_up_info(video_info.get('up主UID'))
-            #up_info['BV号'] = self.bvid
-            #up_csv_path = os.path.join(base_dir, 'up_info.csv')
-            #self.save_to_csv([up_info], up_csv_path, mode='a')
+            up_info = self.get_up_info(video_info.get('up主UID'))
+            up_info['BV号'] = self.bvid
+            up_csv_path = os.path.join(base_dir, 'up_info.csv')
+            self.save_to_csv([up_info], up_csv_path, mode='a')
 
             print(f"抓取完成!结果已保存到 {video_dir}/")
         else:
@@ -363,13 +373,15 @@ class BiliWebCrawler:
 
 
 if __name__ == "__main__":
+    cookie = "buvid3=669D9192-9030-AE04-8149-45A24D82CBB985872infoc; b_nut=1728112285; _uuid=BDD29A64-331010-1578-7AB2-6985DCD1EC10586028infoc; enable_web_push=DISABLE; buvid4=02E86127-F707-C9D6-1E0E-62127CDB94EA86683-024100507-5HwKLZoKiRRAzdUiyP1DtUb99jVdMKHnip8nMCxMvnyDueJx41kzeR6uEnG0C2HY; DedeUserID=399209972; DedeUserID__ckMd5=9ad9de58e979dbdf; header_theme_version=CLOSE; rpdid=0zbfAHZlOT|utNc6ahG|2XT|3w1SWYYy; buvid_fp_plain=undefined; hit-dyn-v2=1; is-2022-channel=1; LIVE_BUVID=AUTO7717291772934669; PVID=2; enable_feed_channel=ENABLE; CURRENT_QUALITY=80; fingerprint=262ed395815a48ea928a2b3cf305da95; buvid_fp=262ed395815a48ea928a2b3cf305da95; bsource=search_bing; CURRENT_FNVAL=4048; bp_t_offset_399209972=1046334052841291776; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NDI3ODE1NzgsImlhdCI6MTc0MjUyMjMxOCwicGx0IjotMX0.yseN1sbG5qDIlo5L0mUKqRr2tCL0OqPBnSnnHUYCWtE; bili_ticket_expires=1742781518; home_feed_column=5; browser_resolution=1702-986; b_lsid=EAE310D10E_195C32A76ED; SESSDATA=d0449397%2C1758291057%2C666e5%2A32CjDmj_WpEXUbv2oTIls3PPvM1wODDzR-gnPMRP5gwm09UIZ7YGdhTsJ7ssNg5Tb19qESVmlUMktwdlhkTnNlM0dTU05kZlBOTERNM3JqeTVSNGFaRzdwZmtNWUpjTHhPWmdxTzJiWmdDSVZxNkpEb0VGNHJFTEdaTlJtcVhBUlMzbEZJTzdCeXNnIIEC; bili_jct=e7f4029b2be38fe915678f49aa5b36f7; sid=nxbw3hdh"
     # 批量处理targets.txt中的BV号
     targets = load_targets()
     if not targets:
         print("未找到有效的BV号,程序退出")
         exit()
-
+    send_info_log(f"开始批量处理 {len(targets)} 个视频")
     for bvid in targets:
         print(f"\n{'=' * 30} 开始处理 {bvid} {'=' * 30}")
-        crawler = BiliWebCrawler(f"https://www.bilibili.com/video/{bvid}")
-        crawler.run()
\ No newline at end of file
+        crawler = BiliWebCrawler(f"https://www.bilibili.com/video/{bvid}", cookie)
+        crawler.run()
+        sleep(5)
\ No newline at end of file
diff --git a/require.txt b/requirements.txt
similarity index 100%
rename from require.txt
rename to requirements.txt
diff --git a/targets.txt b/targets.txt
index 6d3c9a1..312896d 100644
--- a/targets.txt
+++ b/targets.txt
@@ -561,4 +561,4 @@ BV1KLXVYREoh
 BV1jFQrY1EtZ
 BV1PxXLYdEvh
 BV1XCQNYzEs5
-BV1kcQBY7Erz
\ No newline at end of file
+BV1kcQBY7Erz