import os

import numpy as np
import pandas as pd
from snownlp import SnowNLP


def load_data(file_path):
    try:
        df = pd.read_csv(file_path, usecols=['弹幕内容'], engine='python')
        return df['弹幕内容'].dropna().astype(str).tolist()
    except Exception as e:
        print(f"Failed to load data: {e}")
        return []


def analyze_sentiment(danmu_texts):
    # Manually added special vocabulary, with scores calibrated against the
    # base dictionary (很好 ≈ 0.78, 一般 ≈ 0.52, 差 ≈ 0.14).
    special_cases = {
        # High-intensity positive words
        "爷青回": 0.9,    # "my youth is back" (nostalgia)
        "yyds": 0.9,      # "eternal god" (the GOAT)
        "YYDS": 0.9,      # "eternal god" (the GOAT)
        "kksk": 0.8,      # "koko suki" (love this part)
        "awsl": 0.8,      # "ah, I'm dying" (deeply touched)
        "阿伟死了": 0.8,  # homophone of awsl (deeply touched)
        "类目": 0.8,      # homophone of 泪目 (moved to tears)
        "排面": 0.8,      # grand, impressive
        "文艺复兴": 0.8,  # "renaissance" (a classic returns)
        "绝绝子": 0.7,    # amazing
        "双厨狂喜": 0.7,  # fan of both sides, crossover joy
        "梦幻联动": 0.7,  # dream collaboration across works
        "注入灵魂": 0.7,  # "soul injected" (highlight moment)
        # Meme / interaction phrases
        "下次一定": 0.55,  # "next time for sure" (coin-stalling meme)
        "你币没了": 0.45,  # "no coins for you" (threat to withhold coins)
        "典": 0.3,         # "classic" copypasta (often derogatory)
        # Iconic high-energy scenes
        "名场面": 0.85,   # iconic scene
        "神仙打架": 0.9,  # "gods battling" (masters competing)
        "前方高能": 0.7,  # "high energy ahead" (climax warning)
        # Numeric rating memes
        "10": 0.85,    # 10 out of 10
        "100分": 0.85,  # 100 out of 100
        "5": 0.85,     # 5 out of 5
        "666": 0.75,   # skilled play
        "999": 0.75,   # even more "6"
        "2333": 0.6,   # laughter
        # Meme culture
        "草": 0.55,   # "lol" (neutral)
        "生草": 0.6,  # hilarious scene
        # Emotional breakdown
        "破防了": 0.4,    # emotional defenses broken
        "我裂开了": 0.3,  # "I'm cracking" (mental collapse)
        # Domain-specific memes
        "奥利给": 0.8,    # cheer of encouragement
        "DNA动了": 0.8,   # "my DNA moved" (memory triggered)
        "有内味了": 0.7,  # "has that flavor" (spot on)
        # Negative contexts
        "阴间": 0.3,        # eerie, unsettling content
        "血压上来了": 0.3,  # "blood pressure rising" (irritating)
    }

    sentiment_scores = []
    for item in danmu_texts:
        if item in special_cases:
            sentiment_scores.append(special_cases[item])
        else:
            s = SnowNLP(item)
            sentiment_scores.append(s.sentiments)
    avg_score = np.mean(sentiment_scores)
    return avg_score


def process_all_partitions(base_path):
    # Collect all partition directories
    partitions = [d for d in os.listdir(base_path)
                  if os.path.isdir(os.path.join(base_path, d))]
    for partition in partitions:
        partition_path = os.path.join(base_path, partition)
        print(f"Processing partition: {partition}")
        process_partition(partition_path)


def process_partition(partition_path):
    info_file = os.path.join(partition_path, 'info.csv')
    if not os.path.exists(info_file):
        print(f"info file not found: {info_file}")
        return

    info_df = pd.read_csv(info_file, encoding='utf-8')
    # One slot per row of info_df; rows without usable data keep None
    scores = [None] * len(info_df)

    for idx, bv in enumerate(info_df['BV号']):
        # Build the path to this video's danmaku directory
        danmu_dir = os.path.join(partition_path, bv)
        if not os.path.exists(danmu_dir):
            continue  # leave as None
        # Find the matching danmaku file
        danmu_file = [f for f in os.listdir(danmu_dir)
                      if f.startswith(bv) and f.endswith('danmaku.csv')]
        if not danmu_file:
            continue  # leave as None
        danmu_path = os.path.join(danmu_dir, danmu_file[0])
        danmu_texts = load_data(danmu_path)
        if not danmu_texts:
            continue  # leave as None
        # Store the score at the matching row index
        scores[idx] = analyze_sentiment(danmu_texts)

    info_df['弹幕情感评分snowNLP'] = scores
    info_df.to_csv(info_file, index=False, encoding='utf-8-sig')


# Usage example: process all partitions
if __name__ == '__main__':
    process_all_partitions("hot_data")
    process_all_partitions("nohot_data")
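

# --- Hedged sanity check (illustrative, not part of the pipeline) ---
# Both sample strings below are exact keys in special_cases, so the expected
# average is deterministic and verifiable by hand; any other string would fall
# through to SnowNLP's learned model. The samples are made up for this demo.
#
# The on-disk layout that process_all_partitions assumes, reconstructed from
# the path logic above (names in angle brackets are placeholders):
#   hot_data/
#     <partition>/
#       info.csv                # must contain a 'BV号' column
#       <BV号>/
#         <BV号>...danmaku.csv  # must contain a '弹幕内容' column
def _demo_special_cases():
    samples = ["爷青回", "血压上来了"]  # 0.9 and 0.3 in special_cases
    score = analyze_sentiment(samples)   # (0.9 + 0.3) / 2 = 0.6
    assert abs(score - 0.6) < 1e-9
    print(f"demo average sentiment: {score:.2f}")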