import os

import numpy as np
import pandas as pd
from snownlp import SnowNLP


def load_data(file_path):
    """Load danmaku texts from a CSV file.

    Args:
        file_path: Path to a CSV file that has a '弹幕内容' column.

    Returns:
        The non-null values of the '弹幕内容' column as a list of strings,
        or an empty list when the file cannot be read or parsed.
    """
    try:
        df = pd.read_csv(file_path, usecols=['弹幕内容'], engine='python')
        return df['弹幕内容'].dropna().astype(str).tolist()
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort the
        # whole partition, so report the problem and return "no data".
        print(f"数据加载失败: {str(e)}")
        return []


def analyze_sentiment(danmu_texts):
    """Classify each text with SnowNLP and average the sentiment scores.

    Args:
        danmu_texts: Iterable of strings to score.

    Returns:
        A tuple ``(emotions, avg_score)``. ``emotions`` is a dict counting
        texts per bucket ('positive' for score > 0.6, 'negative' for
        score < 0.4, 'neutral' otherwise). ``avg_score`` is the mean of all
        scores, or NaN when ``danmu_texts`` is empty.
    """
    emotions = {'positive': 0, 'negative': 0, 'neutral': 0}
    sentiment_scores = []

    for item in danmu_texts:
        score = SnowNLP(item).sentiments
        sentiment_scores.append(score)
        if score > 0.6:
            emotions['positive'] += 1
        elif score < 0.4:
            emotions['negative'] += 1
        else:
            emotions['neutral'] += 1

    if not sentiment_scores:
        # np.mean([]) also yields NaN but emits a RuntimeWarning; return the
        # same value without the noise.
        return emotions, float('nan')

    return emotions, np.mean(sentiment_scores)


def _score_video(partition_path, bv):
    """Return the average sentiment score for one video, or None if unavailable."""
    if pd.isna(bv):
        # A blank BV cell is read as NaN (a float); os.path.join would raise
        # TypeError on it and abort the whole partition.
        return None

    bv = str(bv)
    # NOTE(review): the "_273_" segment of the file name is hard-coded;
    # confirm it is really constant across videos.
    danmu_file = os.path.join(partition_path, bv, f"{bv}_273_danmaku.csv")
    if not os.path.exists(danmu_file):
        return None

    danmu_texts = load_data(danmu_file)
    if not danmu_texts:
        return None

    _, avg_score = analyze_sentiment(danmu_texts)
    return avg_score


def process_partition(partition_path):
    """Add a '情感评分' column to a partition's info.csv, in place.

    Reads ``<partition_path>/info.csv``, computes the average danmaku
    sentiment score for every entry of its 'BV号' column, stores the scores
    in a '情感评分' column and overwrites info.csv with the result. Videos
    whose danmaku file is missing, unreadable or empty get an empty score.

    Args:
        partition_path: Directory containing info.csv and one sub-directory
            per BV id.

    Returns:
        None. If info.csv does not exist, a message is printed and nothing
        is written.
    """
    info_file = os.path.join(partition_path, 'info.csv')
    if not os.path.exists(info_file):
        print(f"未找到info文件: {info_file}")
        return

    info_df = pd.read_csv(info_file, encoding='utf-8')
    info_df['情感评分'] = [
        _score_video(partition_path, bv) for bv in info_df['BV号']
    ]
    # Write back with the same encoding the file was read with.
    info_df.to_csv(info_file, index=False, encoding='utf-8')


# Usage example - process the GMV partition.
# NOTE: this runs whenever the module is executed or imported.
partition_path = "hot_data/GMV"
process_partition(partition_path)