# statistics_model2025/snowNLP_danmu sentiment_analyzer.py
# Last modified: 2025-03-29 15:46:52 +08:00
# (63 lines, 1.7 KiB, Python)

import pandas as pd
import numpy as np
from snownlp import SnowNLP
import os
def load_data(file_path):
    """Load the danmaku-text column from a CSV file.

    Args:
        file_path: path to a CSV containing a '弹幕内容' column.

    Returns:
        list[str]: non-null danmaku texts, coerced to str; an empty
        list when the file is missing, unreadable, or lacks the column
        (the error is printed, not raised — best-effort by design).
    """
    try:
        # encoding specified explicitly for consistency with the
        # info.csv read in process_partition()
        df = pd.read_csv(file_path, usecols=['弹幕内容'],
                         engine='python', encoding='utf-8')
        return df['弹幕内容'].dropna().astype(str).tolist()
    except Exception as e:
        print(f"数据加载失败: {str(e)}")
        return []
def analyze_sentiment(danmu_texts):
    """Score danmaku texts with SnowNLP and tally sentiment classes.

    Args:
        danmu_texts: iterable of danmaku strings.

    Returns:
        tuple (emotions, avg_score):
            emotions: dict counting 'positive' (score > 0.6),
                'negative' (score < 0.4) and 'neutral' (otherwise).
            avg_score: mean SnowNLP sentiment over all texts;
                0.0 for empty input.
    """
    emotions = {'positive': 0, 'negative': 0, 'neutral': 0}
    sentiment_scores = []
    for text in danmu_texts:
        score = SnowNLP(text).sentiments
        sentiment_scores.append(score)
        if score > 0.6:
            emotions['positive'] += 1
        elif score < 0.4:
            emotions['negative'] += 1
        else:
            emotions['neutral'] += 1
    # Guard the empty case: np.mean([]) would emit a RuntimeWarning
    # and return NaN, which then poisons the '情感评分' column.
    avg_score = float(np.mean(sentiment_scores)) if sentiment_scores else 0.0
    return emotions, avg_score
def process_partition(partition_path, danmaku_suffix="_273_danmaku.csv"):
    """Compute per-video average sentiment for one partition directory.

    Reads ``<partition_path>/info.csv`` (must contain a 'BV号' column),
    loads each video's danmaku CSV from
    ``<partition_path>/<BV>/<BV><danmaku_suffix>``, scores it with
    analyze_sentiment(), and writes the averages back into info.csv as
    a '情感评分' column. Videos whose danmaku file is missing or empty
    get None so the output column stays aligned with the rows.

    Args:
        partition_path: directory holding info.csv and per-BV subfolders.
        danmaku_suffix: danmaku filename suffix; the default preserves
            the historical "_273_danmaku.csv" naming.
    """
    info_file = os.path.join(partition_path, 'info.csv')
    if not os.path.exists(info_file):
        print(f"未找到info文件: {info_file}")
        return
    info_df = pd.read_csv(info_file, encoding='utf-8')
    scores = []
    for bv in info_df['BV号']:
        danmu_file = os.path.join(partition_path, bv, f"{bv}{danmaku_suffix}")
        if not os.path.exists(danmu_file):
            scores.append(None)
            continue
        danmu_texts = load_data(danmu_file)
        if not danmu_texts:
            scores.append(None)
            continue
        _, avg_score = analyze_sentiment(danmu_texts)
        scores.append(avg_score)
    info_df['情感评分'] = scores
    # Overwrites info.csv in place with the new sentiment column.
    info_df.to_csv(info_file, index=False)
# Usage example — process the GMV partition.
# Guarded so importing this module for its functions does not trigger
# the filesystem-dependent batch run.
if __name__ == "__main__":
    partition_path = "hot_data/GMV"
    process_partition(partition_path)