# Sentiment analysis of Bilibili danmaku (bullet comments) using SnowNLP.
import pandas as pd
import numpy as np
from snownlp import SnowNLP
import os
def load_data(file_path):
    """Load danmaku (bullet-comment) texts from a CSV file.

    Parameters
    ----------
    file_path : str
        Path to a CSV file containing a '弹幕内容' (danmaku content) column.

    Returns
    -------
    list[str]
        Non-null danmaku texts as strings; an empty list on any read error.
    """
    try:
        # engine='python' tolerates irregular rows; utf-8 is made explicit
        # for consistency with the info.csv read in process_partition.
        df = pd.read_csv(file_path, usecols=['弹幕内容'],
                         engine='python', encoding='utf-8')
        return df['弹幕内容'].dropna().astype(str).tolist()
    except Exception as e:
        # Best-effort: report and return an empty list so one bad file
        # does not abort the whole partition run.
        print(f"数据加载失败: {str(e)}")
        return []
def analyze_sentiment(danmu_texts):
    """Classify danmaku texts into positive/negative/neutral buckets.

    Uses SnowNLP's sentiment model: a score above 0.6 counts as positive,
    below 0.4 as negative, anything in between as neutral.

    Parameters
    ----------
    danmu_texts : list[str]
        Danmaku texts to score.

    Returns
    -------
    tuple[dict, float]
        Per-category counts and the mean sentiment score
        (0.0 for empty input instead of NaN).
    """
    emotions = {'positive': 0, 'negative': 0, 'neutral': 0}
    sentiment_scores = []

    for item in danmu_texts:
        # SnowNLP sentiments is the model's probability of positive sentiment.
        score = SnowNLP(item).sentiments
        sentiment_scores.append(score)
        if score > 0.6:
            emotions['positive'] += 1
        elif score < 0.4:
            emotions['negative'] += 1
        else:
            emotions['neutral'] += 1

    # Guard the empty case: np.mean([]) emits a RuntimeWarning and
    # returns NaN, which would poison downstream averaging/CSV output.
    avg_score = float(np.mean(sentiment_scores)) if sentiment_scores else 0.0
    return emotions, avg_score
def process_partition(partition_path, danmaku_suffix="_273_danmaku.csv"):
    """Score sentiment for every video listed in a partition's info.csv.

    For each BV id in info.csv, loads the matching danmaku CSV, computes
    the average sentiment score, and writes all scores back to info.csv
    in a new '情感评分' column (None where data is missing or empty).

    Parameters
    ----------
    partition_path : str
        Directory containing info.csv and one sub-directory per BV id.
    danmaku_suffix : str, optional
        Filename suffix of the per-video danmaku CSV inside each BV
        directory. Default preserves the original hard-coded naming.
    """
    info_file = os.path.join(partition_path, 'info.csv')
    if not os.path.exists(info_file):
        print(f"未找到info文件: {info_file}")
        return

    info_df = pd.read_csv(info_file, encoding='utf-8')
    scores = []

    for bv in info_df['BV号']:
        danmu_file = os.path.join(partition_path, bv, f"{bv}{danmaku_suffix}")
        if not os.path.exists(danmu_file):
            scores.append(None)  # keep scores aligned with info_df rows
            continue

        danmu_texts = load_data(danmu_file)
        if not danmu_texts:
            scores.append(None)
            continue

        _, avg_score = analyze_sentiment(danmu_texts)
        scores.append(avg_score)

    info_df['情感评分'] = scores
    info_df.to_csv(info_file, index=False)
# 使用示例 - 处理GMV分区
# Guarded so importing this module for its functions does not trigger
# the filesystem scan and info.csv rewrite as a side effect.
if __name__ == "__main__":
    partition_path = "hot_data/GMV"
    process_partition(partition_path)