# statistics_model2025/snowNLP_danmu sentiment_analyzer.py
# Last modified: 2025-03-29 15:46:52 +08:00
# (63 lines, 1.7 KiB, Python)

import pandas as pd
import numpy as np
from snownlp import SnowNLP
import os
def load_data(file_path):
    """Load the danmaku-text column from a CSV file.

    Args:
        file_path: path to a CSV containing a '弹幕内容' column.

    Returns:
        list[str]: non-null danmaku texts, coerced to str; an empty
        list when the file is missing, unreadable, or lacks the column
        (the error is printed, not raised — best-effort by design).
    """
    try:
        # encoding specified explicitly for consistency with the
        # info.csv read in process_partition()
        df = pd.read_csv(file_path, usecols=['弹幕内容'],
                         engine='python', encoding='utf-8')
        return df['弹幕内容'].dropna().astype(str).tolist()
    except Exception as e:
        print(f"数据加载失败: {str(e)}")
        return []
def analyze_sentiment(danmu_texts):
    """Score danmaku texts with SnowNLP and tally sentiment classes.

    Args:
        danmu_texts: iterable of danmaku strings.

    Returns:
        tuple (emotions, avg_score):
            emotions: dict counting 'positive' (score > 0.6),
                'negative' (score < 0.4) and 'neutral' (otherwise).
            avg_score: mean SnowNLP sentiment over all texts;
                0.0 for empty input.
    """
    emotions = {'positive': 0, 'negative': 0, 'neutral': 0}
    sentiment_scores = []
    for text in danmu_texts:
        score = SnowNLP(text).sentiments
        sentiment_scores.append(score)
        if score > 0.6:
            emotions['positive'] += 1
        elif score < 0.4:
            emotions['negative'] += 1
        else:
            emotions['neutral'] += 1
    # Guard the empty case: np.mean([]) would emit a RuntimeWarning
    # and return NaN, which then poisons the '情感评分' column.
    avg_score = float(np.mean(sentiment_scores)) if sentiment_scores else 0.0
    return emotions, avg_score
def process_partition(partition_path, danmaku_suffix="_273_danmaku.csv"):
    """Compute per-video average sentiment for one partition directory.

    Reads ``<partition_path>/info.csv`` (must contain a 'BV号' column),
    loads each video's danmaku CSV from
    ``<partition_path>/<BV>/<BV><danmaku_suffix>``, scores it with
    analyze_sentiment(), and writes the averages back into info.csv as
    a '情感评分' column. Videos whose danmaku file is missing or empty
    get None so the output column stays aligned with the rows.

    Args:
        partition_path: directory holding info.csv and per-BV subfolders.
        danmaku_suffix: danmaku filename suffix; the default preserves
            the historical "_273_danmaku.csv" naming.
    """
    info_file = os.path.join(partition_path, 'info.csv')
    if not os.path.exists(info_file):
        print(f"未找到info文件: {info_file}")
        return
    info_df = pd.read_csv(info_file, encoding='utf-8')
    scores = []
    for bv in info_df['BV号']:
        danmu_file = os.path.join(partition_path, bv, f"{bv}{danmaku_suffix}")
        if not os.path.exists(danmu_file):
            scores.append(None)
            continue
        danmu_texts = load_data(danmu_file)
        if not danmu_texts:
            scores.append(None)
            continue
        _, avg_score = analyze_sentiment(danmu_texts)
        scores.append(avg_score)
    info_df['情感评分'] = scores
    # Overwrites info.csv in place with the new sentiment column.
    info_df.to_csv(info_file, index=False)
# Usage example — process the GMV partition.
# Guarded so importing this module for its functions does not trigger
# the filesystem-dependent batch run.
if __name__ == "__main__":
    partition_path = "hot_data/GMV"
    process_partition(partition_path)