2025-04-03 23:24:36 +08:00

79 lines
2.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
from snownlp import SnowNLP
import os
def load_data(file_path):
    """Read the danmaku text column of a CSV file into a list of strings.

    Only the '弹幕内容' column is loaded. Missing values are dropped and the
    remaining entries are converted to ``str``.

    Args:
        file_path: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A list of strings, or an empty list when anything goes wrong while
        reading or converting (the error message is printed, not raised).
    """
    try:
        frame = pd.read_csv(file_path, usecols=['弹幕内容'], engine='python')
        non_null = frame['弹幕内容'].dropna()
        as_text = non_null.astype(str)
        return as_text.tolist()
    except Exception as e:
        # Best-effort loader: report the failure and hand back no data.
        print(f"数据加载失败: {str(e)}")
        return []
# Hand-assigned sentiment scores for danmaku slang that is looked up by exact
# match before falling back to SnowNLP. Per the original author's note, the
# values were calibrated against the base dictionary, where "very good" scores
# about 0.78, "average" about 0.52 and "bad" about 0.14.
# Built once at import time instead of on every call.
_SPECIAL_CASES = {
    # Strongly positive terms
    "爷青回": 0.9,  # nostalgia
    "yyds": 0.9,  # "forever the god" (greatest of all time)
    "YYDS": 0.9,  # same, upper-case spelling
    "awsl": 0.8,  # "ah, I'm dead" (moved)
    "阿伟死了": 0.8,  # sound-alike variant (moved)
    "泪目": 0.8,  # touching scene
    "排面": 0.8,  # impressive presence
    "双厨狂喜": 0.7,  # crossover
    "梦幻联动": 0.7,  # collaboration across works
    "注入灵魂": 0.7,  # high-energy moment
    "文艺复兴": 0.8,  # a classic revived
    # Meme / interaction phrases
    "下次一定": 0.55,  # coin-tipping procrastination meme
    "你币没了": 0.45,  # threat to withhold coins
    "空降成功": 0.5,  # skipped the intro
    "标准结局": 0.5,  # as expected
    "典中典": 0.4,  # classic repetition (derogatory connotation)
    # High-energy iconic scenes
    "名场面": 0.85,  # classic clip
    "神仙打架": 0.9,  # clash of masters
    "前方高能": 0.7,  # climax warning
    # Number slang
    "666": 0.75,  # skilled play
    "999": 0.75,  # "666" flipped
    "2333": 0.6,  # laughter
    # "Abstract" meme culture
    # NOTE(review): this key is an empty string in the file as seen; the
    # original character was presumably lost (the file was flagged for
    # ambiguous Unicode). Kept as-is so behaviour is unchanged -- confirm.
    "": 0.6,  # laughter (neutral)
    "生草": 0.65,  # funny scene
    # Emotional-breakdown phrases
    "破防了": 0.4,  # mental defences broken
    "我裂开了": 0.3,  # mentally shattered
    # Domain-specific memes
    "奥利给": 0.8,  # cheering on
    "DNA动了": 0.8,  # triggers memories
    "有内味了": 0.7,  # has the right flavour
    # Negative scenes
    "公开处刑": 0.5,  # embarrassing scene
    "阴间": 0.3,  # eerie content
    "阴间滤镜": 0.3,  # eerie visuals
    "血压上来了": 0.3,  # irritating
}


def analyze_sentiment(danmu_texts):
    """Return the mean sentiment score of a collection of danmaku texts.

    A text that exactly matches a key of ``_SPECIAL_CASES`` takes the
    hand-assigned score; every other text is scored with
    ``SnowNLP(text).sentiments``.

    Args:
        danmu_texts: Iterable of danmaku strings (for example the list
            returned by ``load_data``).

    Returns:
        The arithmetic mean of the per-text scores as computed by
        ``np.mean``, or NaN when ``danmu_texts`` yields no items.
    """
    sentiment_scores = [
        _SPECIAL_CASES[text] if text in _SPECIAL_CASES else SnowNLP(text).sentiments
        for text in danmu_texts
    ]
    if not sentiment_scores:
        # np.mean([]) would also give NaN, but only after emitting a
        # RuntimeWarning; an empty input is a normal outcome of a failed load,
        # so return the same value without the warning.
        return np.float64(np.nan)
    return np.mean(sentiment_scores)
# Example usage against a real danmaku export (kept commented out):
# file_path='hot_data/亲子/BV1TLXVYREDt/BV1TLXVYREDt_287_danmaku.csv'
# df = load_data(file_path)
# scores=analyze_sentiment(df)
# print(scores)
# Smoke test. Guarded so that it only runs when this file is executed as a
# script; importing the module for its functions no longer scores text or
# prints as a side effect.
if __name__ == "__main__":
    test_words = ['4']
    s = analyze_sentiment(test_words)
    print(s)