79 lines
2.6 KiB
Python
79 lines
2.6 KiB
Python
|
import pandas as pd
|
|||
|
import numpy as np
|
|||
|
from snownlp import SnowNLP
|
|||
|
import os
|
|||
|
|
|||
|
def load_data(file_path):
    """Read the danmaku text column of a CSV file into a list of strings.

    Only the '弹幕内容' column is loaded. Missing values are dropped and
    the remaining cells are converted to ``str``.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pandas.read_csv``.

    Returns:
        The non-null cells of the column as strings, in file order. If
        anything goes wrong while reading or converting, the error is
        printed and an empty list is returned instead of raising.
    """
    column_name = '弹幕内容'
    try:
        frame = pd.read_csv(file_path, usecols=[column_name], engine='python')
        non_null = frame[column_name].dropna()
        texts = non_null.astype(str).tolist()
    except Exception as err:
        # Best-effort loader: report the problem and let the caller carry on
        # with no data.
        print(f"数据加载失败: {str(err)}")
        return []
    else:
        return texts
|
|||
|
|
|||
|
# Hand-assigned sentiment scores for danmaku slang. The values were chosen by
# hand against the stock dictionary's scale, where "很好" (very good) is 0.78,
# "一般" (average) is 0.52 and "差" (bad) is 0.14.
_SPECIAL_CASES = {
    # Strongly positive phrases
    "爷青回": 0.9,  # nostalgia ("my youth is back")
    "yyds": 0.9,  # "forever the greatest"
    'YYDS': 0.9,  # "forever the greatest"
    "awsl": 0.8,  # "ah, I'm dead" (deeply moved)
    '阿伟死了': 0.8,  # homophone spelling of awsl (deeply moved)
    "泪目": 0.8,  # touching scene
    "排面": 0.8,  # impressive showing
    "双厨狂喜": 0.7,  # crossover that delights fans of both sides
    "梦幻联动": 0.7,  # collaboration across works
    "注入灵魂": 0.7,  # highlight moment
    "文艺复兴": 0.8,  # a classic resurfacing
    # Meme / interaction phrases
    "下次一定": 0.55,  # "next time for sure" (putting off tipping coins)
    "你币没了": 0.45,  # threat to withhold coins
    "空降成功": 0.5,  # skipped the intro
    "标准结局": 0.5,  # the expected outcome
    "典中典": 0.4,  # endlessly repeated "classic" (derogatory)
    # Iconic, high-energy scenes
    "名场面": 0.85,  # iconic scene
    "神仙打架": 0.9,  # clash of masters
    "前方高能": 0.7,  # climax warning
    # Numeric slang
    "666": 0.75,  # skilful play
    "999": 0.75,  # "6 flipped over"
    "2333": 0.6,  # laughter
    # "Abstract" internet culture
    "草": 0.6,  # laughter (neutral)
    "生草": 0.65,  # funny scene
    # Emotional breakdown
    "破防了": 0.4,  # emotional defences broken
    "我裂开了": 0.3,  # mentally shattered
    # Domain-specific memes
    "奥利给": 0.8,  # cheering someone on
    "DNA动了": 0.8,  # triggers a memory
    "有内味了": 0.7,  # has the right flavour
    # Negative scenes
    "公开处刑": 0.5,  # embarrassing scene
    "阴间": 0.3,  # eerie content
    "阴间滤镜": 0.3,  # eerie visuals
    "血压上来了": 0.3,  # irritating
}

# Lookup table keyed by the normalised phrase (surrounding whitespace removed,
# case-folded) so that "Yyds" or " AWSL " hit the same entry as "yyds"/"awsl".
# Built once at import time instead of on every call.
_SPECIAL_CASE_LOOKUP = {
    phrase.strip().casefold(): score
    for phrase, score in _SPECIAL_CASES.items()
}


def analyze_sentiment(danmu_texts):
    """Return the mean sentiment score of a collection of danmaku comments.

    A comment that, after stripping surrounding whitespace and ignoring
    letter case, is exactly one of the phrases in ``_SPECIAL_CASES`` gets
    that phrase's hand-assigned score. Every other comment is scored with
    ``SnowNLP(text).sentiments``.

    Args:
        danmu_texts: Iterable of comment strings, e.g. the list produced by
            ``load_data``.

    Returns:
        The arithmetic mean of the per-comment scores. For empty input the
        result is NaN, returned directly so that ``np.mean`` does not emit
        its empty-slice RuntimeWarning.
    """
    scores = []
    for text in danmu_texts:
        # Normalise only for the table lookup; SnowNLP still receives the
        # original, untouched text.
        key = text.strip().casefold() if isinstance(text, str) else text
        score = _SPECIAL_CASE_LOOKUP.get(key)
        if score is None:
            score = SnowNLP(text).sentiments
        scores.append(score)

    if not scores:
        # Nothing to average (e.g. the CSV failed to load).
        return float("nan")
    return np.mean(scores)
|
|||
|
|
|||
|
# Example: score one exported danmaku CSV end to end.
# file_path='hot_data/亲子/BV1TLXVYREDt/BV1TLXVYREDt_287_danmaku.csv'
# df = load_data(file_path)
# scores=analyze_sentiment(df)
# print(scores)

# Smoke test. Guarded so that importing this module for load_data /
# analyze_sentiment does not score text and print as an import side effect;
# running the file directly behaves as before.
if __name__ == "__main__":
    test_words = ['4']
    s = analyze_sentiment(test_words)
    print(s)
|