# Reconstructed from a git diff that added temp.py as a new file (79 lines).
# The same diff also recorded a binary change to Final_Data_Quantificated.xlsx,
# which carries no textual content.
"""Average sentiment scoring for danmaku (bullet-comment) text.

Texts that exactly match a hand-curated slang table receive a fixed score;
every other text is scored with SnowNLP.
"""
import os

import numpy as np
import pandas as pd

# Hand-added scores for slang. Values were calibrated manually against the
# original dictionary, where "很好" (very good) scores 0.78, "一般" (average)
# scores 0.52 and "差" (bad) scores 0.14.
_SPECIAL_CASES = {
    # Strongly positive expressions
    "爷青回": 0.9,  # nostalgia ("my youth is back")
    "yyds": 0.9,  # "forever the GOAT"
    'YYDS': 0.9,  # "forever the GOAT"
    "awsl": 0.8,  # "ah, I'm dead" (moved)
    '阿伟死了': 0.8,  # homophone of awsl (moved)
    "泪目": 0.8,  # touching scene
    "排面": 0.8,  # impressive showing
    "双厨狂喜": 0.7,  # crossover delight for fans of both
    "梦幻联动": 0.7,  # dream collaboration between works
    "注入灵魂": 0.7,  # highlight moment
    "文艺复兴": 0.8,  # a classic brought back
    # Meme / interaction phrases
    "下次一定": 0.55,  # "next time for sure" (putting off giving coins)
    "你币没了": 0.45,  # threatening not to give coins
    "空降成功": 0.5,  # skipped the intro
    "标准结局": 0.5,  # expected outcome
    "典中典": 0.4,  # overused classic (derogatory tone)
    # Highlight scenes
    "名场面": 0.85,  # iconic scene
    "神仙打架": 0.9,  # clash of masters
    "前方高能": 0.7,  # climax warning
    # Number slang
    "666": 0.75,  # skilful play
    "999": 0.75,  # "666" flipped
    "2333": 0.6,  # laughter
    # Abstract culture
    "草": 0.6,  # laughter (neutral)
    "生草": 0.65,  # funny scene
    # Emotional-breakdown phrases
    "破防了": 0.4,  # emotional defences broken
    "我裂开了": 0.3,  # mentally shattered
    # Domain-specific memes
    "奥利给": 0.8,  # cheering on
    "DNA动了": 0.8,  # memory triggered
    "有内味了": 0.7,  # "has that flavour now"
    # Negative scenes
    "公开处刑": 0.5,  # embarrassing moment
    "阴间": 0.3,  # eerie content
    "阴间滤镜": 0.3,  # eerie-looking picture
    "血压上来了": 0.3,  # irritating
}


def load_data(file_path):
    """Read the '弹幕内容' column of a CSV file as a list of strings.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The non-missing values of the '弹幕内容' column as ``str``. If the
        file cannot be read or parsed, the error is printed and an empty
        list is returned (best-effort behaviour).
    """
    try:
        # dtype=str keeps every comment exactly as written. Without it an
        # all-numeric column with a blank cell is inferred as float, so "666"
        # becomes "666.0" and "007" becomes "7.0", which no longer match the
        # slang table.
        df = pd.read_csv(
            file_path,
            usecols=['弹幕内容'],
            engine='python',
            dtype=str,
        )
        return df['弹幕内容'].dropna().astype(str).tolist()
    except Exception as e:
        print(f"数据加载失败: {e}")
        return []


def _snownlp_score(text):
    """Return the SnowNLP sentiment value for one text."""
    # Imported on first use so that inputs fully covered by the slang table
    # do not require snownlp to be loaded.
    from snownlp import SnowNLP

    return SnowNLP(text).sentiments


def analyze_sentiment(danmu_texts):
    """Return the mean sentiment score of the given texts.

    A text that exactly equals a key of the slang table gets that key's fixed
    score; any other text is scored by SnowNLP.

    Args:
        danmu_texts: Iterable of comment strings.

    Returns:
        The arithmetic mean of the per-text scores, or NaN when there are no
        texts (for example after ``load_data`` failed and returned ``[]``).
    """
    sentiment_scores = [
        _SPECIAL_CASES[text] if text in _SPECIAL_CASES else _snownlp_score(text)
        for text in danmu_texts
    ]

    if not sentiment_scores:
        # np.mean([]) also yields NaN but emits a "Mean of empty slice"
        # RuntimeWarning; return the same value without the warning.
        return np.float64(np.nan)

    return np.mean(sentiment_scores)


if __name__ == "__main__":
    # Example usage on a real file:
    #   file_path = 'hot_data/亲子/BV1TLXVYREDt/BV1TLXVYREDt_287_danmaku.csv'
    #   texts = load_data(file_path)
    #   print(analyze_sentiment(texts))

    # Smoke test
    test_words = ['4']
    s = analyze_sentiment(test_words)
    print(s)