"""Score the overall sentiment of Bilibili danmaku (bullet comments) in a CSV file.

The script reads the ``弹幕内容`` column of a CSV file, runs every comment
through a Hugging Face sequence-classification model and prints the mean
probability of class index 1 as the sentiment score.
"""

import sys
from typing import List

import pandas as pd
import torch

# Hugging Face model id of the sentiment classifier.
MODEL_NAME = "IDEA-CCNL/Erlangshen-Roberta-330M-Sentiment"

# Score returned when no prediction can be made (treated as "neutral").
NEUTRAL_SCORE = 0.5

# CSV column that holds the danmaku text.
TEXT_COLUMN = '弹幕内容'

# Example input used when no path is given on the command line.
DEFAULT_FILE_PATH = "hot_data/GMV/BV1ajXMYUE6S/BV1ajXMYUE6S_273_danmaku.csv"


def load_data(file_path) -> List[str]:
    """Load the danmaku texts from a CSV file.

    Only the ``弹幕内容`` column is read. Missing values are dropped and the
    remaining values are converted to ``str``.

    Args:
        file_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        The list of danmaku strings, or an empty list when the file cannot be
        read or parsed (the error is printed, not raised).
    """
    # Deliberately best-effort: any read/parse problem (missing file, missing
    # column, malformed CSV) is reported and turned into "no data".
    try:
        df = pd.read_csv(file_path, usecols=[TEXT_COLUMN], engine='python')
        return df[TEXT_COLUMN].dropna().astype(str).tolist()
    except Exception as e:
        print(f"数据加载失败: {e}")
        return []


def analyze_sentiment(texts, batch_size: int = 64) -> float:
    """Return the mean positive-class probability over ``texts``.

    Args:
        texts: Iterable of strings to score.
        batch_size: Number of texts tokenized and sent through the model per
            forward pass. Bounds peak memory for large inputs.

    Returns:
        The average of the softmax probability at class index 1 across all
        texts, or ``0.5`` when ``texts`` is empty or when loading the model
        or running inference fails (the error is printed, not raised).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        ImportError: If the ``transformers`` package is not installed.
    """
    texts = list(texts)
    if not texts:
        # Nothing to score: skip the expensive model load entirely.
        return NEUTRAL_SCORE
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Imported here rather than at module level so that reading a CSV with
    # `load_data` does not require (or pay for) the transformers package.
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    except Exception as e:
        print(f"模型加载失败: {e}")
        return NEUTRAL_SCORE

    positive_total = 0.0
    try:
        with torch.no_grad():
            # Process in chunks instead of one giant batch so memory use does
            # not grow with the number of comments.
            for start in range(0, len(texts), batch_size):
                batch = texts[start:start + batch_size]
                inputs = tokenizer(
                    batch,
                    padding=True,
                    truncation=True,
                    max_length=128,
                    return_tensors="pt",
                )
                probs = torch.softmax(model(**inputs).logits, dim=1)
                # NOTE(review): assumes class index 1 is the positive label —
                # confirm against the model's `config.id2label`.
                positive_total += probs[:, 1].sum().item()
    except Exception as e:
        print(f"情感推理失败: {e}")
        return NEUTRAL_SCORE

    return positive_total / len(texts)


def main() -> None:
    """Score the CSV given as the first CLI argument (or the example file)."""
    file_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_FILE_PATH

    danmu_texts = load_data(file_path)
    if danmu_texts:
        final_score = analyze_sentiment(danmu_texts)
        print(f"B站弹幕情感评分:{final_score:.4f}")
    else:
        print("未找到有效弹幕数据")


if __name__ == "__main__":
    main()