Compare commits
No commits in common. "main" and "SyyTmp" have entirely different histories.
1  .idea/.name  generated
@@ -1 +0,0 @@
-readme.md
2  .idea/misc.xml  generated
@@ -3,5 +3,5 @@
   <component name="Black">
     <option name="sdkName" value="SAM-bilibil" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="SAM-bilibil" project-jdk-type="Python SDK" />
 </project>
2  .idea/statistics_model2025.iml  generated
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Python 3.12" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="SAM-bilibil" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyDocumentationSettings">
BIN  Final_Data.xlsx
Binary file not shown.
@@ -5,17 +5,40 @@ import requests
 from io import BytesIO
 from PIL import Image
 import os
 from colorthief import ColorThief
 import pytesseract
 from multiprocessing import Pool
 from sklearn.cluster import MiniBatchKMeans
 from tqdm import tqdm
+from cnsenti import Sentiment
+import pynlpir
+from collections import defaultdict
+import warnings
+
+warnings.filterwarnings('ignore')
 
 # Set the OCR path
 pytesseract.pytesseract.tesseract_cmd = r'D:\Program Files\Tesseract-OCR\tesseract.exe'
 
+# ------------------ Image processing: initial configuration ---------------------
+# Initialize the face-detection model
+face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
+
+# Image-affect model coefficients (calibrated on the IAPS dataset)
+VALENCE_WEIGHTS = {
+    'warm_ratio': 0.35,
+    'brightness': 0.15,
+    'symmetry': 0.20,
+    'colorfulness': 0.30
+}
+
+AROUSAL_WEIGHTS = {
+    'contrast': 0.40,
+    'edge_density': 0.35,
+    'saturation_std': 0.25
+}
+
+# Warm-tone definition (HSV hue range)
+WARM_HUE_RANGE = (0, 60)  # red through yellow
 
 # ------------------ Image processing ---------------------
 def get_image(url):
     """Fetch an image from a URL and preprocess it"""
@@ -28,72 +51,56 @@ def get_image(url):
         return None
 
 
-def extract_color_palette(img_rgb, color_count=5):
-    """Extract the top N dominant colors and their proportions"""
-    # Extract dominant colors with k-means clustering
-    pixels = img_rgb.reshape(-1, 3)
-    kmeans = MiniBatchKMeans(n_clusters=color_count, random_state=0)
-    labels = kmeans.fit_predict(pixels)
-
-    # Compute each color's share of the image
-    counts = np.bincount(labels)
-    total = counts.sum()
-    palette = []
-    for i in range(color_count):
-        ratio = counts[i] / total
-        color = kmeans.cluster_centers_[i].astype(int)
-        palette.append((color, ratio))
-
-    return sorted(palette, key=lambda x: -x[1])  # sort by share, descending
-
-
-def classify_hsv_color(rgb_color):
-    """Classify an RGB color as warm/cool/neutral"""
-    try:
-        # Make sure the input is a valid RGB color value
-        rgb_color = np.clip(rgb_color, 0, 255)
-        hsv = cv2.cvtColor(np.uint8([[rgb_color]]), cv2.COLOR_RGB2HSV)[0][0]
-        h, s, v = hsv[0], hsv[1] / 255.0, hsv[2] / 255.0  # normalize
-
-        # Neutral-color test (per Palmer's criterion)
-        if s < 0.2 or v < 0.2:
-            return 'neutral'
-
-        # Classify by hue
-        if (0 <= h < 90) or (270 <= h <= 360):
-            return 'warm'
-        return 'cool'
-    except Exception as e:
-        print(f"Color out of range: {str(e)}")
-        return 'neutral'  # default to neutral on error
-
-
-def determine_warm_tone(img_rgb):
-    """Return (warm ratio, cool ratio, neutral ratio)"""
-    palette = extract_color_palette(img_rgb)
-
-    warm_ratio, cool_ratio, neutral_ratio = 0.0, 0.0, 0.0
-    for color, ratio in palette:
-        category = classify_hsv_color(color)
-        if category == 'warm':
-            warm_ratio += ratio
-        elif category == 'cool':
-            cool_ratio += ratio
-        else:
-            neutral_ratio += ratio
-
-    return warm_ratio, cool_ratio, neutral_ratio
+def analyze_image(img):
+    """Analyze image features"""
+    if img is None:
+        return {}
+
+    # Convert to the HSV color space
+    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
+    h, s, v = cv2.split(hsv)
+
+    # Basic features
+    features = {
+        'brightness': np.mean(v),
+        'contrast': np.max(v) - np.min(v),
+        'saturation_std': np.std(s),
+        'colorfulness': np.std(h) + np.std(s) + np.std(v)
+    }
+
+    # Warm-color ratio
+    hue_mask = cv2.inRange(h, WARM_HUE_RANGE[0], WARM_HUE_RANGE[1])
+    features['warm_ratio'] = np.count_nonzero(hue_mask) / (img.shape[0] * img.shape[1])
+
+    # Symmetry: compare the left half against the mirrored right half
+    mid = img.shape[1] // 2
+    left = img[:, :mid]
+    right = cv2.flip(img[:, mid:], 1)
+    features['symmetry'] = cv2.matchTemplate(left, right, cv2.TM_CCOEFF_NORMED)[0][0]
+
+    # Edge density
+    edges = cv2.Canny(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 100, 200)
+    features['edge_density'] = np.mean(edges)
+
+    return features
+
+
+def calculate_affect(features):
+    """Compute emotional valence and arousal"""
+    poslm = sum(features[k] * VALENCE_WEIGHTS[k] for k in VALENCE_WEIGHTS)
+    actlm = sum(features[k] * AROUSAL_WEIGHTS[k] for k in AROUSAL_WEIGHTS)
+
+    # Sigmoid normalization into (-1, 1)
+    return {
+        'Poslm': 2 / (1 + np.exp(-poslm)) - 1,
+        'Actlm': 2 / (1 + np.exp(-actlm)) - 1
+    }
 
 
 def detect_human(img):
     """Detect whether the image contains a face"""
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    # Tuned parameter combination
-    faces = face_cascade.detectMultiScale(
-        gray,
-        scaleFactor=1.02,  # smaller scale step for finer-grained detection
-        minNeighbors=5,    # require more neighbors to reduce false positives
-        minSize=(50, 50),  # minimum face size expected on Bilibili covers
-        flags=cv2.CASCADE_FIND_BIGGEST_OBJECT  # prefer the largest face
-    )
+    faces = face_cascade.detectMultiScale(gray, 1.05, 3)
     return len(faces) > 0
@@ -103,18 +110,24 @@ def process_url(url):
         if img is None:
             return None
 
-        # Color analysis
-        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        warm_ratio, cool_ratio, neutral_ratio = determine_warm_tone(img_rgb)
+        features = analyze_image(img)
+        affect = calculate_affect(features)
+
+        # Dominant-color analysis
+        color_thief = ColorThief(BytesIO(requests.get(url).content))
+        dominant_color = color_thief.get_color(quality=1)
+        hsv_color = cv2.cvtColor(np.uint8([[dominant_color]]), cv2.COLOR_RGB2HSV)[0][0]
+        warm = 1 if WARM_HUE_RANGE[0] <= hsv_color[0] <= WARM_HUE_RANGE[1] else 0
+
+        # Face detection
+        has_human = detect_human(img)
 
         return {
             'url': url,
-            'Portrait': int(detect_human(img)),
-            'WarmRatio': round(warm_ratio, 3),
-            'CoolRatio': round(cool_ratio, 3),
-            'NeutralRatio': round(neutral_ratio, 3)
+            **affect,
+            'Warm': warm,
+            'Portrait': int(has_human)
         }
 
     except Exception as e:
         print(f"Error processing {url}: {str(e)}")
         return None
@@ -122,51 +135,42 @@ def process_url(url):
 
 # Batch processing
 def batch_process(urls, workers=4):
-    # Seed the result list with every URL so failures stay visible
-    results = [{'url': url, 'success': False} for url in urls]
-
     with Pool(workers) as pool:
-        processed = list(tqdm(pool.imap(process_url, urls),
-                              total=len(urls),
-                              desc="Progress"))
-
-        # Update successfully processed entries in their original order
-        for i, res in enumerate(processed):
-            if res is not None:
-                results[i] = res
-                results[i]['success'] = True
-
+        results = [res for res in pool.imap(process_url, urls) if res is not None]
     return pd.DataFrame(results)
 
 
 # Usage example
 if __name__ == "__main__":
-    # # Read the URL list
-    # input_csv = "data_all_first_ver.csv"
-    # # Output path
-    # os.makedirs('./result', exist_ok=True)
-    # output_csv = "result/analysis_results.csv"
-    #
-    # # Full run
+    # Read the URL list
+    input_csv = "data_all.csv"
+    # Output path
+    os.makedirs('./result', exist_ok=True)
+    output_csv = "result/analysis_results.csv"
+
+    ## Full run
     # df = pd.read_csv(input_csv)
-    # urls = df['视频封面URL'].tolist()
+    # urls = df['视频封面'].tolist()
     #
     # # Run the analysis
     # result_df = batch_process(urls)
     # result_df.to_csv(output_csv, index=False, encoding='utf-8-sig')
     # print(f"Successfully processed {len(result_df)}/{len(urls)} images")
     # print("Analysis complete! Results saved to", output_csv)
 
-    # Re-run the URLs that failed
-    urls_failed = [
-        'http://i1.hdslb.com/bfs/archive/5c42e0fa42ec945106d2e167253889e8a05541c9.jpg',
-        'http://i1.hdslb.com/bfs/archive/d2ca3e3f4c543245715937bf643e98b55badcc21.jpg',
-        'http://i0.hdslb.com/bfs/archive/2b1cf64d70bf2036793e33b2de3067344a7ff77d.jpg',
-        'http://i0.hdslb.com/bfs/archive/123ddc4cdf429968fa416f78f4049a728e8da3ab.jpg',
-        'http://i2.hdslb.com/bfs/archive/b07446d2176cec63d42f204504f4cda7a940b05b.jpg',
-    ]
-    result_failed = batch_process(urls_failed)
-    result_failed.to_csv('result/reanalyze.csv', index=False, encoding='utf-8-sig')
-    #
-    # # Merge back into the original data
-    # final_df = df.merge(result_df, left_on='视频封面')
-    # final_df.drop('url', axis=1).to_csv(output_csv, index=False)
-
-    # Example URL list
+    # Small-batch experiment
+    urls = [
+        'http://i0.hdslb.com/bfs/archive/393a8e961b704d43256fe7e6c89fee04df966e17.jpg',
+        'http://i0.hdslb.com/bfs/archive/072e16a1237040941f15b1ed67a8d1ebe6f2e041.jpg',
+        'http://i2.hdslb.com/bfs/archive/1c56b5bec767c604175983cc5926f5832baa9bb8.jpg',
+        'http://i0.hdslb.com/bfs/archive/66384e53a15345a539ccbb2989442f1d960b9235.jpg',
+        'http://i2.hdslb.com/bfs/archive/836b762456f0b4d65dd2c40fc4cd120107e46b88.jpg',
+    ]
+    result_df = batch_process(urls)
+    result_df.to_csv("result/analysis_results.csv", index=False)
+    print(f"Successfully processed {len(result_df)}/{len(urls)} images")
 
 
     print("Analysis complete! Results saved to", output_csv)
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
1136  data_text_all/标题.txt
File diff suppressed because it is too large
49  readme.md
@@ -1,5 +1,5 @@
 # Data processing
 Two files: the raw data file plus the quantified data file
 
 ## Merging the data files
 ### 1. Merge the trending data
 - Data files
@@ -36,7 +36,7 @@
 - Video type: re-upload 0, original 1
 - Subtitles: 0 for no subtitles, 1 otherwise
 - Total video duration: recode durations under 60 s, between 60 and 600 s, and over 600 s as 1, 2 and 3 for later descriptive analysis (see the sketch after this hunk)
-- Danmaku sentiment score (SentimentScore) = 0.8*snowNLP + 0.2*RoBERTa
+- Danmaku sentiment score = 0.8*snowNLP + 0.2*RoBERTa
 ### Dropped indicators
 - Publication time and the other indicators already processed above (keep the raw play count)
 - Video description and tags
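A minimal sketch of the duration recoding above, assuming a pandas DataFrame whose duration column is named 视频总时长 and holds seconds; the column name and the boundary handling are assumptions, not the repository's actual code:

```python
import pandas as pd

df = pd.read_csv("data_all.csv")
# <60 s -> 1, 60-600 s -> 2, >=600 s -> 3 (exact boundary treatment is an assumption)
df["视频总时长"] = pd.cut(df["视频总时长"],
                          bins=[0, 60, 600, float("inf")],
                          labels=[1, 2, 3], right=False)
```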
@@ -44,47 +44,4 @@
 ### Data cleaning
 - Screen extreme danmaku sentiment scores: pull out rows where the two methods differ by more than 0.3 for manual checking
 - Fill missing values (with the overall mean)
 - Remove outliers
-## Indicator innovations
-New indicators:
-- Danmaku sentiment scores snowNLP and RoBERTa, with their weighted average as the final score (SentimentScore)
-- Title emotional valence (PosTe) and emotional arousal (ActTe)
-- Cover:
-  - Whether a human figure is present (Portrait)
-  - Warm-color ratio (WarmRatio)
-  - Cool-color ratio (CoolRatio)
-  - Neutral-color ratio (NeutralRatio)
-### Danmaku sentiment score
-The sentiment of danmaku reflects how much users like a video. We previously crawled every video's danmaku; to analyze their sentiment we designed two scoring schemes, a dictionary method and a model method, and took a weighted average of the two as the final value to make the result more credible.
-The dictionary method uses the SnowNLP library for sentiment analysis, producing a score between 0 and 1, where 0 is negative and 1 is positive.
-The dictionary method is fast but fairly traditional, so to fit the language of Bilibili danmaku we manually added some vocabulary with sentiment values (using the base dictionary's 很好 0.78, 一般 0.52 and 差 0.14 as the yardstick),
-e.g. "爷青回" 0.9 (my youth is back), "yyds" 0.9 (eternal GOAT), "awsl" 0.8 (ah, I'm dying; moved/cute), "2333" 0.6 (laughter), "DNA动了" 0.8 (memory triggered),
-yielding the "danmaku sentiment score snowNLP" indicator.
-
-The model method uses a previously pre-trained RoBERTa model for sentiment analysis, with the same scoring rules as above.
-RoBERTa, proposed by ... et al. in 2019, is an improved BERT variant suited to text classification and sentiment analysis, with a degree of robustness (cite paper).
-On this basis we adopt the open-source Erlangshen-Roberta-330M model, which has been adapted for Chinese, has 330 million parameters, and performs well on JD and Weibo comment datasets (cite paper), making it a good fit for Bilibili danmaku sentiment analysis.
-Because the danmaku volume makes the computation heavy, we uniformly sample 500 comments from any video with more than 500, and ran the job on the university's high-performance computing platform ~~not that it helped much~~, yielding the "danmaku sentiment score RoBERTa" indicator.
-Finally, we take a weighted average of the two methods' results to obtain the final danmaku sentiment score (SentimentScore).
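A minimal sketch of the uniform sampling and the weighted combination described above, assuming the per-video scores already sit in columns named after the two indicators; the helper name, the checkpoint id and the column names are assumptions, not the repository's actual code:

```python
import numpy as np
import pandas as pd

def sample_danmaku(texts, cap=500):
    """Uniformly sample at most `cap` danmaku, as described above."""
    if len(texts) <= cap:
        return texts
    idx = np.linspace(0, len(texts) - 1, cap).astype(int)  # evenly spaced indices
    return [texts[i] for i in idx]

# The readme names the open-source Erlangshen-Roberta-330M model; the exact
# Hugging Face checkpoint id below is an assumption.
# from transformers import pipeline
# clf = pipeline("text-classification", model="IDEA-CCNL/Erlangshen-Roberta-330M-Sentiment")

df = pd.read_csv("data_all.csv")
# Weighted average of the two methods (column names are assumptions)
df["SentimentScore"] = 0.8 * df["弹幕情感评分snowNLP"] + 0.2 * df["弹幕情感评分RoBERTa"]
# Flag rows where the two methods diverge by more than 0.3 for manual review,
# matching the data-cleaning rule above
flagged = df[(df["弹幕情感评分snowNLP"] - df["弹幕情感评分RoBERTa"]).abs() > 0.3]
```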
-
-### Title-text emotional valence (PosTe) and emotional arousal (ActTe)
-(We verified the paper's computation of valence and arousal for cover text, but the OCR results were poor; since the functions were written anyway, we applied them to video titles instead.)
-
-Steps:
-1. Definitions follow the table on p. 37 of the <<应急科普>> paper.
-2. Computing the valence PosTe:
-   Use NLPIR segmentation and sentiment-word tagging (Python's "cnsenti" package) to count the positive and negative words in the text, then, following 廖圣清, 程俊超 and other scholars, take positive count / (positive count + negative count) as the text's valence.
-   If the positive and negative counts are both 0, set PosTe to 0.5, i.e. neutral.
-3. Computing the arousal ActTe:
-   Match the segmented words against the Dalian University of Technology emotion lexicon (cite: 徐琳宏, 林鸿飞, 潘宇, 任惠, 陈建美. 情感词汇本体的构造. 情报学报, 2008, 27(2): 180-185),
-   mapping each word to a fine-grained emotion class and then up to one of the seven coarse classes: 哀 (sorrow), 好 (fondness), 恶 (disgust), 乐 (joy), 怒 (anger), 惧 (fear), 惊 (surprise);
-   following 廖圣清 and other scholars, assign each class an arousal value from 1 to 7 by intensity.
-   Count the occurrences of words expressing each of the seven emotions, multiply each class's word count by its arousal value, and normalize the sum to get the text's arousal.
-   Titles that match nothing consist mostly of neutral vocabulary, so set ActTe to 0.
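A minimal sketch of both computations with cnsenti, which is named above; the 1-7 arousal mapping and the normalization constant are assumptions, since the readme does not record them:

```python
from cnsenti import Sentiment, Emotion

senti = Sentiment()
emo = Emotion()

# Hypothetical arousal values for the seven classes; the readme assigns 1-7
# by intensity but does not record the exact mapping.
AROUSAL_VALUES = {'惊': 7, '怒': 6, '惧': 5, '乐': 4, '恶': 3, '好': 2, '哀': 1}

def pos_te(text):
    """Valence: positive / (positive + negative); 0.5 if neither occurs."""
    counts = senti.sentiment_count(text)  # returns positive/negative word counts
    pos, neg = counts['pos'], counts['neg']
    return pos / (pos + neg) if (pos + neg) > 0 else 0.5

def act_te(text):
    """Arousal: frequency-weighted class values, normalized into [0, 1]."""
    counts = emo.emotion_count(text)      # word counts per emotion class
    total = sum(counts.get(k, 0) for k in AROUSAL_VALUES)
    if total == 0:
        return 0.0                        # mostly neutral vocabulary
    weighted = sum(counts.get(k, 0) * v for k, v in AROUSAL_VALUES.items())
    return weighted / (7 * total)         # divide by the max value (assumption)
```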
-
-### Video cover processing
-A video's cover strongly influences its spread, but because qualities such as cover appeal are subjective, previous research is rather limited.
-Here we extract four objective indicators to assist the analysis: whether a human figure is present, plus the warm-, cool- and neutral-color ratios.
-We load OpenCV's pre-trained face-detection model and call `detectMultiScale` to detect human figures,
-and use an improved k-means clustering algorithm (MiniBatchKMeans) to extract the cover's dominant colors, classifying them in the HSV color space to compute the tone ratios.
File diff suppressed because it is too large
@@ -1,6 +0,0 @@
-url,success,Portrait,WarmRatio,CoolRatio,NeutralRatio
-http://i1.hdslb.com/bfs/archive/5c42e0fa42ec945106d2e167253889e8a05541c9.jpg,False,,,,
-http://i1.hdslb.com/bfs/archive/d2ca3e3f4c543245715937bf643e98b55badcc21.jpg,False,,,,
-http://i0.hdslb.com/bfs/archive/2b1cf64d70bf2036793e33b2de3067344a7ff77d.jpg,True,1.0,0.178,0.414,0.408
-http://i0.hdslb.com/bfs/archive/123ddc4cdf429968fa416f78f4049a728e8da3ab.jpg,False,,,,
-http://i2.hdslb.com/bfs/archive/b07446d2176cec63d42f204504f4cda7a940b05b.jpg,False,,,,
79  temp.py
@@ -1,79 +0,0 @@
-import pandas as pd
-import numpy as np
-from snownlp import SnowNLP
-import os
-
-def load_data(file_path):
-    try:
-        df = pd.read_csv(file_path, usecols=['弹幕内容'], engine='python')
-        return df['弹幕内容'].dropna().astype(str).tolist()
-    except Exception as e:
-        print(f"Failed to load data: {str(e)}")
-        return []
-
-def analyze_sentiment(danmu_texts):
-    # Special-case vocabulary, added manually (calibrated against the base
-    # dictionary, where 很好 is 0.78, 一般 is 0.52 and 差 is 0.14)
-    special_cases = {
-        # High-intensity positive words
-        "爷青回": 0.9,     # "my youth is back" (nostalgia)
-        "yyds": 0.9,       # "eternal GOAT"
-        'YYDS': 0.9,       # "eternal GOAT"
-        "awsl": 0.8,       # "ah, I'm dying" (moved/cute)
-        '阿伟死了': 0.8,   # homophone of awsl (moved)
-        "泪目": 0.8,       # tearful, moving scene
-        "排面": 0.8,       # grand, impressive
-        "双厨狂喜": 0.7,   # crossover delight
-        "梦幻联动": 0.7,   # dream collaboration
-        "注入灵魂": 0.7,   # soul-injecting highlight
-        "文艺复兴": 0.8,   # revival of a classic
-        # Meme and interaction words
-        "下次一定": 0.55,  # "next time for sure" (coin-stalling meme)
-        "你币没了": 0.45,  # "no coins for you" (threat not to tip)
-        "空降成功": 0.5,   # skipped the intro successfully
-        "标准结局": 0.5,   # predictable ending
-        "典中典": 0.4,     # "classic of classics" (often derogatory)
-        # Highlight moments
-        "名场面": 0.85,    # iconic scene
-        "神仙打架": 0.9,   # clash of masters
-        "前方高能": 0.7,   # highlight incoming
-        # Numeric homophones
-        "666": 0.75,       # impressive play
-        "999": 0.75,       # "666" taken further
-        "2333": 0.6,       # laughter
-        # Abstract slang
-        "草": 0.6,         # lol (neutral)
-        "生草": 0.65,      # hilarious scene
-        # Emotional-collapse scenes
-        "破防了": 0.4,     # defenses broken
-        "我裂开了": 0.3,   # mentally shattered
-        # Domain-specific memes
-        "奥利给": 0.8,     # hype, encouragement
-        "DNA动了": 0.8,    # memory triggered
-        "有内味了": 0.7,   # "that's the flavor"
-        # Negative scenes
-        "公开处刑": 0.5,   # public embarrassment
-        "阴间": 0.3,       # creepy content
-        "阴间滤镜": 0.3,   # creepy filter
-        "血压上来了": 0.3  # infuriating
-    }
-    sentiment_scores = []
-
-    for item in danmu_texts:
-        if item in special_cases:
-            sentiment_scores.append(special_cases[item])
-        else:
-            s = SnowNLP(item)
-            sentiment_scores.append(s.sentiments)
-
-    avg_score = np.mean(sentiment_scores)
-    return avg_score
-
-# file_path='hot_data/亲子/BV1TLXVYREDt/BV1TLXVYREDt_287_danmaku.csv'
-# df = load_data(file_path)
-# scores=analyze_sentiment(df)
-# print(scores)
-
-# Test
-test_words = ['4']
-s = analyze_sentiment(test_words)
-print(s)
@@ -157,5 +157,5 @@ def analyze_text(file_path):
     pynlpir.close()  # make sure the NLPIR resources are released
 
 if __name__ == "__main__":
-    file_path = 'data_all_second_ver.csv'
+    file_path = 'data_all.csv'
     analyze_text(file_path)