# statistics_model2025/word_cloud.py
#
# 32 lines
# 738 B
# Python

import matplotlib.pyplot as plt
import numpy as np
from wordcloud import WordCloud
import jieba
import pandas as pd
from PIL import Image
# Input CSV of danmaku (bullet comments); the path is hard-coded.
file_path = 'hot_data/搞笑/BV1BgQBY3EcE/BV1BgQBY3EcE_360_danmaku.csv'

# Load only the danmaku text column, selected explicitly by name.
text1 = pd.read_csv(file_path, usecols=['弹幕内容'])

# Drop missing rows, coerce the remaining values to str, and merge them
# into a single space-separated string.
danmaku_lines = text1['弹幕内容'].dropna().astype(str)
text = ' '.join(danmaku_lines)

# Segment the text with jieba, then re-join the tokens with spaces so the
# word cloud receives whitespace-delimited words.
seg_list = jieba.cut(text)
word_list = " ".join(seg_list)

# Word-cloud settings, collected in one place.
cloud_options = {
    "font_path": "simhei.ttf",  # font used to render the Chinese text
    "width": 800,
    "height": 600,
    "background_color": "white",
}

# Build the cloud object, then populate it from the segmented text
# (generate() fills in and returns the same WordCloud instance).
wc = WordCloud(**cloud_options)
wc.generate(word_list)

# Render the cloud in a 10x8 inch figure with the axes hidden.
plt.figure(figsize=(10, 8))
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")
plt.show()