statistics_model2025/word_frequence.py
2025-03-29 15:46:52 +08:00

25 lines
709 B
Python

import matplotlib.pyplot as plt
import numpy as np
from wordcloud import WordCloud
import jieba
from PIL import Image
from collections import Counter
import pandas as pd # 新增pandas库
# Word-frequency statistics for the input text file: segment the text with
# jieba, count every token, and export the most frequent ones to Excel.
INPUT_PATH = "2021-2024国一论文题目.txt"
OUTPUT_PATH = "2021-2024国一论文题目.xlsx"
TOP_N = 200  # number of highest-frequency words written to the spreadsheet

# Only the read needs the open handle, so the file is closed as soon as the
# text has been loaded.
with open(INPUT_PATH, "r", encoding="utf-8") as file:
    text1 = file.read()

seg_list1 = jieba.cut(text1)
# Joining on spaces and re-splitting on whitespace drops tokens that consist
# only of whitespace (such as line breaks), so they are not counted as words.
word_list1 = " ".join(seg_list1)

# Count how often each token occurs.
word_counts = Counter(word_list1.split())

# (word, count) pairs ordered from most to least frequent.
sorted_word_counts = word_counts.most_common()

# Keep the TOP_N most frequent entries; the column headers are
# "词语" (word) and "词频" (frequency).
df = pd.DataFrame(sorted_word_counts[:TOP_N], columns=["词语", "词频"])

# Write the table to Excel without the DataFrame's index column.
df.to_excel(OUTPUT_PATH, index=False)