# Source listing metadata: Python, 25 lines, 709 B.
from collections import Counter

import jieba
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd  # newly added: pandas
from PIL import Image
from wordcloud import WordCloud


# Input text file that gets segmented, and the Excel workbook that receives
# the resulting frequency table.
INPUT_PATH = "2021-2024国一论文题目.txt"
OUTPUT_PATH = "2021-2024国一论文题目.xlsx"

# Number of highest-frequency words written to the workbook.
TOP_N = 200


def count_words(tokens):
    """Count how often each word occurs in an iterable of string tokens.

    Whitespace is never counted: every token is split on runs of
    whitespace, so whitespace-only tokens contribute nothing and a token
    that contains whitespace is counted piece by piece. This gives the
    same counts as joining all tokens with single spaces and splitting the
    joined string, without building that intermediate string.

    Args:
        tokens: Iterable of strings, e.g. the output of ``jieba.cut``.

    Returns:
        A ``collections.Counter`` mapping each word to its number of
        occurrences, with keys in order of first appearance.
    """
    counts = Counter()
    for token in tokens:
        # str.split() without arguments returns [] for whitespace-only input.
        counts.update(token.split())
    return counts


def main():
    """Segment the input text, rank the words, and export the top entries.

    Reads ``INPUT_PATH`` as UTF-8, segments the text with ``jieba.cut``,
    counts the resulting words, and writes the ``TOP_N`` most frequent
    ones to ``OUTPUT_PATH`` with the columns '词语' (word) and
    '词频' (frequency).

    Raises:
        OSError: If the input file cannot be opened or read.
    """
    with open(INPUT_PATH, "r", encoding="utf-8") as file:
        text = file.read()

    word_counts = count_words(jieba.cut(text))

    # most_common(n) returns the n highest counts in descending order; words
    # with equal counts keep the order in which they were first seen.
    top_words = word_counts.most_common(TOP_N)

    df = pd.DataFrame(top_words, columns=['词语', '词频'])

    # NOTE: pandas needs an Excel writer engine (e.g. openpyxl) for .xlsx.
    df.to_excel(OUTPUT_PATH, index=False)


if __name__ == "__main__":
    main()