利用 jieba 进行中文分词,并用词云图展示分词结果
import jieba
from scipy.misc import imread
from wordcloud import WordCloud,STOPWORDS,ImageColorGenerator
import matplotlib.pyplot as plt
import pandas as pd
def get_word(path='weixiaobao_data/wen.txt'):
    """Load the article titles from a tab-separated text file.

    Args:
        path: Location of the tab-separated file; it must contain a
            ``MsgTitle`` column. Defaults to the original hard-coded path.

    Returns:
        list[str]: The non-missing values of the ``MsgTitle`` column, in
        file order.
    """
    df = pd.read_csv(path, sep='\t')
    # Empty cells are parsed as NaN (a float) and numeric-looking titles as
    # numbers; both would break jieba.cut downstream, so drop the missing
    # values and make sure every remaining title is a string.
    titles = df['MsgTitle'].dropna().astype(str)
    return titles.tolist()
def get_word_list(mylist, font_path=r'C:\Windows\Fonts\simfang.ttf',
                  output_path='ciyun.jpg'):
    """Segment sentences with jieba and render them as a masked word cloud.

    Every sentence is tokenized with ``jieba.cut`` and the tokens are joined
    with single spaces into one text, which is handed to ``WordCloud``. The
    cloud uses the image ``buity.jpg`` as its mask, is displayed in a
    matplotlib window, and is then written to ``output_path``.

    Args:
        mylist: Iterable of sentences (strings) to segment.
        font_path: Path of the font file WordCloud draws the words with.
            If the file cannot be found, WordCloud fails with
            ``OSError: cannot open resource``. Defaults to the original
            Windows font path.
        output_path: File the rendered cloud is saved to. Defaults to the
            original ``ciyun.jpg``.

    Returns:
        None.
    """
    segmented = [' '.join(jieba.cut(sen)) for sen in mylist]
    new_text = ' '.join(segmented)

    pic_path = 'buity.jpg'
    # scipy.misc.imread was removed in SciPy 1.2. matplotlib (already
    # imported as plt) loads a JPEG into an integer array as well, which is
    # what WordCloud expects for a mask.
    im_mask = plt.imread(pic_path)

    wordcloud = WordCloud(
        background_color='white',
        font_path=font_path,
        mask=im_mask,
        stopwords=STOPWORDS,
    ).generate(new_text)

    plt.imshow(wordcloud)
    plt.axis('off')
    plt.show()
    wordcloud.to_file(output_path)
# Script driver: load the titles first, then segment them and draw the cloud.
word_list = get_word()
get_word_list(mylist=word_list)
运行过程中经常会遇到下面这个错误:
OSError: cannot open resource
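出现这个错误,是因为 WordCloud 在 font_path 指定的路径下找不到对应的字体文件。你需要到系统字体目录(例如 C:\Windows\Fonts)中确认该字体文件的英文文件名:
选中字体文件,点击右键 → 属性,查看其英文文件名(例如 simfang.ttf);
然后把这个英文文件名(连同完整路径)填入 font_path 参数。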