使用Power BI、pandas做微信好友的数据分析
最近学习了使用Power BI、pandas相关知识,一直没有什么好的想法来实践一下,看了下之前写的一篇查看微信好友男女比例的文章,觉得还可以再继续扩展一下,于是乎有了下文,废话不多说,直接开始。
环境准备:Power BI软件、pandas、itchat、plotly模块,各位可自行百度安装。
大致流程:step1 获取微信好友数据 step2 清洗、整理数据 step3 使用Power BI、plotly实现数据可视化。
step1 获取微信好友数据,代码如下:
import itchat #导入itchat
login = itchat.login() # log in to WeChat
friends = itchat.get_friends() # fetch the friend data
print(friends) # print the fetched friend data
结果如图所示:我们获取的数据是一个list型,我们要的数据是从第二个开始的,这些数据内容包括了微信好友的昵称、性别、所在城市、省份、备注名称、座右铭等等,然后从中获取我需要的数据,并以字典形式保存。代码如下:
# Collect the fields of interest from every friend record into one list per
# field, then store those lists in the dict `data`, keyed by field name.
# The first element of `friends` is skipped, as in the original walkthrough
# (the wanted records start at the second element).
contacts = friends[1:]

IsOwner = [contact['IsOwner'] for contact in contacts]
HideInputBarFlag = [contact['HideInputBarFlag'] for contact in contacts]
Sex = [contact['Sex'] for contact in contacts]
Province = [contact['Province'] for contact in contacts]
RemarkName = [contact['RemarkName'] for contact in contacts]
PYQuanPin = [contact['PYQuanPin'] for contact in contacts]
City = [contact['City'] for contact in contacts]
Signature = [contact['Signature'] for contact in contacts]
NickName = [contact['NickName'] for contact in contacts]

# Key order is kept as in the original so the saved columns keep their order.
data = {
    'IsOwner': IsOwner,
    'HideInputBarFlag': HideInputBarFlag,
    'Sex': Sex,
    'Province': Province,
    # Fix: the original stored the string literals 'RemarkName' and
    # 'PYQuanPin' here instead of the collected lists, so both columns held
    # a constant instead of the per-friend values.
    'RemarkName': RemarkName,
    'PYQuanPin': PYQuanPin,
    'City': City,
    'Signature': Signature,
    'NickName': NickName,
}
print(data)  # print the result
这样我们就把微信好友所在省份、城市、性别等获取了下来,并以字典形式保存。
step2 数据保存
import pandas as pd

# Build a two-dimensional table from the dict collected in step 1.
data2 = pd.DataFrame(data)
print(data2)
# Save the table in CSV format. (Fix: in the original this explanation
# followed the call without a leading '#', which is a syntax error.)
data2.to_csv('123.csv')
保存的csv数据如下所示:
step3 数据分析及可视化
查看每个城市有多少好友
import pandas as pd
from numpy import nan as NaN
import numpy as np

# Load the CSV file saved in the previous step.
data = pd.read_csv('./123.csv', encoding='utf-8')
print(data.head())  # first five rows (head() defaults to 5)

# Group the rows by City and count the Sex entries in each group,
# i.e. how many friends there are per city.
friends_number = data.groupby('City')['Sex'].count()
print(friends_number)
这里使用Power BI来做个可视图查看好友城市分布以及好友位置在国内分布情况,图中可以看出我的朋友多分布在江浙沪皖一带,其中杭州朋友最多。
直方图
查看微信男女比例:
import numpy as np
import plotly as py
import plotly.graph_objs as go

# Display label for each Sex code. The mapping follows the original label
# order ['其他', '男性', '女性'] applied to the sorted codes.
# NOTE(review): assumes the codes are 0 / 1 / 2 - confirm against the data.
SEX_LABELS = {0: '其他', 1: '男性', 2: '女性'}

d = data['Sex'].values
print(d)

# `u` holds the sorted distinct codes; despite its name, `indices` holds how
# many times each code occurs (return_counts=True).
u, indices = np.unique(d, return_counts=True)
print(u)
print(indices)  # number of occurrences of each distinct value

# Look the counts up by code instead of by position, so a code that does not
# occur in the data yields 0 rather than an IndexError or a shifted value.
count_by_code = dict(zip(u.tolist(), indices.tolist()))
other = count_by_code.get(0, 0)   # other / unspecified
male = count_by_code.get(1, 0)    # male
female = count_by_code.get(2, 0)  # female

# One label per code actually present, so labels and values always line up.
# The unused matplotlib `my_font` object of the original was dropped: it was
# never passed to any call.
labels = [SEX_LABELS.get(code, str(code)) for code in count_by_code]
layout = go.Layout(title='我的微信好友男女比例图')
trace = [go.Pie(labels=labels, values=indices, hole=0.5, textinfo='label+value+percent')]
config = go.Figure(data=trace, layout=layout)
py.offline.plot(config, filename='男女比例.html')
结果如下:微信好友中男性占62.3%,女性占30.6%。
以上就是我对自己好友的一些简单分析,大家可以看看啊,明天又要上班了,祝大家工作愉快!ps:全部代码如下。 获取数据: # -*- coding:utf-8 -*- import pandas as pd import itchat import matplotlib as mpl login = itchat.login() data = {} IsOwner = [] HideInputBarFlag = [] Sex = [] Province = [] RemarkName = [] PYQuanPin = [] City = [] Signature = [] NickName = [] friends = itchat.get_friends() print(friends) for i in friends[1:]: #print(i) IsOwner.append(i['IsOwner']) HideInputBarFlag.append(i['HideInputBarFlag']) Sex.append(i['Sex']) Province.append(i['Province']) RemarkName.append(i['RemarkName']) PYQuanPin.append(i['PYQuanPin']) City.append(i['City']) Signature.append(i['Signature']) NickName.append(i['NickName']) #print(type(IsOwner)) #print(IsOwner) data['IsOwner'] = IsOwner data['HideInputBarFlag'] = HideInputBarFlag data['Sex'] = Sex data['Province'] = Province data['RemarkName'] = 'RemarkName' data['PYQuanPin'] = 'PYQuanPin' data['City'] = City data['Signature'] = Signature data['NickName'] = NickName print(data) data2 = pd.DataFrame(data) print(data2) data2.to_csv('123.csv')
数据读取及可视化:
import pandas as pd
from numpy import nan as NaN
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from matplotlib import font_manager
import plotly as py
import plotly.graph_objs as go

# Load the CSV file written by the data-fetching script.
data = pd.read_csv('./123.csv', encoding='utf-8')
print(data.head())  # first five rows (head() defaults to 5)
# print(data.describe())

# Group the rows by City and count the Sex entries in each group,
# i.e. how many friends there are per city.
friends_number = data.groupby('City')['Sex'].count()
print(friends_number)

# Show the Signature column, then the same column with missing values removed.
Signature = data['Signature']
print(Signature)
print(Signature.dropna(how='all'))

# The sex-ratio pie chart below is left disabled, as in the original listing.
# male = data.groupby('Sex').count()
# print(male)
# print(data.groupby('City').count()['Sex'])
# d = data['Sex'].values
# print(d)
# u,indices = np.unique(d,return_counts=True)
# print(u)
# print(indices)  # number of occurrences of each distinct value
#
# other = indices[0]
# male = indices[1]  # male
# female = indices[2]  # female
# print(female)
#
# my_font= matplotlib.font_manager.FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc',size = 18)
# labels = ['其他','男性','女性']
# layout = go.Layout(title='我的微信好友男女比例图')
# trace = [go.Pie(labels=labels, values=indices, hole=0.5, textinfo='label+value+percent')]
# config = go.Figure(data=trace, layout=layout)
# py.offline.plot(config, filename='男女比例.html')