-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdata_analysis.py
147 lines (124 loc) · 4.95 KB
/
data_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# -*- coding: utf-8 -*-
# author: inspurer(月小水长)
# pc_type lenovo
# create_date: 2019/3/2
# file_name: data_analysis.py
# github https://github.com/inspurer
# qq_mail [email protected]
# 微信公众号 月小水长(ID: inspurer)
from configure import *
from pymongo import MongoClient
import matplotlib.pyplot as plt
import matplotlib
# 设置中文字体和负号正常显示
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['axes.unicode_minus'] = False
class dataAnalysis(object):
def __init__(self):
self.client = MongoClient(MONGO_URL, connect=False)
self.db = self.client[MONGO_DB]
self.table = self.db[MONGO_TABLE]
self.raw_data = self.table.find()
print(self.raw_data.count())
# for result in results:
# print(result)
pass
def age_counter(self):
self.age_dict = dict()
for line in self.raw_data:
age = line.get('age')
if age not in self.age_dict.keys():
self.age_dict[age] = 1
else:
self.age_dict[age] +=1
print(self.age_dict)
def wordcloud_util(self):
all_sign = ""
for line in self.table.find():
sign = line.get('nickname')
all_sign = all_sign + sign + "\n"
if os.path.exists("wc/sign.txt"):
os.remove("wc/sign.txt")
with open("wc/sign.txt","a+",encoding="utf-8") as f:
f.write(all_sign)
def data_viewer(self):
age_list = sorted((list)(self.age_dict.keys()))
number =list()
for age in age_list:
number.append(self.age_dict.get(age))
x = range(len(number))
"""
绘制条形图
left: 长条形中点横坐标
height: 长条形高度
width: 长条形宽度,默认值0
.8
label: 为后面设置legend准备
"""
rects1 = plt.bar(x=x, height=number, width=0.5, alpha=0.8, color='#1ABDE6', label="直方图")
plt.plot(age_list,number,color='#0066ff',label="折线图",linewidth = 1,marker='*',markersize=10)
# plt.ylim(0, 50) # y轴取值范围
plt.ylabel("人数/人")
"""
设置x轴刻度显示值
参数一:中点坐标
参数二:显示值
"""
plt.xticks(x, age_list)
plt.xlabel("年龄")
plt.title("uki 活跃用户各年龄人数直方-折线图")
plt.legend() # 设置题注 # 编辑文本
for rect in rects1:
height = rect.get_height()
plt.text(rect.get_x() + rect.get_width() / 2, height + 1, str(height), ha="center", va="bottom")
#plt.show()
plt.figure()
# 饼状图
labels = ['10-15岁','15-20岁','20-25岁','25-30岁','其它']
ages_dict = dict.fromkeys(labels,0)
for key in self.age_dict.keys():
age = int(key)
if age >= 10 and age < 15:
ages_dict['10-15岁'] += self.age_dict.get(key)
elif age >= 15 and age < 20:
ages_dict['15-20岁'] += self.age_dict.get(key)
elif age >= 20 and age < 25:
ages_dict['20-25岁'] += self.age_dict.get(key)
elif age >= 25 and age < 30:
ages_dict['25-30岁'] += self.age_dict.get(key)
else:
ages_dict['其它'] += self.age_dict.get(key)
total = self.raw_data.count()
explode = [0 for i in range(len(labels))] # 0.1 凸出这部分,
max = 0
index = 0
percentage = []
for i,label in enumerate(labels):
if ages_dict[label] > max:
max = ages_dict[label]
index = i
percentage.append(ages_dict[label]/total)
explode[index] = 0.1
plt.axes(aspect=1) # set this , Figure is round, otherwise it is an ellipse
# autopct ,show percet
plt.pie(x=percentage, labels=labels, explode=explode, autopct='%3.1f %%',
shadow=True, labeldistance=1.1, startangle=90, pctdistance=0.6
)
plt.legend(loc='best')
plt.title("uki 活跃用户年龄段比例图")
'''
labeldistance,文本的位置离远点有多远,1.1指1.1倍半径的位置
autopct,圆里面的文本格式,%3.1f%%表示小数有三位,整数有一位的浮点数
shadow,饼是否有阴影
startangle,起始角度,0,表示从0开始逆时针转,为第一块。一般选择从90度开始比较好看
pctdistance,百分比的text离圆心的距离
patches, l_texts, p_texts,为了得到饼图的返回值,p_texts饼图内部文本的,l_texts饼图外label的文本
'''
plt.show()
def main(self):
self.age_counter()
self.wordcloud_util()
self.data_viewer()
if __name__ == '__main__':
dataanalysis = dataAnalysis()
dataanalysis.main()