1
- import spacy
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib .pyplot as plt
4
+ import seaborn as sns
2
5
3
- nlp = spacy . load ( "en_core_web_sm" )
6
def main():
    """Run a series of pandas / seaborn plotting demos.

    Each demo builds its data, draws one or more figures and displays them
    with ``plt.show()``.  Two CSV files are read using paths relative to the
    current working directory: ``LEC/examples/tips.csv`` and
    ``LEC/examples/macrodata.csv``.
    """
    _plot_random_lines()
    _plot_random_bars()

    # The tips dataset is loaded once and shared by every demo that needs it;
    # the extra ``tip_pct`` column does not affect the day/size crosstab.
    tips = _load_tips()
    _plot_party_size_shares(tips)
    _plot_tip_pct_bars(tips)
    _plot_tip_pct_distribution(tips)

    _plot_bimodal_sample()
    _plot_macro_relationships()
    _plot_tip_pct_facets(tips)


def _load_tips():
    """Read the tips CSV and add ``tip_pct`` = tip / (total_bill - tip)."""
    tips = pd.read_csv('LEC/examples/tips.csv')
    tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])
    return tips


def _plot_random_lines():
    """Line plots of a random-walk Series and a four-column DataFrame."""
    index = np.arange(0, 100, 10)

    s = pd.Series(np.random.randn(10).cumsum(), index=index)
    s.plot()
    plt.show()

    df = pd.DataFrame(np.random.randn(10, 4).cumsum(0),
                      columns=['A', 'B', 'C', 'D'],
                      index=index)
    df.plot()
    plt.show()


def _plot_random_bars():
    """Vertical, horizontal, grouped and stacked bar plots of random data."""
    _fig, axes = plt.subplots(2, 1)
    data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))
    data.plot.bar(ax=axes[0], color='k', alpha=0.7)
    data.plot.barh(ax=axes[1], color='k', alpha=0.7)
    plt.show()

    df = pd.DataFrame(np.random.rand(6, 4),
                      index=['one', 'two', 'three', 'four', 'five', 'six'],
                      columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
    print(df)
    df.plot.bar()
    plt.show()

    df.plot.barh(stacked=True, alpha=0.5)
    plt.show()


def _plot_party_size_shares(tips):
    """Bar plot of the per-day share of each party size (sizes 2 to 5)."""
    party_counts = pd.crosstab(tips['day'], tips['size'])
    print(party_counts)

    # Label-based slice: keeps the columns labelled 2 through 5 inclusive.
    party_counts = party_counts.loc[:, 2:5]
    # Divide every row by its own total so each row sums to 1.
    party_pcts = party_counts.div(party_counts.sum(axis=1), axis=0)
    print(party_pcts)

    party_pcts.plot.bar()
    plt.show()


def _plot_tip_pct_bars(tips):
    """Horizontal seaborn bar plots of ``tip_pct`` by day, then split by time."""
    print(tips.head())
    sns.barplot(x='tip_pct', y='day', data=tips, orient='h')
    plt.show()

    # The style must be set BEFORE the axes are created, otherwise it has no
    # effect on this figure.  It stays active for all later plots.
    sns.set(style='whitegrid')
    sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')
    plt.show()


def _plot_tip_pct_distribution(tips):
    """Histogram and density estimates of ``tip_pct`` via pandas plotting."""
    tips['tip_pct'].plot.hist(bins=50)
    plt.show()

    tips['tip_pct'].plot.density()
    plt.show()

    tips['tip_pct'].plot.kde()
    plt.show()


def _plot_bimodal_sample():
    """Histogram + KDE of a mixture of two normal samples (200 draws each)."""
    comp1 = np.random.normal(0, 1, size=200)
    comp2 = np.random.normal(10, 2, size=200)
    values = pd.Series(np.concatenate([comp1, comp2]))

    # ``distplot`` is deprecated; ``histplot`` with a density-normalised
    # histogram and a KDE overlay is its documented replacement.
    sns.histplot(values, bins=100, color='k', kde=True, stat='density')
    plt.show()


def _plot_macro_relationships():
    """Regression and pair plots of log-differenced macro series."""
    macro = pd.read_csv('LEC/examples/macrodata.csv')
    data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
    trans_data = np.log(data).diff().dropna()
    print(trans_data.tail(5))

    x_col, y_col = 'm1', 'unemp'
    # x / y are passed by keyword: current seaborn rejects positional use.
    sns.regplot(x=x_col, y=y_col, data=trans_data)
    plt.title('Changes in log %s versus log %s' % (x_col, y_col))
    plt.show()

    sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})
    plt.show()


def _plot_tip_pct_facets(tips):
    """Faceted categorical plots of ``tip_pct`` (rows with extreme values dropped)."""
    # ``catplot`` is the current name of the removed ``factorplot``; every
    # call passes ``kind`` explicitly, so the changed default does not matter.
    below_one = tips[tips.tip_pct < 1]

    sns.catplot(x='day', y='tip_pct', hue='time', col='smoker',
                kind='bar', data=below_one)
    plt.show()

    sns.catplot(x='day', y='tip_pct', row='time', col='smoker',
                kind='bar', data=below_one)
    plt.show()

    sns.catplot(x='tip_pct', y='day', kind='box',
                data=tips[tips.tip_pct < 0.5])
    plt.show()
93
+
94
# Script entry point: run the plotting demos only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments