-
Notifications
You must be signed in to change notification settings - Fork 0
/
goog-fin-data.rb
128 lines (112 loc) · 3.4 KB
/
goog-fin-data.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
require 'nokogiri'
require 'restclient'
require 'csv'
require 'json'
# Converts the cell texts of one scraped table row into numbers.
#
# @param array [Array<String>] cell texts; the first entry is the row label
# @return [Array] the label unchanged, followed by every other cell with its
#   commas removed and parsed with String#to_f (non-numeric text becomes 0.0)
def convert_numbers(array)
  array.each_with_index.map do |cell, idx|
    idx.zero? ? cell : cell.gsub(',', '').to_f
  end
end

# Extracts the period labels from the header row.
#
# @param array [Array<String>] header cell texts
# @return [Array<String, nil>] for the first cell, its second
#   whitespace-separated word; for every other cell, the first
#   digits-digits-digits substring (nil when there is none)
def convert_dates(array)
  array.each_with_index.map do |cell, idx|
    idx.zero? ? cell.split(' ')[1] : cell[/(\d)+-(\d)+-(\d)+/]
  end
end

# Collects the stripped text of every +type+ element found under +info+.
#
# @param info [#css] a node (or node set) to search
# @param type [String] CSS selector, e.g. 'td' or 'th'
# @return [Array<String>]
def get_data(info, type)
  info.css(type).map { |node| node.text.strip }
end

# Downloads a company's financial statement page and extracts the annual
# figures from it.
#
# @param company [Array] a [ticker, info] pair; info['exchange'] is used as
#   the exchange prefix of the query
# @return [Array<Array>] [header, revenue, profit, taxes paid, effective tax
#   rate]; every row starts with a label followed by one value per period
# @raise [NoMethodError] when an expected table or row is missing from the page
def get_financials(company)
  ticker = company[0]
  exchange = company[1]['exchange']
  # The query has the form EXCHANGE:TICKER. The previous code put the ticker on
  # both sides of the colon and never used the exchange it had just read.
  url = "https://www.google.com/finance?q=#{exchange.upcase}%3A#{ticker}&fstype=ii&ei=gSvXU6iWEIGiigK-nIHgCw"
  page = Nokogiri::HTML(RestClient.get(url))

  # Query the tables once instead of re-running the same selector chain per row.
  tables = page.css("div,id-incannualdiv").css("table")
  header_info = tables.css("thead")[1]
  rows = tables.css("tbody").css("tbody")[1].css("tr")

  # Row positions are tied to the layout of the scraped page.
  rev_info = rows[2]
  profit_info = rows[4]
  income_before_tax_info = rows[16]
  income_after_tax_info = rows[17]

  header = convert_dates(get_data(header_info, 'th'))
  rev = convert_numbers(get_data(rev_info, 'td'))
  profit = convert_numbers(get_data(profit_info, 'td'))
  income_before_tax = convert_numbers(get_data(income_before_tax_info, 'td'))
  income_after_tax = convert_numbers(get_data(income_after_tax_info, 'td'))

  tax_paid = ['Taxes Paid']
  effective_tax_rate = ['Effective Tax Rate']
  # Index 0 of every row is its label, so the figures start at index 1.
  (1...income_before_tax.size).each do |idx|
    tax_paid << income_before_tax[idx].to_f - income_after_tax[idx].to_f
    revenue = rev[idx].to_f
    # NOTE(review): the rate is taxes as a percentage of revenue, not of
    # pre-tax income - confirm this is the intended definition.
    effective_tax_rate << (revenue.zero? ? 'NA' : tax_paid[idx] / revenue * 100)
  end

  [header, rev, profit, tax_paid, effective_tax_rate]
end
# Reshapes row-oriented financial data into a hash keyed by year.
#
# @param data [Array<Array>] five parallel rows: dates, revenue, profit,
#   taxes and effective tax rate; index 0 of the dates row is stored under
#   'units' and index 0 of the other rows is ignored
# @return [Hash] { 'units' => ..., '<year>' => { 'date', 'revenue', 'profit',
#   'taxes', 'etr' } }, where <year> is the first run of digits in the date
def convert_to_hash(data)
  dates, revenue, profit, taxes, etr = data
  by_year = { 'units' => dates[0] }

  dates.each_with_index do |date, position|
    # Position 0 holds the units label, not a reporting period.
    next if position.zero?

    by_year[date[/(\d)+/]] = {
      'date' => date,
      'revenue' => revenue[position],
      'profit' => profit[position],
      'taxes' => taxes[position],
      'etr' => etr[position]
    }
  end

  by_year
end
# Adds scraped financial data to the first +limit+ companies of +stocks+ and
# writes the whole collection to data/<file>-stock-data.json.
#
# A company whose financials cannot be fetched or parsed keeps its original
# entry; the failure is reported on stderr instead of being dropped silently.
#
# @param stocks [Hash] ticker => info hash; updated in place
# @param file [String] prefix of the output file name
# @param limit [Integer, nil] how many companies to attempt, counting the ones
#   that fail; nil attempts every company. Defaults to 5, the cap that was
#   previously hard-coded.
# @return [Integer] the result of the final File#write
def make_dataset(stocks, file, limit: 5)
  # Take a snapshot of the pairs so the hash is not reassigned mid-iteration.
  attempts = limit ? stocks.first(limit) : stocks.to_a

  attempts.each do |company|
    symbol = company[0]
    begin
      data = get_financials(company)
      # The existing company info wins over scraped keys of the same name.
      stocks[symbol] = convert_to_hash(data).merge(stocks[symbol])
    rescue StandardError => e
      warn "make_dataset: skipping #{symbol}: #{e.class}: #{e.message}"
    end
  end

  File.open("data/#{file}-stock-data.json", 'w') do |f|
    f.write(stocks.to_json)
  end
end
# Loads the company listing for one exchange from data/<exchange>.csv.
#
# @param exchange [String] exchange name; also the CSV file's base name
# @return [Hash] symbol => { 'name', 'market_cap', 'ipo_year', 'sector',
#   'industry', 'exchange' }, plus 'exchange2' => 'amex' when the exchange is
#   'nysemkt'
def companies(exchange)
  result = {}
  # `true`, not the obsolete `TRUE` constant, which is no longer defined in
  # Ruby 3 and raises NameError there.
  CSV.foreach("data/#{exchange}.csv", headers: true) do |row|
    # NOTE(review): 'sector' and 'industry' are looked up in lower case while
    # the other headers are capitalised - confirm against the CSV files.
    info = {
      'name' => row['Name'],
      'market_cap' => row['MarketCap'],
      'ipo_year' => row['IPOyear'],
      'sector' => row['sector'],
      'industry' => row['industry'],
      'exchange' => exchange
    }
    info['exchange2'] = 'amex' if exchange == 'nysemkt'
    result[row['Symbol']] = info
  end
  result
end
# Script entry point: reads the three exchange listings (nasdaq, nysemkt,
# nyse) and then builds one dataset per exchange in the opposite order
# (nyse, nysemkt, nasdaq). All listings are loaded before any dataset is made.
#
# @return the value returned by the final make_dataset call
def main
  listings = %w[nasdaq nysemkt nyse].map do |exchange|
    [exchange, companies(exchange)]
  end

  listings.reverse.map { |exchange, stocks| make_dataset(stocks, exchange) }.last
end
# Run the scrape only when this file is executed directly, so that loading it
# from another script or a test does not start reading files and hitting the
# network.
main if __FILE__ == $PROGRAM_NAME