-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
165 lines (130 loc) · 5.44 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import functions_framework
import json
from google.cloud import storage
import pandas as pd
import re
# Global variables
BUCKET_NAME = 'gadoe_data'
FILE_NAME = 'gshs/GSHS_2024_{survey_type}_629.xlsx'
cached_data = None
survey_types = ['ES', 'MSHS', 'Parents', 'Personnel']
def get_data_from_storage():
global cached_data
if cached_data is not None:
print("Using cached data")
return cached_data
try:
print("Reading data from storage")
storage_client = storage.Client()
bucket = storage_client.bucket(BUCKET_NAME)
cached_data = {}
for survey_type in survey_types:
print(f"loading survey type: {survey_type}")
file = FILE_NAME.format(survey_type=survey_type)
blob = bucket.blob(file)
contents = blob.download_as_bytes()
cached_data[survey_type] = pd.read_excel(contents) # Use pandas to parse Excel
except Exception as e:
print(f"Error reading from storage: {e}")
return "Internal Server Error", 500
finally:
return cached_data
def list_questions(survey_data, parms):
print(f"listing questions: ")
survey_type = parms['survey_type']
data = survey_data[survey_type]
pattern = r'^\d+\. .+'
for index, string in enumerate(list(data.columns)):
if re.match(pattern, string):
return list(survey_data[survey_type].columns)[index:]
return list(survey_data[survey_type].columns)[10:]
def tabulate_question(data, parms):
print(f"tabulating question: {parms['question']}")
try:
survey_data = data[parms['survey_type']]
print(f"{survey_data=}")
question = parms['question']
list_of_questions = list_questions(data, parms)
for idx, q in enumerate(list_of_questions):
if question in q:
question = q
print(f"found question: {q}")
break
# question = "5. I get along with other students at school."
by_category = parms['by_category']
if by_category == 'School':
category = ' SchoolName'
all_category = 'All Schools'
elif by_category == 'Ethnicity':
category = 'Ethnicity'
all_category = 'All Ethnicities'
elif by_category == 'Gender':
category = 'Gender'
all_category = 'All Genders'
else:
category = ' SchoolName'
print(f"tabulating data for question: {question} and category: {category}")
#crosstable to get the counts
transformed_df = pd.crosstab(survey_data[category], survey_data[question])
print(transformed_df.head())
transformed_df.reset_index(inplace=True)
transformed_df.columns.name = None
# Calculate the sums of the response columns
column_names = [str(col) for col in transformed_df.columns[1:]]
sums = transformed_df[column_names].sum()
# Create a new row with the sums
all_category_row = pd.DataFrame([[all_category] + sums.tolist()], columns=transformed_df.columns)
# Append the new row to the original DataFrame
df_with_all_categories = pd.concat([all_category_row, transformed_df], ignore_index=True)
# Add a 'Totals' column
df_with_all_categories['Totals'] = df_with_all_categories[column_names].sum(axis=1)
except Exception as e:
print(f"Error tabulating question: {e}")
return "Internal Server Error", 500
finally:
return df_with_all_categories
@functions_framework.http
def service(request):
"""HTTP Cloud Function.
Args:
request (flask.Request): The request object.
<https://flask.palletsprojects.com/en/1.1.x/api/#incoming-request-data>
Returns:
The response text, or any set of values that can be turned into a
Response object using `make_response`
<https://flask.palletsprojects.com/en/1.1.x/api/#flask.make_response>.
"""
print("servicing request ... ")
request_json = request.get_json(silent=True)
request_args = request.args
path = request.path
print(f"request path: {path}")
print(request_json)
data = get_data_from_storage()
# print(data.columns)
try:
name = None
if request_json and 'name' in request_json:
name = request_json['name']
elif request_args and 'name' in request_args:
name = request_args['name']
elif request_json and 'action' in request_json:
print(f"the action is: {request_json['action']}")
if request_json['action'] == 'tabulate':
print("in tabulate action ... ")
df = tabulate_question(data, request_json)
print(f"the length of the tabulated dataframe: {df.index.size}")
print(json.dumps(df.to_json(orient='records')))
return json.dumps({'data_table': df.to_json(orient='records')})
elif request_json['action'] == 'list':
print("in list action ... ")
questions = list_questions(data, request_json)
print(f"the length of the questions: {len(questions)}")
print(json.dumps(questions))
return json.dumps({'questions': questions})
else:
name = 'World'
return json.dumps({'message': f'Hello {name}!'})
except Exception as e:
print(f"Error processing request: {e}")
return json.dumps({'error': str(e)})