# jatosAPI.py
import requests
from datetime import datetime
import argparse
import zipfile
import os
import sys
import numpy as np
import pandas as pd
import json
import shutil
import subprocess

# jap_5ThOJ14yf7z1EPEUpAoZYMWoETZcmJk305719
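
# Overview: this script pulls JATOS study results through the UIowa proxy,
# keeps only recently finished runs, downloads and unpacks the result archive,
# converts the per-subject JSONL logs to CSV, files the raw .txt logs under
# ./data/<subject>/raw, and pushes everything to GitHub.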


def parse_cmd():
    """Parse command-line arguments: the proxy password (TEASE) and a GitHub token."""
    parser = argparse.ArgumentParser(description='API script to pull subject data from JATOS')
    parser.add_argument('-t', type=str, help='TEASE (proxy password)')
    parser.add_argument('-a', type=str, help='GitHub access token')
    return parser.parse_args()
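
# Example invocation (placeholder credentials, not real values):
#   python jatosAPI.py -t <proxy_password> -a <github_token>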


def get_met(tease):
    """Query the JATOS metadata endpoint and return recently finished study result IDs."""
    proxies = {
        'http': f'http://zjgilliam:{tease}@proxy.divms.uiowa.edu:8888',
        'https': f'http://zjgilliam:{tease}@proxy.divms.uiowa.edu:8888',
    }
    url = 'https://jatos.psychology.uiowa.edu/jatos/api/v1/results/metadata'
    headers = {
        'accept': 'application/json',
        'Authorization': 'Bearer jap_5ThOJ14yf7z1EPEUpAoZYMWoETZcmJk305719',
        'Content-Type': 'application/json',
    }
    data = {
        'studyIds': [955, 970, 993, 916, 926, 942]
    }
    response = requests.post(url, headers=headers, json=data, proxies=proxies)
    response_json = response.json()
    print(response.status_code)
    print(response_json)

    # Keep only results that finished within the last 35 days.
    current_time = datetime.now().timestamp() * 1000  # JATOS timestamps are in milliseconds
    cutoff_time = current_time - 35 * (24 * 60 * 60 * 1000)  # 35 days ago, in milliseconds

    # Collect the IDs of qualifying study results.
    study_result_ids = []
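    # Assumed metadata shape (inferred from the parsing below, not from the
    # JATOS docs): {'data': [{'studyResults': [{'id': ..., 'studyState': ...,
    # 'endDate': <ms epoch>}, ...]}, ...]}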
    for study in response_json['data']:
        for study_result in study['studyResults']:
            if study_result['studyState'] == 'FINISHED' and study_result['endDate'] >= cutoff_time:
                study_result_ids.append(study_result['id'])

    print(study_result_ids)
    if len(study_result_ids) == 0:
        print("No study results found.")
        sys.exit()
    return study_result_ids


def get_data(study_result_ids, tease):
    """Download the result data for the given study result IDs and unpack the .txt logs."""
    proxies = {
        'http': f'http://zjgilliam:{tease}@proxy.divms.uiowa.edu:8888',
        'https': f'http://zjgilliam:{tease}@proxy.divms.uiowa.edu:8888',
    }
    headers = {
        'accept': 'application/octet-stream',
        'Authorization': 'Bearer jap_5ThOJ14yf7z1EPEUpAoZYMWoETZcmJk305719',
        'Content-Type': 'application/json',
    }
    # Request the data for each study result
    datas = {
        'studyIds': [955, 970, 993, 916, 926, 942],
        'studyResultIds': study_result_ids
    }
    url = 'https://jatos.psychology.uiowa.edu/jatos/api/v1/results/data'
    response = requests.post(url, headers=headers, json=datas, proxies=proxies)

    # Debugging information
    print(f"Status Code: {response.status_code}")

    txt_files = []  # initialized up front so the function always has something to return
    # Save the archive, filter it to the requested results, and extract the .txt files
    if response.status_code == 200:
        jrzip_file = 'response.jrzip'
        with open(jrzip_file, 'wb') as f:
            f.write(response.content)
        print(f"Downloaded file: {jrzip_file}")

        # Verify that the file is a valid zip file
        if zipfile.is_zipfile(jrzip_file):
            print("The file is a valid zip file.")

            # Create a new zip file containing only the requested study results
            filtered_jrzip_file = 'filtered_response.jrzip'
            with zipfile.ZipFile(jrzip_file, 'r') as zip_ref:
                with zipfile.ZipFile(filtered_jrzip_file, 'w') as filtered_zip_ref:
                    for zip_info in zip_ref.infolist():
                        # Keep entries whose filename contains any of the study_result_ids
                        if any(str(study_result_id) in zip_info.filename for study_result_id in study_result_ids):
                            filtered_zip_ref.writestr(zip_info, zip_ref.read(zip_info.filename))
            print(f"Filtered zip file created: {filtered_jrzip_file}")

            # Extract the filtered zip file
            with zipfile.ZipFile(filtered_jrzip_file, 'r') as zip_ref:
                zip_ref.extractall('./data/raw')
            print(f"Unzipped file: {filtered_jrzip_file}")

            # Remove the original and filtered zip files after extraction
            os.remove(jrzip_file)
            os.remove(filtered_jrzip_file)

            # Walk the extraction directory and collect the paths of all .txt files
            for root, dirs, files in os.walk("./data/raw"):
                for file in files:
                    if file.endswith(".txt"):
                        txt_files.append(os.path.join(root, file))
            print(f"Found {len(txt_files)} .txt files.")
            # The text files are filed per subject later, in move_txt()
        else:
            print("The file is not a valid zip file.")
    else:
        print("Failed to retrieve or save the file.")
        print(f"Response Text: {response.text}")
    return txt_files


def get_next_run_dir(sub):
    base_dir = f'./data/{sub}/processed'
    i = 1
    while os.path.exists(os.path.join(base_dir, f'run-{i}')):
        i += 1
    return os.path.join(base_dir, f'run-{i}')
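
# For example, if ./data/sub01/processed/run-1 and run-2 already exist,
# get_next_run_dir('sub01') returns './data/sub01/processed/run-3'
# ('sub01' is a hypothetical subject ID used only for illustration).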


def convert_beh():
    """Convert the raw JSONL .txt logs into per-subject, per-run CSV files."""
    txt_files = []
    for root, dirs, files in os.walk('./data/raw'):
        for file in files:
            if file.endswith(".txt"):
                txt_files.append(os.path.join(root, file))
    print(f"Found text files: {txt_files}")

    dic = {}
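    # Each .txt file is treated as JSON Lines: one JSON object per line, each
    # assumed (from the json_normalize call below) to hold its trial records
    # under a 'data' key.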
    for idx, txt_path in enumerate(txt_files, start=1):
        records = []
        with open(txt_path, 'r') as file:
            for line in file:
                records.append(json.loads(line))
        dic[idx] = pd.json_normalize(records, 'data')
    print("Data dictionaries created.")

    all_paths = []
    for i in dic:
        df = dic[i]
        for sub in np.unique(df['subject_id']):
            print(f"Processing subject: {sub}")
            # Filter data for this subject
            sub_df = df[df['subject_id'] == sub]
            # Get the next run directory
            run_dir = get_next_run_dir(sub)
            os.makedirs(run_dir, exist_ok=True)
            # Build the CSV file path
            csv_filename = f"{sub}_{sub_df['task'].iloc[0]}_{sub_df['task_vers'].iloc[0]}.csv"
            csv_path = os.path.join(run_dir, csv_filename)
            # Save the CSV
            sub_df.to_csv(csv_path, index=False)
            print(f"Saved {csv_path}")
            all_paths.append(csv_path)
    return all_paths


def move_txt(txt_files):
    """File each raw .txt log under ./data/<subject>/raw, then clean up ./data/raw."""
    dic = {}
    for file_path in txt_files:
        records = []
        with open(file_path, 'r') as file:
            # Read the text file and parse each line as a JSON object
            for line in file:
                records.append(json.loads(line))
        dic[file_path] = pd.json_normalize(records, 'data')

    for file_path, df in dic.items():
        for sub in np.unique(df['subject_id']):
            print(sub)
            target_dir = f'./data/{sub}/raw'
            os.makedirs(target_dir, exist_ok=True)
            # Save the DataFrame to a text file in the target directory
            output_file = os.path.join(target_dir, os.path.basename(file_path))
            with open(output_file, 'w') as f:
                f.write(df.to_string(index=False))
            print(f"Saved {output_file} to {target_dir}")
        os.remove(file_path)
        print(f"Removed {file_path}")

    # Remove the raw directory tree (os.rmdir would fail if anything were
    # left behind, so use shutil.rmtree instead)
    shutil.rmtree('./data/raw', ignore_errors=True)
    return None


def push(toke):
    """Commit and push the new data to GitHub."""
    # Use the folder name as the task/repo name
    task = os.path.basename(os.getcwd())
    subprocess.run(['git', 'config', 'user.email', '[email protected]'])
    subprocess.run(['git', 'remote', 'set-url', 'origin', f'https://miloswrath:{toke}@github.com/HBClab/{task}'])
    subprocess.run(['git', 'config', 'user.name', 'miloswrath'])
    subprocess.run(['git', 'add', '.'])
    subprocess.run(['git', 'commit', '-m', 'Automated Commit -> New Data'])
    subprocess.run(['git', 'push', 'origin', 'main'])
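
# Note: push() assumes the script runs from a git checkout whose directory
# name matches an HBClab repository, and that the token grants push access
# to the main branch.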


def main():
    args = parse_cmd()
    tease = args.t
    toke = args.a
    study_result_ids = get_met(tease)
    # get_data already returns the extracted .txt paths, so reuse them here
    txt_files = get_data(study_result_ids, tease)
    convert_beh()
    move_txt(txt_files)
    push(toke)


if __name__ == "__main__":
    main()