Skip to content

Commit

Permalink
added good stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
miloswrath committed Sep 11, 2024
1 parent 9a568c0 commit 9f5cdb9
Show file tree
Hide file tree
Showing 3 changed files with 399 additions and 0 deletions.
103 changes: 103 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Workflow display name shown in the Actions tab.
name: Task1 QC

# Trigger: run on every push that lands on the main branch.
on:
  push:
    branches: [main]

jobs:
process_raw:
  # Finds the CSV files touched by commits from the last 24 hours and
  # publishes the subject / task / version parsed from their file names
  # (expected pattern: <sub>_<task>_<version>.csv) as job outputs.
  runs-on: self-hosted
  outputs:
    sub: ${{ steps.set_vars.outputs.sub }}
    task: ${{ steps.set_vars.outputs.task }}
    version: ${{ steps.set_vars.outputs.version }}

  steps:
    - name: checkout code and return recently uploaded file in /data
      uses: actions/checkout@v3
      with:
        # The default depth of 1 leaves only the tip commit in the clone,
        # so `git log --since` could never see earlier pushes from the
        # 24-hour window. Fetch the full history instead.
        fetch-depth: 0
    - name: Get changed files
      run: |
        # Get the list of CSV files changed in the last 24 hours.
        # `--pretty=format:` emits blank separator lines; drop them.
        data=$(git log --since="24 hours ago" --name-only --pretty=format: -- '*.csv' | sed '/^$/d' | sort -u)
        # Export the data variable to the environment. The value may span
        # several lines, which requires the delimiter (heredoc) syntax;
        # a plain `name=value` line is rejected for multi-line values.
        {
          echo "data<<CHANGED_CSV_EOF"
          echo "$data"
          echo "CHANGED_CSV_EOF"
        } >> "$GITHUB_ENV"
        # Print the changed CSV files
        echo "Changed CSV files in the last 24 hours: $data"
    - name: set up python
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
    - name: parse raw
      id: set_vars
      run: |
        sub=""
        task=""
        version=""
        # Read one path per line so paths containing spaces stay intact.
        while IFS= read -r file; do
          [ -n "$file" ] || continue
          filename=$(basename "$file")
          IFS='_' read -r sub task version <<< "$filename"
          version="${version%.csv}" # Remove the .csv extension from version
          echo "Subject: $sub"
          echo "Task: $task"
          echo "Version: $version"
        done <<< "$data"
        # NOTE(review): only the values of the LAST file are published
        # (same as before, where each iteration overwrote the outputs).
        # Confirm whether every changed file should be processed.
        # `::set-output` is deprecated; write to $GITHUB_OUTPUT instead.
        {
          echo "sub=$sub"
          echo "task=$task"
          echo "version=$version"
        } >> "$GITHUB_OUTPUT"
run_qc:
  # Runs the QC script for every subject / task / version combination
  # published by process_raw whose processed CSV exists on disk.
  runs-on: self-hosted
  needs: process_raw
  env:
    # Hand the upstream outputs to the shell as environment variables
    # instead of pasting `${{ ... }}` into the script text: the values
    # come from file names, so inlining them would allow shell injection
    # and would break on values containing spaces.
    QC_SUBS: ${{ needs.process_raw.outputs.sub }}
    QC_TASKS: ${{ needs.process_raw.outputs.task }}
    QC_VERSIONS: ${{ needs.process_raw.outputs.version }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v3

    - name: Debug env vars
      run: |
        echo "sub=$QC_SUBS"
        echo "task=$QC_TASKS"
        echo "version=$QC_VERSIONS"
    - name: run quality control
      run: |
        # The default shell runs without pipefail, so a failing python
        # process would be masked by the exit status of `tee`.
        set -o pipefail
        # Word-splitting is intentional: each variable may hold a
        # space-separated list.
        for sub in $QC_SUBS; do
          echo "Processing subject: $sub"
          for task in $QC_TASKS; do
            echo "Processing task: $task"
            for vers in $QC_VERSIONS; do
              echo "Processing version: $vers"
              csv_file="./data/${sub}/processed/${sub}_${task}_${vers}.csv"
              log_file="./data/${sub}/qc_${task}_${vers}.log"
              echo "CSV file: $csv_file"
              echo "Log file: $log_file"
              if [ -f "$csv_file" ]; then
                # Announce before starting so the log order matches reality.
                echo "QC for ${sub}_${task}_${vers} running"
                python ./code/ATSqC.py -s "$csv_file" -o "./data/${sub}/" -sub "$sub" | tee "$log_file"
              else
                echo "CSV file $csv_file does not exist"
              fi
            done
          done
        done
push:
  # Commits whatever is present in the workspace and pushes it back to
  # the repository.
  # NOTE(review): this job has no checkout step, so it presumably relies
  # on the self-hosted runner keeping the workspace left by run_qc.
  # Confirm that both jobs always land on the same runner.
  runs-on: self-hosted
  needs: run_qc
  steps:
    - name: Commit and Push Changes
      run: |
        # Repository-local identity: `--global` would permanently rewrite
        # the git config of the self-hosted runner account.
        git config user.name "miloswrath"
        # NOTE(review): the address below looks like a redacted
        # placeholder; confirm the real value.
        git config user.email "[email protected]"
        git remote set-url origin "https://x-access-token:${PUSH_TOKEN}@github.com/${GITHUB_REPOSITORY}"
        git add .
        # `git commit` exits non-zero when nothing is staged, which would
        # fail the job on every run that produced no new files.
        if git diff --cached --quiet; then
          echo "No changes to commit"
        else
          git commit -m "Automated commit by GitHub Actions"
          git push
        fi
      env:
        # Same token the remote URL used before, now passed through the
        # environment instead of being interpolated into the script.
        PUSH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # NOTE(review): GIT_PAT is exported but no command here reads it.
        # If the PAT was meant to authenticate the push, switch PUSH_TOKEN
        # to it. Be aware that PAT pushes re-trigger this workflow.
        GITHUB_TOKEN: ${{ secrets.GIT_PAT }}
71 changes: 71 additions & 0 deletions code/ConvertBeh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# %%
import pandas as pd
import numpy as np
import os
import json

def parse_args(argv=None):
    """Parse the command-line options of the behavior-data converter.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, exactly as before.

    Returns:
        argparse.Namespace with the attributes ``submission``, ``out``,
        ``sub``, ``task`` and ``taskvers``.

    Raises:
        SystemExit: argparse exits with status 2 when an option is missing
            or malformed.
    """
    import argparse
    parser = argparse.ArgumentParser(description='Convert Behavior Data')
    # Every option is consumed unconditionally downstream; requiring them
    # here yields a usage error instead of a later crash on ``None``.
    parser.add_argument('-submission', type=str, required=True, help='Path to the submission file')
    parser.add_argument('-out', type=str, required=True, help='Output directory')
    parser.add_argument('-sub', type=str, required=True, help='Subject ID')
    parser.add_argument('-task', type=str, required=True, help='Task name')
    parser.add_argument('-taskvers', type=str, required=True, help='Task version')
    return parser.parse_args(argv)


def rename_files(submission, subject, task, taskvers):
    """Rename the raw ``.txt`` file under ``submission`` to the canonical name.

    The tree rooted at ``submission`` is walked and the first directory that
    contains a ``.txt`` file is used. That file is renamed in place to
    ``<subject>_<task>_<taskvers>.txt``.

    Args:
        submission: Directory to search for the raw ``.txt`` file.
        subject: Subject ID used in the new file name.
        task: Task name used in the new file name.
        taskvers: Task version used in the new file name.

    Returns:
        Path of the renamed file. If no ``.txt`` file is found (including the
        case where ``submission`` is not a directory), ``submission`` is
        returned unchanged.
    """
    new_name = f"{subject}_{task}_{taskvers}.txt"
    for root, _dirs, files in os.walk(submission):
        # Sorted so the choice is deterministic when several files exist.
        txt_files = sorted(name for name in files if name.endswith(".txt"))
        if not txt_files:
            continue
        # The file is renamed inside ``root``, so that is the path to hand
        # back; adding an extra path component would point at a file that
        # does not exist.
        target = os.path.join(root, new_name)
        # Rename a single file only: moving every .txt onto the same target
        # would silently overwrite all but the last one. A file that already
        # carries the canonical name is used as-is.
        if new_name not in txt_files:
            os.rename(os.path.join(root, txt_files[0]), target)
        print(target)
        return target
    return submission




def convert_beh(submission, out):
    """Convert a JSON-lines submission file into CSV file(s).

    Each non-blank line of ``submission`` is parsed as a JSON object; the
    records stored under its ``data`` key are flattened into one DataFrame.
    For every unique ``subject_id`` a file named
    ``<subject_id>_<task>_<task_vers>.csv`` is written to ``out``, where task
    and version are taken from the first row.

    Args:
        submission: Path to the JSON-lines text file.
        out: Output directory; created when it does not exist.

    Raises:
        FileNotFoundError: ``submission`` is not an existing file.
        json.JSONDecodeError: a non-blank line is not valid JSON.
    """
    if not os.path.isfile(submission):
        print(f"file does not exist: {submission}")
        # Stop here instead of falling through to open() on a bad path.
        raise FileNotFoundError(submission)

    records = []
    with open(submission, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (e.g. a trailing newline) are not JSON documents.
            if line.strip():
                records.append(json.loads(line))

    if not records:
        print(f"no records found in {submission}")
        return

    frame = pd.json_normalize(records, 'data')
    print(frame)

    os.makedirs(out, exist_ok=True)
    task = frame['task'].iloc[0]
    task_vers = frame['task_vers'].iloc[0]
    for sub in np.unique(frame['subject_id']):
        print(sub)
        path = os.path.join(out, f"{sub}_{task}_{task_vers}.csv")
        # NOTE(review): the complete frame is written for every subject, as
        # before. If a file can hold several subjects, filtering the rows
        # per subject is probably intended; confirm.
        frame.to_csv(path, index=False)
        print(f"saved {path}")

def main():
    """Command-line entry point: parse options, rename the raw file, convert it."""
    cli = parse_args()
    renamed_path = rename_files(cli.submission, cli.sub, cli.task, cli.taskvers)
    convert_beh(renamed_path, cli.out)


# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()


Loading

0 comments on commit 9f5cdb9

Please sign in to comment.