added yml and fixed studyIDs in jatosAPI.py (compensate for horrible API)

HBClab · Sep 11, 2024 · e27b49e · e27b49e
1 parent 82c60c6
commit e27b49e
Show file tree

Hide file tree

Showing 3 changed files with 399 additions and 0 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -0,0 +1,103 @@
+# Workflow "Task1 QC": triggered by every push to the main branch.
+name: Task1 QC
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  process_raw:
+    # Finds the CSV files committed during the last 24 hours and publishes the
+    # subject / task / version parsed from their file names as job outputs.
+    runs-on: self-hosted
+    outputs:
+        sub: ${{ steps.set_vars.outputs.sub }}
+        task: ${{ steps.set_vars.outputs.task }}
+        version: ${{ steps.set_vars.outputs.version }}
+
+    steps:
+      - name: checkout code and return recently uploaded file in /data
+        uses: actions/checkout@v3
+        with:
+          # The default clone is shallow (depth 1) and only contains the tip
+          # commit; "git log --since" needs the full history to cover 24 hours.
+          fetch-depth: 0
+      - name: Get changed files
+        run: |
+            # Get the list of CSV files changed in the last 24 hours.
+            # The names are joined with spaces because a plain "name=value"
+            # line in $GITHUB_ENV cannot carry a multi-line value.
+            data=$(git log --since="24 hours ago" --name-only --pretty=format: -- '*.csv' | sort -u | tr '\n' ' ')
+
+            # Export the data variable to the environment of the later steps
+            echo "data=$data" >> "$GITHUB_ENV"
+
+            # Print the changed CSV files
+            echo "Changed CSV files in the last 24 hours: $data"
+
+      - name: set up python
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: parse raw
+        id: set_vars
+        run: |
+            # Loop through each CSV file in $data (word-splitting is intended).
+            # File names are expected to look like <sub>_<task>_<version>.csv.
+            # NOTE(review): every iteration rewrites the same three outputs, so
+            # only the values of the last file reach the next job.
+            for file in $data; do
+                filename=$(basename "$file")
+                IFS='_' read -r sub task version <<< "$filename"
+                version="${version%.csv}"  # Remove the .csv extension from version
+                # $GITHUB_OUTPUT replaces the deprecated ::set-output command.
+                echo "sub=$sub" >> "$GITHUB_OUTPUT"
+                echo "task=$task" >> "$GITHUB_OUTPUT"
+                echo "version=$version" >> "$GITHUB_OUTPUT"
+                echo "Subject: $sub"
+                echo "Task: $task"
+                echo "Version: $version"
+            done
+
+  run_qc:
+    # Runs code/NFqC.py on the processed CSV of every subject / task / version
+    # combination published by process_raw and stores the output as a log file.
+    runs-on: self-hosted
+    needs: process_raw
+    env:
+      # Passed through the environment instead of being pasted into the script:
+      # values containing spaces or shell metacharacters can then neither break
+      # the assignments nor inject commands.
+      SUBS: ${{ needs.process_raw.outputs.sub }}
+      TASKS: ${{ needs.process_raw.outputs.task }}
+      VERSIONS: ${{ needs.process_raw.outputs.version }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Debug env vars
+        run: |
+            echo "sub=$SUBS"
+            echo "task=$TASKS"
+            echo "version=$VERSIONS"
+
+      - name: run quality control
+        run: |
+            # Without pipefail the exit status of "python ... | tee" is tee's,
+            # which would hide a failing QC run.
+            set -o pipefail
+            # The lists are left unquoted on purpose: each may hold several
+            # space-separated values.
+            for sub in $SUBS; do
+                echo "Processing subject: $sub"
+                for task in $TASKS; do
+                    echo "Processing task: $task"
+                    for vers in $VERSIONS; do
+                        echo "Processing version: $vers"
+                        csv_file="./data/${sub}/processed/${sub}_${task}_${vers}.csv"
+                        log_file="./data/${sub}/qc_${task}_${vers}.log"
+                        echo "CSV file: $csv_file"
+                        echo "Log file: $log_file"
+                        if [ -f "$csv_file" ]; then
+                            python ./code/NFqC.py -s "$csv_file" -o "./data/${sub}/" -sub "$sub" | tee "$log_file"
+                            echo "QC for ${sub}_${task}_${vers} running"
+                        else
+                            echo "CSV file $csv_file does not exist"
+                        fi
+                    done
+                done
+            done
+
+  push:
+    # Commits whatever the previous job left in the working tree and pushes it.
+    # NOTE(review): there is no checkout step here, so this presumably relies on
+    # the self-hosted runner keeping the run_qc workspace - confirm that both
+    # jobs always land on the same runner.
+    runs-on: self-hosted
+    needs: run_qc
+    steps:
+      - name: Commit and Push Changes
+        run: |
+          # Repository-local identity: --global would permanently rewrite the
+          # git configuration of the self-hosted runner account.
+          git config user.name "miloswrath"
+          git config user.email "[email protected]"
+          git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
+          git add .
+          # "git commit" exits non-zero when nothing is staged, which would
+          # fail the job on every run that produced no new files.
+          if git diff --cached --quiet; then
+            echo "No changes to commit"
+          else
+            git commit -m "Automated commit by GitHub Actions"
+            git push
+          fi
+        env:
+            # NOTE(review): the script never reads $GITHUB_TOKEN; the remote URL
+            # above is built from secrets.GITHUB_TOKEN, so GIT_PAT is currently
+            # unused - confirm which token is meant to authenticate the push.
+            GITHUB_TOKEN: ${{ secrets.GIT_PAT }}
diff --git a/code/ConvertBeh.py b/code/ConvertBeh.py
@@ -0,0 +1,71 @@
+# %%
+import pandas as pd
+import numpy as np
+import os
+import json
+
+def parse_args():
+    """Parse the command-line options of the behavior-data converter.
+
+    Returns:
+        argparse.Namespace with the attributes ``submission``, ``out``,
+        ``sub``, ``task`` and ``taskvers``. Each one is a string, or None
+        when the corresponding flag was not given.
+    """
+    from argparse import ArgumentParser
+
+    # (flag, help text) pairs, registered in this order.
+    option_table = (
+        ('-submission', 'Path to the submission file'),
+        ('-out', 'Output directory'),
+        ('-sub', 'Subject ID'),
+        ('-task', 'Task name'),
+        ('-taskvers', 'Task version'),
+    )
+
+    cli = ArgumentParser(description='Convert Behavior Data')
+    for flag, description in option_table:
+        cli.add_argument(flag, type=str, help=description)
+    return cli.parse_args()
+
+
+def rename_files(submission, subject, task, taskvers):
+    """Rename the raw ``.txt`` file under *submission* to ``<subject>_<task>_<taskvers>.txt``.
+
+    The directory tree rooted at *submission* is walked and every ``.txt``
+    file is renamed in place (it stays in the directory it was found in).
+
+    Args:
+        submission: Directory that is searched recursively for ``.txt`` files.
+        subject: Subject ID used as the first part of the new file name.
+        task: Task name used as the second part of the new file name.
+        taskvers: Task version used as the third part of the new file name.
+
+    Returns:
+        The path of the renamed file (the last one handled when several
+        directories contain one), or *submission* unchanged when no ``.txt``
+        file was found.
+    """
+    target_name = f"{subject}_{task}_{taskvers}.txt"
+    renamed = None
+    for root, _dirs, files in os.walk(submission):
+        target = os.path.join(root, target_name)
+        # Sorted so the file that wins in a directory is deterministic.
+        for file in sorted(files):
+            if not file.endswith(".txt"):
+                continue
+            source = os.path.join(root, file)
+            if source != target:
+                if os.path.exists(target):
+                    # os.rename would silently replace the existing target
+                    # on POSIX; keep both files instead of losing data.
+                    print(f"skipping {source}: {target} already exists")
+                    continue
+                os.rename(source, target)
+            # Report the location the file really has now; it is renamed
+            # inside `root`, not moved into a 'raw' subdirectory.
+            renamed = target
+            print(renamed)
+    return submission if renamed is None else renamed
+
+
+
+
+def convert_beh(submission, out):
+    """Convert a JSON-lines submission file into CSV file(s) inside *out*.
+
+    Every non-blank line of *submission* is parsed as JSON and the records
+    under its ``data`` key are flattened into one table. For each distinct
+    ``subject_id`` a file ``<subject_id>_<task>_<task_vers>.csv`` is written,
+    where ``task`` and ``task_vers`` are taken from the first row.
+
+    Args:
+        submission: Path of the text file with one JSON document per line.
+        out: Existing directory that receives the CSV file(s).
+
+    Returns:
+        List of the CSV paths that were written (empty when the file holds
+        no records).
+
+    Raises:
+        FileNotFoundError: If *submission* is not an existing file.
+    """
+    if not os.path.isfile(submission):
+        # Fail with a clear message rather than printing and then crashing
+        # on the open() below.
+        raise FileNotFoundError(f"file does not exist: {submission}")
+
+    with open(submission, 'r', encoding='utf-8') as file:
+        # Blank lines (e.g. a trailing newline) are not JSON documents.
+        records = [json.loads(line) for line in file if line.strip()]
+    table = pd.json_normalize(records, 'data')
+    print(table)
+
+    if table.empty:
+        print(f"no records found in {submission}")
+        return []
+
+    task = table['task'].iloc[0]
+    task_vers = table['task_vers'].iloc[0]
+
+    written = []
+    for sub in np.unique(table['subject_id']):
+        print(sub)
+        path = os.path.join(out, f"{sub}_{task}_{task_vers}.csv")
+        # NOTE(review): the complete table is written to every subject's
+        # file; filter on subject_id here if per-subject files are intended.
+        table.to_csv(path, index=False)
+        print(f"saved {path}")
+        written.append(path)
+    return written
+
+def main():
+    """Script entry point: rename the raw submission file, then convert it to CSV."""
+    cli = parse_args()
+    raw_path = rename_files(cli.submission, cli.sub, cli.task, cli.taskvers)
+    convert_beh(raw_path, cli.out)
+
+
+if __name__ == "__main__":
+    main()
+
+