Skip to content

Commit

Permalink
use db for ingestion, and remove extra imports
Browse files Browse the repository at this point in the history
  • Loading branch information
jdkent committed Jan 13, 2025
1 parent f14f343 commit 3c51281
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 16 deletions.
23 changes: 8 additions & 15 deletions store/neurostore/ingest/extracted_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,9 @@

import json
import os.path as op
import re
from pathlib import Path
import hashlib

import numpy as np
import pandas as pd
import requests
from scipy import sparse
from dateutil.parser import parse as parse_date
from sqlalchemy import or_

from neurostore.database import db
from neurostore.models import (
Expand All @@ -22,15 +15,15 @@
)


def ingest_feature(feature_directory, session):
def ingest_feature(feature_directory):
"""Ingest demographics data into the database."""
# read pipeline_info.json from the base feature directory
with open(op.join(feature_directory, "pipeline_info.json")) as f:
pipeline_info = json.load(f)

# search if there is an existing pipeline with the same name and version
pipeline = (
session.query(Pipeline)
db.session.query(Pipeline)
.filter(
Pipeline.name == pipeline_info["name"],
Pipeline.version == pipeline_info["version"],
Expand All @@ -48,7 +41,7 @@ def ingest_feature(feature_directory, session):
pubget_compatible=pipeline_info.get("pubget_compatible", False),
derived_from=pipeline_info.get("derived_from", None),
)
session.add(pipeline)
db.session.add(pipeline)

# search within the pipeline and see if there are any existing pipeline configs
# that match the "arguments" field in the pipeline_info.json
Expand All @@ -57,7 +50,7 @@ def ingest_feature(feature_directory, session):
json.dumps(pipeline_info["arguments"]).encode()
).hexdigest()
pipeline_config = (
session.query(PipelineConfig)
db.session.query(PipelineConfig)
.filter(
PipelineConfig.pipeline_id == pipeline.id,
PipelineConfig.config_hash == config_hash,
Expand All @@ -71,7 +64,7 @@ def ingest_feature(feature_directory, session):
config=pipeline_info["arguments"],
config_hash=config_hash,
)
session.add(pipeline_config)
db.session.add(pipeline_config)

# create a new pipeline run
pipeline_run = PipelineRun(
Expand Down Expand Up @@ -104,7 +97,7 @@ def ingest_feature(feature_directory, session):
)
)

session.add(pipeline_run)
session.add_all(pipeline_run_results)
db.session.add(pipeline_run)
db.session.add_all(pipeline_run_results)

session.commit()
db.session.commit()
2 changes: 1 addition & 1 deletion store/neurostore/tests/test_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ def test_ingest_neuroquery(ingest_neuroquery, session):


def test_ingest_features(create_demographic_features, session):
ingest_feature(create_demographic_features, session)
ingest_feature(create_demographic_features)

0 comments on commit 3c51281

Please sign in to comment.