diff --git a/database.db b/database.db new file mode 100644 index 0000000..e064415 Binary files /dev/null and b/database.db differ diff --git a/genotype_api/api/app.py b/genotype_api/api/app.py index e547a7f..e83b4f8 100644 --- a/genotype_api/api/app.py +++ b/genotype_api/api/app.py @@ -8,7 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware from genotype_api.config import security_settings -from genotype_api.database import create_db_and_tables +from genotype_api.database.session_handler import create_db_and_tables from genotype_api.api.endpoints import samples, snps, users, plates, analyses from sqlalchemy.exc import NoResultFound diff --git a/genotype_api/api/endpoints/analyses.py b/genotype_api/api/endpoints/analyses.py index 93193b5..c5a754f 100644 --- a/genotype_api/api/endpoints/analyses.py +++ b/genotype_api/api/endpoints/analyses.py @@ -6,18 +6,17 @@ from fastapi import APIRouter, Depends, status, Query, UploadFile, File from fastapi.responses import JSONResponse -from genotype_api.crud.analyses import ( - get_analysis, - check_analyses_objects, - create_analysis, -) -from genotype_api.crud.samples import ( - create_analyses_sample_objects, - refresh_sample_status, -) -from genotype_api.database import get_session +from genotype_api.database.crud.read import get_analysis, check_analyses_objects +from genotype_api.database.crud.create import create_analysis, create_analyses_sample_objects +from genotype_api.database.crud.update import refresh_sample_status +from genotype_api.database.session_handler import get_session from genotype_api.file_parsing.files import check_file -from genotype_api.models import Analysis, AnalysisRead, AnalysisReadWithGenotype, User +from genotype_api.database.models import ( + Analysis, + AnalysisRead, + User, + AnalysisReadWithGenotype, +) from sqlmodel import Session, select from genotype_api.security import get_active_user diff --git a/genotype_api/api/endpoints/plates.py b/genotype_api/api/endpoints/plates.py index 
4cd695a..016c63c 100644 --- a/genotype_api/api/endpoints/plates.py +++ b/genotype_api/api/endpoints/plates.py @@ -9,26 +9,23 @@ from sqlalchemy import desc, asc from sqlmodel import Session, select -from genotype_api.crud.analyses import ( +from genotype_api.database.crud.read import ( get_analyses_from_plate, + get_plate, + get_user_by_email, check_analyses_objects, ) -from genotype_api.crud.samples import ( - create_analyses_sample_objects, - refresh_sample_status, -) -from genotype_api.crud.plates import create_plate, get_plate -from genotype_api.crud.users import get_user_by_email -from genotype_api.database import get_session +from genotype_api.database.crud.update import refresh_sample_status +from genotype_api.database.crud.create import create_plate, create_analyses_sample_objects +from genotype_api.database.session_handler import get_session from genotype_api.file_parsing.excel import GenotypeAnalysis from genotype_api.file_parsing.files import check_file -from genotype_api.models import ( - Plate, - PlateReadWithAnalyses, +from genotype_api.database.models import ( Analysis, - PlateCreate, User, - PlateRead, + Plate, + PlateCreate, + PlateReadWithAnalyses, PlateReadWithAnalysisDetail, PlateReadWithAnalysisDetailSingle, ) diff --git a/genotype_api/api/endpoints/samples.py b/genotype_api/api/endpoints/samples.py index 7c187c0..89c71e7 100644 --- a/genotype_api/api/endpoints/samples.py +++ b/genotype_api/api/endpoints/samples.py @@ -1,33 +1,34 @@ from typing import List, Optional, Literal from fastapi import APIRouter, Depends, Query from fastapi.responses import JSONResponse -from datetime import datetime, timedelta, date +from datetime import timedelta, date from starlette import status +import genotype_api.database.crud.create from genotype_api.constants import SEXES -from genotype_api.database import get_session +from genotype_api.database.session_handler import get_session from genotype_api.match import check_sample from genotype_api.models import ( - 
Sample, - SampleReadWithAnalysis, - SampleRead, - User, SampleDetail, - Analysis, MatchResult, MatchCounts, +) +from genotype_api.database.models import ( + Analysis, + Sample, + SampleRead, + User, SampleReadWithAnalysisDeep, compare_genotypes, ) from collections import Counter -from genotype_api import crud -from genotype_api.crud.samples import ( +from genotype_api.database.crud.update import refresh_sample_status +from genotype_api.database.crud.read import ( get_incomplete_samples, get_plate_samples, get_commented_samples, - get_sample, get_status_missing_samples, - refresh_sample_status, + get_sample, get_samples, ) from sqlmodel import Session, select @@ -118,7 +119,7 @@ def create_sample( session: Session = Depends(get_session), current_user: User = Depends(get_active_user), ): - return crud.samples.create_sample(session=session, sample=sample) + return genotype_api.database.crud.create.create_sample(session=session, sample=sample) @router.put("/{sample_id}/sex", response_model=SampleRead) diff --git a/genotype_api/api/endpoints/snps.py b/genotype_api/api/endpoints/snps.py index ffb654a..157399d 100644 --- a/genotype_api/api/endpoints/snps.py +++ b/genotype_api/api/endpoints/snps.py @@ -1,10 +1,10 @@ """Routes for the snps""" -from genotype_api.models import SNP, User +from genotype_api.database.models import SNP, User from typing import List -from fastapi import APIRouter, Depends, HTTPException, Query, File, UploadFile -from genotype_api.database import get_session +from fastapi import APIRouter, Depends, HTTPException, Query, UploadFile +from genotype_api.database.session_handler import get_session from sqlmodel import Session, delete, select from genotype_api.security import get_active_user diff --git a/genotype_api/api/endpoints/users.py b/genotype_api/api/endpoints/users.py index cb351bd..75180e6 100644 --- a/genotype_api/api/endpoints/users.py +++ b/genotype_api/api/endpoints/users.py @@ -1,15 +1,15 @@ """Routes for users""" -from typing 
import List, Optional +from typing import List from fastapi import APIRouter, Depends, HTTPException, Query from pydantic import EmailStr from starlette import status from starlette.responses import JSONResponse -from genotype_api.crud.users import get_user -from genotype_api.database import get_session -from genotype_api.models import User, UserRead, UserCreate, UserReadWithPlates +from genotype_api.database.crud.read import get_user +from genotype_api.database.session_handler import get_session +from genotype_api.database.models import User, UserRead, UserCreate, UserReadWithPlates from sqlmodel import Session, select from genotype_api.security import get_active_user diff --git a/genotype_api/crud/analyses.py b/genotype_api/crud/analyses.py deleted file mode 100644 index 7af80a6..0000000 --- a/genotype_api/crud/analyses.py +++ /dev/null @@ -1,60 +0,0 @@ -from typing import List, Optional - -from fastapi import HTTPException, status - -from genotype_api.constants import TYPES -from genotype_api.models import Analysis -from sqlmodel import Session, select -from sqlmodel.sql.expression import Select, SelectOfScalar - -SelectOfScalar.inherit_cache = True -Select.inherit_cache = True - - -def get_analyses_from_plate(plate_id: int, session: Session) -> List[Analysis]: - statement = select(Analysis).where(Analysis.plate_id == plate_id) - return session.exec(statement).all() - - -def get_analysis_type_sample( - sample_id: str, analysis_type: str, session: Session -) -> Optional[Analysis]: - statement = select(Analysis).where( - Analysis.sample_id == sample_id, Analysis.type == analysis_type - ) - return session.exec(statement).first() - - -def get_analysis(session: Session, analysis_id: int) -> Analysis: - """Get analysis""" - - statement = select(Analysis).where(Analysis.id == analysis_id) - return session.exec(statement).one() - - -def delete_analysis(session: Session, analysis_id: int) -> Analysis: - db_analysis = session.get(Analysis, analysis_id) - 
session.delete(db_analysis) - session.commit() - return db_analysis - - -def create_analysis(session: Session, analysis: Analysis) -> Analysis: - session.add(analysis) - session.commit() - session.refresh(analysis) - return analysis - - -def check_analyses_objects( - session: Session, analyses: List[Analysis], analysis_type: TYPES -) -> None: - """Raising 400 if any analysis in the list already exist in the database""" - for analysis_obj in analyses: - db_analysis: Analysis = get_analysis_type_sample( - session=session, - sample_id=analysis_obj.sample_id, - analysis_type=analysis_type, - ) - if db_analysis: - session.delete(db_analysis) diff --git a/genotype_api/crud/plates.py b/genotype_api/crud/plates.py deleted file mode 100644 index a0ee42d..0000000 --- a/genotype_api/crud/plates.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging -from typing import Optional -from genotype_api.models import Plate, PlateCreate -from sqlmodel import Session, select -from sqlmodel.sql.expression import Select, SelectOfScalar - -SelectOfScalar.inherit_cache = True -Select.inherit_cache = True - -LOG = logging.getLogger(__name__) - - -def get_plate(session: Session, plate_id: int) -> Plate: - """Get plate""" - - statement = select(Plate).where(Plate.id == plate_id) - return session.exec(statement).one() - - -def create_plate(session: Session, plate: PlateCreate) -> Plate: - db_plate = Plate.from_orm(plate) - db_plate.analyses = plate.analyses # not sure why from_orm wont pick up the analyses - session.add(db_plate) - session.commit() - session.refresh(db_plate) - LOG.info("Creating plate with id %s", db_plate.plate_id) - return db_plate - - -def delete_plate(session: Session, plate_id: int) -> Optional[Plate]: - db_plate: Plate = session.get(Plate, plate_id) - if not db_plate: - LOG.info("Could not find plate %s", plate_id) - return None - session.delete(db_plate) - session.commit() - LOG.info("Plate deleted") - return db_plate diff --git a/genotype_api/crud/samples.py 
b/genotype_api/crud/samples.py deleted file mode 100644 index e65956e..0000000 --- a/genotype_api/crud/samples.py +++ /dev/null @@ -1,83 +0,0 @@ -from typing import List - -from genotype_api.match import check_sample -from genotype_api.models import Sample, Analysis -from sqlmodel import Session, func, select -from fastapi import HTTPException -from sqlmodel.sql.expression import Select, SelectOfScalar - -SelectOfScalar.inherit_cache = True -Select.inherit_cache = True - - -def get_sample(session: Session, sample_id: str) -> Sample: - """Get sample or raise 404.""" - - statement = select(Sample).where(Sample.id == sample_id) - return session.exec(statement).one() - - -def get_samples(statement: SelectOfScalar, sample_id: str) -> SelectOfScalar: - """Returns a query for samples containing the given sample_id.""" - return statement.where(Sample.id.contains(sample_id)) - - -def create_sample(session: Session, sample: Sample) -> Sample: - """Creates a sample in the database.""" - - sample_in_db = session.get(Sample, sample.id) - if sample_in_db: - raise HTTPException(status_code=409, detail="Sample already registered") - session.add(sample) - session.commit() - session.refresh(sample) - return sample - - -def get_incomplete_samples(statement: SelectOfScalar) -> SelectOfScalar: - """Returning sample query statement for samples with less than two analyses.""" - - return ( - statement.group_by(Analysis.sample_id) - .order_by(Analysis.created_at) - .having(func.count(Analysis.sample_id) < 2) - ) - - -def get_plate_samples(statement: SelectOfScalar, plate_id: str) -> SelectOfScalar: - """Returning sample query statement for samples analysed on a specific plate.""" - return statement.where(Analysis.plate_id == plate_id) - - -def get_commented_samples(statement: SelectOfScalar) -> SelectOfScalar: - """Returning sample query statement for samples with no comment.""" - - return statement.where(Sample.comment != None) - - -def get_status_missing_samples(statement: 
SelectOfScalar) -> SelectOfScalar: - """Returning sample query statement for samples with no comment.""" - - return statement.where(Sample.status == None) - - -def create_analyses_sample_objects(session: Session, analyses: List[Analysis]) -> List[Sample]: - """creating samples in an analysis if not already in db.""" - return [ - create_sample(session=session, sample=Sample(id=analysis_obj.sample_id)) - for analysis_obj in analyses - if not session.get(Sample, analysis_obj.sample_id) - ] - - -def refresh_sample_status(sample: Sample, session: Session) -> Sample: - if len(sample.analyses) != 2: - sample.status = None - else: - results = check_sample(sample=sample) - sample.status = "fail" if "fail" in results.dict().values() else "pass" - - session.add(sample) - session.commit() - session.refresh(sample) - return sample diff --git a/genotype_api/crud/users.py b/genotype_api/crud/users.py deleted file mode 100644 index a7256b1..0000000 --- a/genotype_api/crud/users.py +++ /dev/null @@ -1,32 +0,0 @@ -from typing import List, Optional - -from genotype_api.models import User, UserCreate -from sqlmodel import select, Session -from sqlmodel.sql.expression import Select, SelectOfScalar - -SelectOfScalar.inherit_cache = True -Select.inherit_cache = True - - -def get_user(session: Session, user_id: int): - statement = select(User).where(User.id == user_id) - return session.exec(statement).one() - - -def get_user_by_email(session: Session, email: str) -> Optional[User]: - statement = select(User).where(User.email == email) - return session.exec(statement).first() - - -def get_users(session: Session, skip: int = 0, limit: int = 100) -> List[User]: - statement = select(User).offset(skip).limit(limit) - return session.exec(statement).all() - - -def create_user(db: Session, user: UserCreate): - fake_hashed_password = user.password + "notreallyhashed" - db_user = User(email=user.email, hashed_password=fake_hashed_password) - db.add(db_user) - db.commit() - db.refresh(db_user) - 
return db_user diff --git a/genotype_api/crud/__init__.py b/genotype_api/database/__init__.py similarity index 100% rename from genotype_api/crud/__init__.py rename to genotype_api/database/__init__.py diff --git a/genotype_api/database/crud/__init__.py b/genotype_api/database/crud/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/genotype_api/database/crud/create.py b/genotype_api/database/crud/create.py new file mode 100644 index 0000000..e66527d --- /dev/null +++ b/genotype_api/database/crud/create.py @@ -0,0 +1,67 @@ +import logging +from typing import List + +from fastapi import HTTPException +from sqlmodel import Session + +from genotype_api.database.models import ( + Analysis, + Sample, + User, + UserCreate, + Plate, + PlateCreate, +) +from sqlmodel.sql.expression import Select, SelectOfScalar + +SelectOfScalar.inherit_cache = True +Select.inherit_cache = True + +LOG = logging.getLogger(__name__) + + +def create_analysis(session: Session, analysis: Analysis) -> Analysis: + session.add(analysis) + session.commit() + session.refresh(analysis) + return analysis + + +def create_plate(session: Session, plate: PlateCreate) -> Plate: + db_plate = Plate.from_orm(plate) + db_plate.analyses = plate.analyses # not sure why from_orm wont pick up the analyses + session.add(db_plate) + session.commit() + session.refresh(db_plate) + LOG.info(f"Creating plate with id {db_plate.plate_id}.") + return db_plate + + +def create_sample(session: Session, sample: Sample) -> Sample: + """Creates a sample in the database.""" + + sample_in_db = session.get(Sample, sample.id) + if sample_in_db: + raise HTTPException(status_code=409, detail="Sample already registered") + session.add(sample) + session.commit() + session.refresh(sample) + return sample + + +def create_analyses_sample_objects(session: Session, analyses: List[Analysis]) -> List[Sample]: + """creating samples in an analysis if not already in db.""" + return [ + create_sample(session=session, 
sample=Sample(id=analysis_obj.sample_id)) + for analysis_obj in analyses + if not session.get(Sample, analysis_obj.sample_id) + ] + + +def create_user(db: Session, user: UserCreate): + fake_hashed_password = user.password + "notreallyhashed" + db_user = User(email=user.email, hashed_password=fake_hashed_password) + db.add(db_user) + db.commit() + db.refresh(db_user) + return db_user diff --git a/genotype_api/database/crud/delete.py b/genotype_api/database/crud/delete.py new file mode 100644 index 0000000..924722d --- /dev/null +++ b/genotype_api/database/crud/delete.py @@ -0,0 +1,30 @@ +import logging +from typing import Optional + +from sqlmodel import Session + +from genotype_api.database.models import Analysis, Plate +from sqlmodel.sql.expression import Select, SelectOfScalar + +SelectOfScalar.inherit_cache = True +Select.inherit_cache = True + +LOG = logging.getLogger(__name__) + + +def delete_analysis(session: Session, analysis_id: int) -> Analysis: + db_analysis = session.get(Analysis, analysis_id) + session.delete(db_analysis) + session.commit() + return db_analysis + + +def delete_plate(session: Session, plate_id: int) -> Optional[Plate]: + db_plate: Plate = session.get(Plate, plate_id) + if not db_plate: + LOG.info(f"Could not find plate {plate_id}") + return None + session.delete(db_plate) + session.commit() + LOG.info("Plate deleted") + return db_plate diff --git a/genotype_api/database/crud/read.py b/genotype_api/database/crud/read.py new file mode 100644 index 0000000..5615957 --- /dev/null +++ b/genotype_api/database/crud/read.py @@ -0,0 +1,110 @@ +import logging +from typing import List, Optional + +from sqlalchemy import func +from sqlmodel import Session, select + +from genotype_api.constants import TYPES +from genotype_api.database.models import Analysis, Sample, User, Plate +from sqlmodel.sql.expression import Select, SelectOfScalar + +SelectOfScalar.inherit_cache = True +Select.inherit_cache = True + +LOG = logging.getLogger(__name__) + + +def 
get_analyses_from_plate(plate_id: int, session: Session) -> List[Analysis]: + statement = select(Analysis).where(Analysis.plate_id == plate_id) + return session.exec(statement).all() + + +def get_analysis_type_sample( + sample_id: str, analysis_type: str, session: Session +) -> Optional[Analysis]: + statement = select(Analysis).where( + Analysis.sample_id == sample_id, Analysis.type == analysis_type + ) + return session.exec(statement).first() + + +def get_analysis(session: Session, analysis_id: int) -> Analysis: + """Get analysis""" + + statement = select(Analysis).where(Analysis.id == analysis_id) + return session.exec(statement).one() + + +def get_plate(session: Session, plate_id: int) -> Plate: + """Get plate""" + + statement = select(Plate).where(Plate.id == plate_id) + return session.exec(statement).one() + + +def get_incomplete_samples(statement: SelectOfScalar) -> SelectOfScalar: + """Returning sample query statement for samples with less than two analyses.""" + + return ( + statement.group_by(Analysis.sample_id) + .order_by(Analysis.created_at) + .having(func.count(Analysis.sample_id) < 2) + ) + + +def get_plate_samples(statement: SelectOfScalar, plate_id: str) -> SelectOfScalar: + """Returning sample query statement for samples analysed on a specific plate.""" + return statement.where(Analysis.plate_id == plate_id) + + +def get_commented_samples(statement: SelectOfScalar) -> SelectOfScalar: + """Returning sample query statement for samples with a comment.""" + + return statement.where(Sample.comment != None) + + +def get_status_missing_samples(statement: SelectOfScalar) -> SelectOfScalar: + """Returning sample query statement for samples with no status.""" + + return statement.where(Sample.status == None) + + +def get_sample(session: Session, sample_id: str) -> Sample: + """Get sample or raise 404.""" + + statement = select(Sample).where(Sample.id == sample_id) + return session.exec(statement).one() + + +def get_samples(statement: SelectOfScalar, 
sample_id: str) -> SelectOfScalar: + """Returns a query for samples containing the given sample_id.""" + return statement.where(Sample.id.contains(sample_id)) + + +def get_user(session: Session, user_id: int): + statement = select(User).where(User.id == user_id) + return session.exec(statement).one() + + +def get_user_by_email(session: Session, email: str) -> Optional[User]: + statement = select(User).where(User.email == email) + return session.exec(statement).first() + + +def get_users(session: Session, skip: int = 0, limit: int = 100) -> List[User]: + statement = select(User).offset(skip).limit(limit) + return session.exec(statement).all() + + +def check_analyses_objects( + session: Session, analyses: List[Analysis], analysis_type: TYPES +) -> None: + """Delete any existing analysis with the same sample and type as one in the list""" + for analysis_obj in analyses: + db_analysis: Analysis = get_analysis_type_sample( + session=session, + sample_id=analysis_obj.sample_id, + analysis_type=analysis_type, + ) + if db_analysis: + session.delete(db_analysis) diff --git a/genotype_api/database/crud/update.py b/genotype_api/database/crud/update.py new file mode 100644 index 0000000..845ca7c --- /dev/null +++ b/genotype_api/database/crud/update.py @@ -0,0 +1,21 @@ +from sqlmodel import Session + +from genotype_api.match import check_sample +from genotype_api.database.models import Sample +from sqlmodel.sql.expression import Select, SelectOfScalar + +SelectOfScalar.inherit_cache = True +Select.inherit_cache = True + + +def refresh_sample_status(sample: Sample, session: Session) -> Sample: + if len(sample.analyses) != 2: + sample.status = None + else: + results = check_sample(sample=sample) + sample.status = "fail" if "fail" in results.dict().values() else "pass" + + session.add(sample) + session.commit() + session.refresh(sample) + return sample diff --git a/genotype_api/database/models.py b/genotype_api/database/models.py new file mode 100644 index 0000000..9edf2a1 --- /dev/null 
+++ b/genotype_api/database/models.py @@ -0,0 +1,312 @@ +from collections import Counter +from datetime import datetime +from typing import Optional, List, Dict, Tuple + +from pydantic import constr, EmailStr, validator +from sqlalchemy import Index +from sqlmodel import SQLModel, Field, Relationship + +from genotype_api.constants import TYPES, SEXES, STATUS, CUTOFS +from genotype_api.models import SampleDetail, PlateStatusCounts + + +class GenotypeBase(SQLModel): + rsnumber: Optional[constr(max_length=10)] + analysis_id: Optional[int] = Field(default=None, foreign_key="analysis.id") + allele_1: Optional[constr(max_length=1)] + allele_2: Optional[constr(max_length=1)] + + +class Genotype(GenotypeBase, table=True): + __tablename__ = "genotype" + __table_args__ = (Index("_analysis_rsnumber", "analysis_id", "rsnumber", unique=True),) + id: Optional[int] = Field(default=None, primary_key=True) + + analysis: Optional["Analysis"] = Relationship(back_populates="genotypes") + + @property + def alleles(self) -> List[str]: + """Return sorted because we are not dealing with phased data.""" + + return sorted([self.allele_1, self.allele_2]) + + @property + def is_ok(self) -> bool: + """Check that the allele determination is ok.""" + return "0" not in self.alleles + + +class GenotypeRead(GenotypeBase): + id: int + + +class GenotypeCreate(GenotypeBase): + pass + + +class AnalysisBase(SQLModel): + type: TYPES + source: Optional[str] + sex: Optional[SEXES] + created_at: Optional[datetime] = datetime.now() + sample_id: Optional[constr(max_length=32)] = Field(default=None, foreign_key="sample.id") + plate_id: Optional[str] = Field(default=None, foreign_key="plate.id") + + +class Analysis(AnalysisBase, table=True): + __tablename__ = "analysis" + __table_args__ = (Index("_sample_type", "sample_id", "type", unique=True),) + id: Optional[int] = Field(default=None, primary_key=True) + + sample: Optional["Sample"] = Relationship(back_populates="analyses") + plate: Optional[List["Plate"]] = 
Relationship(back_populates="analyses") + genotypes: Optional[List["Genotype"]] = Relationship(back_populates="analysis") + + def check_no_calls(self) -> Dict[str, int]: + """Check that genotypes look ok.""" + calls = ["known" if genotype.is_ok else "unknown" for genotype in self.genotypes] + return Counter(calls) + + +class AnalysisRead(AnalysisBase): + id: int + + +class AnalysisCreate(AnalysisBase): + pass + + +class SampleSlim(SQLModel): + status: Optional[STATUS] + comment: Optional[str] + + +class SampleBase(SampleSlim): + sex: Optional[SEXES] + created_at: Optional[datetime] = datetime.now() + + +class Sample(SampleBase, table=True): + __tablename__ = "sample" + id: Optional[constr(max_length=32)] = Field(default=None, primary_key=True) + + analyses: Optional[List["Analysis"]] = Relationship(back_populates="sample") + + @property + def genotype_analysis(self) -> Optional[Analysis]: + """Return genotype analysis.""" + + for analysis in self.analyses: + if analysis.type == "genotype": + return analysis + + return None + + @property + def sequence_analysis(self) -> Optional[Analysis]: + """Return sequence analysis.""" + + for analysis in self.analyses: + if analysis.type == "sequence": + return analysis + + return None + + +class SampleRead(SampleBase): + id: constr(max_length=32) + + +class SampleCreate(SampleBase): + pass + + +class SNPBase(SQLModel): + ref: Optional[constr(max_length=1)] + chrom: Optional[constr(max_length=5)] + pos: Optional[int] + + +class SNP(SNPBase, table=True): + __tablename__ = "snp" + """Represent a SNP position under investigation.""" + + id: Optional[constr(max_length=32)] = Field(default=None, primary_key=True) + + +class SNPRead(SNPBase): + id: constr(max_length=32) + + +class UserBase(SQLModel): + email: EmailStr = Field(index=True, unique=True) + name: Optional[str] = "" + + +class User(UserBase, table=True): + __tablename__ = "user" + id: Optional[int] = Field(default=None, primary_key=True) + plates: Optional[List["Plate"]] = 
Relationship(back_populates="user") + + +class UserRead(UserBase): + id: int + + +class UserCreate(UserBase): + pass + + +class PlateBase(SQLModel): + created_at: Optional[datetime] = datetime.now() + plate_id: constr(max_length=16) = Field(index=True, unique=True) + signed_by: Optional[int] = Field(default=None, foreign_key="user.id") + signed_at: Optional[datetime] + method_document: Optional[str] + method_version: Optional[str] + + +class Plate(PlateBase, table=True): + __tablename__ = "plate" + id: Optional[int] = Field(default=None, primary_key=True) + user: Optional["User"] = Relationship(back_populates="plates") + analyses: Optional[List["Analysis"]] = Relationship(back_populates="plate") + + +class PlateRead(PlateBase): + id: str + user: Optional[UserRead] + + +class PlateCreate(PlateBase): + analyses: Optional[List[Analysis]] = [] + + +class UserReadWithPlates(UserRead): + plates: Optional[List[Plate]] = [] + + +class SampleReadWithAnalysis(SampleRead): + analyses: Optional[List[AnalysisRead]] = [] + + +class AnalysisReadWithGenotype(AnalysisRead): + genotypes: Optional[List[Genotype]] = [] + + +class SampleReadWithAnalysisDeep(SampleRead): + analyses: Optional[List[AnalysisReadWithGenotype]] = [] + detail: Optional[SampleDetail] + + @validator("detail") + def get_detail(cls, value, values) -> SampleDetail: + analyses = values.get("analyses") + if len(analyses) != 2: + return SampleDetail() + genotype_analysis = [analysis for analysis in analyses if analysis.type == "genotype"][0] + sequence_analysis = [analysis for analysis in analyses if analysis.type == "sequence"][0] + status = check_snps( + genotype_analysis=genotype_analysis, sequence_analysis=sequence_analysis + ) + sex = check_sex( + sample_sex=values.get("sex"), + genotype_analysis=genotype_analysis, + sequence_analysis=sequence_analysis, + ) + + return SampleDetail(**status, sex=sex) + + class Config: + validate_all = True + + +class AnalysisReadWithSample(AnalysisRead): + sample: 
Optional[SampleSlim] + + +def compare_genotypes(genotype_1: Genotype, genotype_2: Genotype) -> Tuple[str, str]: + """Compare two genotypes if they have the same alleles.""" + + if "0" in genotype_1.alleles or "0" in genotype_2.alleles: + return genotype_1.rsnumber, "unknown" + elif genotype_1.alleles == genotype_2.alleles: + return genotype_1.rsnumber, "match" + else: + return genotype_1.rsnumber, "mismatch" + + +class AnalysisReadWithSampleDeep(AnalysisRead): + sample: Optional[SampleReadWithAnalysisDeep] + + +class PlateReadWithAnalyses(PlateRead): + analyses: Optional[List[AnalysisReadWithSample]] = [] + + +class PlateReadWithAnalysisDetail(PlateRead): + analyses: Optional[List[AnalysisReadWithSample]] = [] + detail: Optional[PlateStatusCounts] + + @validator("detail") + def check_detail(cls, value, values): + analyses = values.get("analyses") + statuses = [str(analysis.sample.status) for analysis in analyses] + commented = sum(1 for analysis in analyses if analysis.sample.comment) + status_counts = Counter(statuses) + return PlateStatusCounts(**status_counts, total=len(analyses), commented=commented) + + class Config: + validate_all = True + + +class PlateReadWithAnalysisDetailSingle(PlateRead): + analyses: Optional[List[AnalysisReadWithSample]] = [] + detail: Optional[PlateStatusCounts] + + @validator("detail") + def check_detail(cls, value, values): + analyses = values.get("analyses") + statuses = [str(analysis.sample.status) for analysis in analyses] + commented = sum(1 for analysis in analyses if analysis.sample.comment) + status_counts = Counter(statuses) + return PlateStatusCounts(**status_counts, total=len(analyses), commented=commented) + + class Config: + validate_all = True + + +def check_snps(genotype_analysis, sequence_analysis): + genotype_pairs = zip(genotype_analysis.genotypes, sequence_analysis.genotypes) + results = dict( + compare_genotypes(genotype_1, genotype_2) for genotype_1, genotype_2 in genotype_pairs + ) + count = Counter([val for key, 
val in results.items()]) + unknown = count.get("unknown", 0) + matches = count.get("match", 0) + mismatches = count.get("mismatch", 0) + snps = ( + "pass" + if all([matches >= CUTOFS.get("min_matches") and mismatches <= CUTOFS.get("max_mismatch")]) + else "fail" + ) + nocalls = "pass" if unknown <= CUTOFS.get("max_nocalls") else "fail" + failed_snps = [key for key, val in results.items() if val == "mismatch"] + + return { + "unknown": unknown, + "matches": matches, + "mismatches": mismatches, + "snps": snps, + "nocalls": nocalls, + "failed_snps": failed_snps, + } + + +def check_sex(sample_sex, genotype_analysis, sequence_analysis): + """Check if any source disagrees on the sex""" + if not sample_sex or genotype_analysis.sex == SEXES.UNKNOWN: + return "fail" + sexes = {genotype_analysis.sex, sequence_analysis.sex, sample_sex} + if {SEXES.MALE, SEXES.FEMALE}.issubset(sexes): + return "fail" + return "pass" diff --git a/genotype_api/database.py b/genotype_api/database/session_handler.py similarity index 100% rename from genotype_api/database.py rename to genotype_api/database/session_handler.py diff --git a/genotype_api/file_parsing/excel.py b/genotype_api/file_parsing/excel.py index ff47493..a88b682 100644 --- a/genotype_api/file_parsing/excel.py +++ b/genotype_api/file_parsing/excel.py @@ -6,7 +6,7 @@ import openpyxl from genotype_api.exceptions import SexConflictError -from genotype_api.models import Analysis, Genotype +from genotype_api.database.models import Genotype, Analysis from openpyxl.workbook import Workbook from openpyxl.worksheet.worksheet import Worksheet diff --git a/genotype_api/file_parsing/vcf.py b/genotype_api/file_parsing/vcf.py index e265d88..9d08d90 100644 --- a/genotype_api/file_parsing/vcf.py +++ b/genotype_api/file_parsing/vcf.py @@ -2,7 +2,7 @@ from typing import Dict, Iterable, List, TextIO -from genotype_api.models import Analysis, Genotype as DBGenotype +from genotype_api.database.models import Genotype as DBGenotype, Analysis from 
pydantic import BaseModel diff --git a/genotype_api/match.py b/genotype_api/match.py index 897d664..203e94d 100644 --- a/genotype_api/match.py +++ b/genotype_api/match.py @@ -2,11 +2,9 @@ import logging from genotype_api.models import ( - Sample, SampleDetail, - check_sex, - check_snps, ) +from genotype_api.database.models import Sample, check_snps, check_sex log = logging.getLogger(__name__) diff --git a/genotype_api/models.py b/genotype_api/models.py index 1562898..44bbc10 100644 --- a/genotype_api/models.py +++ b/genotype_api/models.py @@ -1,12 +1,7 @@ -from collections import Counter -from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple, Type +from typing import List, Optional -from pydantic import BaseModel, EmailStr, constr, validator -from sqlalchemy import Index -from sqlmodel import Field, Relationship, SQLModel - -from genotype_api.constants import CUTOFS, SEXES, STATUS, TYPES +from pydantic import BaseModel, validator +from sqlmodel import Field class PlateStatusCounts(BaseModel): @@ -63,308 +58,6 @@ class Config: validate_all = True -class GenotypeBase(SQLModel): - rsnumber: Optional[constr(max_length=10)] - analysis_id: Optional[int] = Field(default=None, foreign_key="analysis.id") - allele_1: Optional[constr(max_length=1)] - allele_2: Optional[constr(max_length=1)] - - -class Genotype(GenotypeBase, table=True): - __tablename__ = "genotype" - __table_args__ = (Index("_analysis_rsnumber", "analysis_id", "rsnumber", unique=True),) - id: Optional[int] = Field(default=None, primary_key=True) - - analysis: Optional["Analysis"] = Relationship(back_populates="genotypes") - - @property - def alleles(self) -> List[str]: - """Return sorted because we are not dealing with phased data.""" - - return sorted([self.allele_1, self.allele_2]) - - @property - def is_ok(self) -> bool: - """Check that the allele determination is ok.""" - return "0" not in self.alleles - - -class GenotypeRead(GenotypeBase): - id: int - - -class 
GenotypeCreate(GenotypeBase): - pass - - -class AnalysisBase(SQLModel): - type: TYPES - source: Optional[str] - sex: Optional[SEXES] - created_at: Optional[datetime] = datetime.now() - sample_id: Optional[constr(max_length=32)] = Field(default=None, foreign_key="sample.id") - plate_id: Optional[str] = Field(default=None, foreign_key="plate.id") - - -class Analysis(AnalysisBase, table=True): - __tablename__ = "analysis" - __table_args__ = (Index("_sample_type", "sample_id", "type", unique=True),) - id: Optional[int] = Field(default=None, primary_key=True) - - sample: Optional["Sample"] = Relationship(back_populates="analyses") - plate: Optional[List["Plate"]] = Relationship(back_populates="analyses") - genotypes: Optional[List["Genotype"]] = Relationship(back_populates="analysis") - - def check_no_calls(self) -> Dict[str, int]: - """Check that genotypes look ok.""" - calls = ["known" if genotype.is_ok else "unknown" for genotype in self.genotypes] - return Counter(calls) - - -class AnalysisRead(AnalysisBase): - id: int - - -class AnalysisCreate(AnalysisBase): - pass - - -class SampleSlim(SQLModel): - status: Optional[STATUS] - comment: Optional[str] - - -class SampleBase(SampleSlim): - sex: Optional[SEXES] - created_at: Optional[datetime] = datetime.now() - - -class Sample(SampleBase, table=True): - __tablename__ = "sample" - id: Optional[constr(max_length=32)] = Field(default=None, primary_key=True) - - analyses: Optional[List["Analysis"]] = Relationship(back_populates="sample") - - @property - def genotype_analysis(self) -> Optional[Analysis]: - """Return genotype analysis.""" - - for analysis in self.analyses: - if analysis.type == "genotype": - return analysis - - return None - - @property - def sequence_analysis(self) -> Optional[Analysis]: - """Return sequence analysis.""" - - for analysis in self.analyses: - if analysis.type == "sequence": - return analysis - - return None - - -class SampleRead(SampleBase): - id: constr(max_length=32) - - -class 
SampleCreate(SampleBase): - pass - - -class SNPBase(SQLModel): - ref: Optional[constr(max_length=1)] - chrom: Optional[constr(max_length=5)] - pos: Optional[int] - - -class SNP(SNPBase, table=True): - __tablename__ = "snp" - """Represent a SNP position under investigation.""" - - id: Optional[constr(max_length=32)] = Field(default=None, primary_key=True) - - -class SNPRead(SNPBase): - id: constr(max_length=32) - - -class UserBase(SQLModel): - email: EmailStr = Field(index=True, unique=True) - name: Optional[str] = "" - - -class User(UserBase, table=True): - __tablename__ = "user" - id: Optional[int] = Field(default=None, primary_key=True) - plates: Optional[List["Plate"]] = Relationship(back_populates="user") - - -class UserRead(UserBase): - id: int - - -class UserCreate(UserBase): - pass - - -class PlateBase(SQLModel): - created_at: Optional[datetime] = datetime.now() - plate_id: constr(max_length=16) = Field(index=True, unique=True) - signed_by: Optional[int] = Field(default=None, foreign_key="user.id") - signed_at: Optional[datetime] - method_document: Optional[str] - method_version: Optional[str] - - -class Plate(PlateBase, table=True): - __tablename__ = "plate" - id: Optional[int] = Field(default=None, primary_key=True) - user: Optional["User"] = Relationship(back_populates="plates") - analyses: Optional[List["Analysis"]] = Relationship(back_populates="plate") - - -class PlateRead(PlateBase): - id: str - user: Optional[UserRead] - - -class PlateCreate(PlateBase): - analyses: Optional[List[Analysis]] = [] - - -class UserReadWithPlates(UserRead): - plates: Optional[List[Plate]] = [] - - -class SampleReadWithAnalysis(SampleRead): - analyses: Optional[List[AnalysisRead]] = [] - - -class AnalysisReadWithGenotype(AnalysisRead): - genotypes: Optional[List[Genotype]] = [] - - -class SampleReadWithAnalysisDeep(SampleRead): - analyses: Optional[List[AnalysisReadWithGenotype]] = [] - detail: Optional[SampleDetail] - - @validator("detail") - def get_detail(cls, value, 
values) -> SampleDetail: - analyses = values.get("analyses") - if len(analyses) != 2: - return SampleDetail() - genotype_analysis = [analysis for analysis in analyses if analysis.type == "genotype"][0] - sequence_analysis = [analysis for analysis in analyses if analysis.type == "sequence"][0] - status = check_snps( - genotype_analysis=genotype_analysis, sequence_analysis=sequence_analysis - ) - sex = check_sex( - sample_sex=values.get("sex"), - genotype_analysis=genotype_analysis, - sequence_analysis=sequence_analysis, - ) - - return SampleDetail(**status, sex=sex) - - class Config: - validate_all = True - - -class AnalysisReadWithSample(AnalysisRead): - sample: Optional[SampleSlim] - - -def compare_genotypes(genotype_1: Genotype, genotype_2: Genotype) -> Tuple[str, str]: - """Compare two genotypes if they have the same alleles.""" - - if "0" in genotype_1.alleles or "0" in genotype_2.alleles: - return genotype_1.rsnumber, "unknown" - elif genotype_1.alleles == genotype_2.alleles: - return genotype_1.rsnumber, "match" - else: - return genotype_1.rsnumber, "mismatch" - - -def check_snps(genotype_analysis, sequence_analysis): - genotype_pairs = zip(genotype_analysis.genotypes, sequence_analysis.genotypes) - results = dict( - compare_genotypes(genotype_1, genotype_2) for genotype_1, genotype_2 in genotype_pairs - ) - count = Counter([val for key, val in results.items()]) - unknown = count.get("unknown", 0) - matches = count.get("match", 0) - mismatches = count.get("mismatch", 0) - snps = ( - "pass" - if all([matches >= CUTOFS.get("min_matches") and mismatches <= CUTOFS.get("max_mismatch")]) - else "fail" - ) - nocalls = "pass" if unknown <= CUTOFS.get("max_nocalls") else "fail" - failed_snps = [key for key, val in results.items() if val == "mismatch"] - - return { - "unknown": unknown, - "matches": matches, - "mismatches": mismatches, - "snps": snps, - "nocalls": nocalls, - "failed_snps": failed_snps, - } - - -def check_sex(sample_sex, genotype_analysis, 
sequence_analysis): - """Check if any source disagrees on the sex""" - if not sample_sex or genotype_analysis.sex == SEXES.UNKNOWN: - return "fail" - sexes = {genotype_analysis.sex, sequence_analysis.sex, sample_sex} - if {SEXES.MALE, SEXES.FEMALE}.issubset(sexes): - return "fail" - return "pass" - - -class AnalysisReadWithSampleDeep(AnalysisRead): - sample: Optional[SampleReadWithAnalysisDeep] - - -class PlateReadWithAnalyses(PlateRead): - analyses: Optional[List[AnalysisReadWithSample]] = [] - - -class PlateReadWithAnalysisDetail(PlateRead): - analyses: Optional[List[AnalysisReadWithSample]] = [] - detail: Optional[PlateStatusCounts] - - @validator("detail") - def check_detail(cls, value, values): - analyses = values.get("analyses") - statuses = [str(analysis.sample.status) for analysis in analyses] - commented = sum(1 for analysis in analyses if analysis.sample.comment) - status_counts = Counter(statuses) - return PlateStatusCounts(**status_counts, total=len(analyses), commented=commented) - - class Config: - validate_all = True - - -class PlateReadWithAnalysisDetailSingle(PlateRead): - analyses: Optional[List[AnalysisReadWithSample]] = [] - detail: Optional[PlateStatusCounts] - - @validator("detail") - def check_detail(cls, value, values): - analyses = values.get("analyses") - statuses = [str(analysis.sample.status) for analysis in analyses] - commented = sum(1 for analysis in analyses if analysis.sample.comment) - status_counts = Counter(statuses) - return PlateStatusCounts(**status_counts, total=len(analyses), commented=commented) - - class Config: - validate_all = True - - class MatchCounts(BaseModel): match: Optional[int] = 0 mismatch: Optional[int] = 0 diff --git a/genotype_api/security.py b/genotype_api/security.py index 7df050d..5bb59c7 100644 --- a/genotype_api/security.py +++ b/genotype_api/security.py @@ -7,10 +7,10 @@ from starlette.requests import Request -from genotype_api.database import get_session -from genotype_api.models import User +from 
genotype_api.database.session_handler import get_session +from genotype_api.database.models import User from genotype_api.config import security_settings -from genotype_api.crud.users import get_user_by_email +from genotype_api.database.crud.read import get_user_by_email from jose import jwt import requests