From c7bcaf1998c7f5698da379f7bf52b61e6823f943 Mon Sep 17 00:00:00 2001 From: ChristianOertlin Date: Tue, 12 Mar 2024 15:28:02 +0100 Subject: [PATCH] refactor(crud and models) (#99) (patch) # Description Refactor crud models --- database.db | Bin 0 -> 53248 bytes genotype_api/api/app.py | 2 +- genotype_api/api/endpoints/analyses.py | 21 +- genotype_api/api/endpoints/plates.py | 23 +- genotype_api/api/endpoints/samples.py | 25 +- genotype_api/api/endpoints/snps.py | 6 +- genotype_api/api/endpoints/users.py | 8 +- genotype_api/crud/analyses.py | 60 ---- genotype_api/crud/plates.py | 38 --- genotype_api/crud/samples.py | 83 ----- genotype_api/crud/users.py | 32 -- genotype_api/{crud => database}/__init__.py | 0 genotype_api/database/crud/__init__.py | 0 genotype_api/database/crud/create.py | 67 ++++ genotype_api/database/crud/delete.py | 30 ++ genotype_api/database/crud/read.py | 110 ++++++ genotype_api/database/crud/update.py | 21 ++ genotype_api/database/models.py | 312 +++++++++++++++++ .../session_handler.py} | 0 genotype_api/file_parsing/excel.py | 2 +- genotype_api/file_parsing/vcf.py | 2 +- genotype_api/match.py | 4 +- genotype_api/models.py | 313 +----------------- genotype_api/security.py | 6 +- 24 files changed, 590 insertions(+), 575 deletions(-) create mode 100644 database.db delete mode 100644 genotype_api/crud/analyses.py delete mode 100644 genotype_api/crud/plates.py delete mode 100644 genotype_api/crud/samples.py delete mode 100644 genotype_api/crud/users.py rename genotype_api/{crud => database}/__init__.py (100%) create mode 100644 genotype_api/database/crud/__init__.py create mode 100644 genotype_api/database/crud/create.py create mode 100644 genotype_api/database/crud/delete.py create mode 100644 genotype_api/database/crud/read.py create mode 100644 genotype_api/database/crud/update.py create mode 100644 genotype_api/database/models.py rename genotype_api/{database.py => database/session_handler.py} (100%) diff --git a/database.db b/database.db new file mode 100644 index 0000000000000000000000000000000000000000..e0644153470a43107beb2f3f62781d19762a5bfd GIT binary patch literal 53248 zcmeI%Z*S8^9KdlKO468wGF6q;2bB&Dv__y!-FN~|wyYQyt)wHQ(-4|0w=u<15{KA9 zt+WZHNBiw)WpgC^Zh=*v(xCFKG}-WP`rpI!ypyk zS=TMwwtf)8vMfhF8}ex$>hps6AdhzL{aJ4v>+9ElHE_aZ*Z#Qnq4A;d zuKrK`uJhjcxAsm}@IU|o1Q0*~0R%n^fmijayRl}^ZbYMgc;E-4U~rg3i9bn3)8TG7 z*$+o?dN>Xnn_aKf^TbZ4{ba`z?al-5H}Pq6vE31-i&#G%E$ZJ9X2j-f|7z7;TeJVV zkp{bi&@?y4`ut42){k3WZZM~2Mi9TYx|=_>x_5+A%bh|_s^>lOvf{xY9EAQxp=nn3 zZN4n(n_i-`-4mUitt~a`FWvUzR`;3s*?T6|qkdDBJ>2ek?MEF|mJh5cy52*t>vcB0 zr^3uecXFD(Q+01i+**d41jDhMl;SiEub#@9Yp6g?W3fK3P@HO%=Fz>HrhMyZO4HCB zodTsP73)q-^)?QL$dhtn}0rnR6G~>%w_*o-bD1Z(RG0&P{aSXB|v8eXO09ollyo6wL=V zy_-j?)vD{d_S-LXVsyEFSuCVv-tA0PGoC>$rYhPW$wcfPnkB6BkQJWD&M-`0#(lpZ z?@fo{C^f^W4)dzl;UtOTQNHG)nKIKV$WN1SGKcR}D(+V+c5L8Pl^+g+XrSIz&oKYk zN_l6)=}yf*uTh;XQ4iQ zG8)Ujg~G{Yfx6}<;R|zSeIdP^#KU~SI8O4J{CL?E=6UkgI*izfG!oMt83JZlQc-DNq&xd@$lpVI^X@_ z+|k^=RB^vwJu{l@HnWk<`?CCsT~uGQ`Un2?&*BT0Ng{v%0tg_000IagfB*srAb`LH z1=Oa#-1O&x00IagfB*srAbW8F91|e|2q1s}0tg_000IagfB*vQ|FZ@VKmY**5I_I{ z1Q0*~0R#{zzX1FH< List[Analysis]: - statement = select(Analysis).where(Analysis.plate_id == plate_id) - return session.exec(statement).all() - - -def get_analysis_type_sample( - sample_id: str, analysis_type: str, session: Session -) -> Optional[Analysis]: - statement = select(Analysis).where( - Analysis.sample_id == sample_id, Analysis.type == analysis_type - ) - return session.exec(statement).first() - - -def get_analysis(session: Session, analysis_id: int) -> Analysis: - """Get analysis""" - - statement = select(Analysis).where(Analysis.id == analysis_id) - return session.exec(statement).one() - - -def delete_analysis(session: Session, analysis_id: int) -> Analysis: - db_analysis = session.get(Analysis, analysis_id) - session.delete(db_analysis) - session.commit() - return db_analysis - - -def create_analysis(session: Session, analysis: Analysis) -> Analysis: - session.add(analysis) - session.commit() - session.refresh(analysis) - return analysis - - -def check_analyses_objects( - session: Session, analyses: List[Analysis], analysis_type: TYPES -) -> None: - """Raising 400 if any analysis in the list already exist in the database""" - for analysis_obj in analyses: - db_analysis: Analysis = get_analysis_type_sample( - session=session, - sample_id=analysis_obj.sample_id, - analysis_type=analysis_type, - ) - if db_analysis: - session.delete(db_analysis) diff --git a/genotype_api/crud/plates.py b/genotype_api/crud/plates.py deleted file mode 100644 index a0ee42d..0000000 --- a/genotype_api/crud/plates.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging -from typing import Optional -from genotype_api.models import Plate, PlateCreate -from sqlmodel import Session, select -from sqlmodel.sql.expression import Select, SelectOfScalar - -SelectOfScalar.inherit_cache = True -Select.inherit_cache = True - -LOG = logging.getLogger(__name__) - - -def get_plate(session: Session, plate_id: int) -> Plate: - """Get plate""" - - statement = select(Plate).where(Plate.id == plate_id) - return session.exec(statement).one() - - -def create_plate(session: Session, plate: PlateCreate) -> Plate: - db_plate = Plate.from_orm(plate) - db_plate.analyses = plate.analyses # not sure why from_orm wont pick up the analyses - session.add(db_plate) - session.commit() - session.refresh(db_plate) - LOG.info("Creating plate with id %s", db_plate.plate_id) - return db_plate - - -def delete_plate(session: Session, plate_id: int) -> Optional[Plate]: - db_plate: Plate = session.get(Plate, plate_id) - if not db_plate: - LOG.info("Could not find plate %s", plate_id) - return None - session.delete(db_plate) - session.commit() - LOG.info("Plate deleted") - return db_plate diff --git a/genotype_api/crud/samples.py b/genotype_api/crud/samples.py deleted file mode 100644 index e65956e..0000000 --- a/genotype_api/crud/samples.py +++ /dev/null @@ -1,83 +0,0 @@ -from typing import List - -from genotype_api.match import check_sample -from genotype_api.models import Sample, Analysis -from sqlmodel import Session, func, select -from fastapi import HTTPException -from sqlmodel.sql.expression import Select, SelectOfScalar - -SelectOfScalar.inherit_cache = True -Select.inherit_cache = True - - -def get_sample(session: Session, sample_id: str) -> Sample: - """Get sample or raise 404.""" - - statement = select(Sample).where(Sample.id == sample_id) - return session.exec(statement).one() - - -def get_samples(statement: SelectOfScalar, sample_id: str) -> SelectOfScalar: - """Returns a query for samples containing the given sample_id.""" - return statement.where(Sample.id.contains(sample_id)) - - -def create_sample(session: Session, sample: Sample) -> Sample: - """Creates a sample in the database.""" - - sample_in_db = session.get(Sample, sample.id) - if sample_in_db: - raise HTTPException(status_code=409, detail="Sample already registered") - session.add(sample) - session.commit() - session.refresh(sample) - return sample - - -def get_incomplete_samples(statement: SelectOfScalar) -> SelectOfScalar: - """Returning sample query statement for samples with less than two analyses.""" - - return ( - statement.group_by(Analysis.sample_id) - .order_by(Analysis.created_at) - .having(func.count(Analysis.sample_id) < 2) - ) - - -def get_plate_samples(statement: SelectOfScalar, plate_id: str) -> SelectOfScalar: - """Returning sample query statement for samples analysed on a specific plate.""" - return statement.where(Analysis.plate_id == plate_id) - - -def get_commented_samples(statement: SelectOfScalar) -> SelectOfScalar: - """Returning sample query statement for samples with no comment.""" - - return statement.where(Sample.comment != None) - - -def get_status_missing_samples(statement: SelectOfScalar) -> SelectOfScalar: - """Returning sample query statement for samples with no comment.""" - - return statement.where(Sample.status == None) - - -def create_analyses_sample_objects(session: Session, analyses: List[Analysis]) -> List[Sample]: - """creating samples in an analysis if not already in db.""" - return [ - create_sample(session=session, sample=Sample(id=analysis_obj.sample_id)) - for analysis_obj in analyses - if not session.get(Sample, analysis_obj.sample_id) - ] - - -def refresh_sample_status(sample: Sample, session: Session) -> Sample: - if len(sample.analyses) != 2: - sample.status = None - else: - results = check_sample(sample=sample) - sample.status = "fail" if "fail" in results.dict().values() else "pass" - - session.add(sample) - session.commit() - session.refresh(sample) - return sample diff --git a/genotype_api/crud/users.py b/genotype_api/crud/users.py deleted file mode 100644 index a7256b1..0000000 --- a/genotype_api/crud/users.py +++ /dev/null @@ -1,32 +0,0 @@ -from typing import List, Optional - -from genotype_api.models import User, UserCreate -from sqlmodel import select, Session -from sqlmodel.sql.expression import Select, SelectOfScalar - -SelectOfScalar.inherit_cache = True -Select.inherit_cache = True - - -def get_user(session: Session, user_id: int): - statement = select(User).where(User.id == user_id) - return session.exec(statement).one() - - -def get_user_by_email(session: Session, email: str) -> Optional[User]: - statement = select(User).where(User.email == email) - return session.exec(statement).first() - - -def get_users(session: Session, skip: int = 0, limit: int = 100) -> List[User]: - statement = select(User).offset(skip).limit(limit) - return session.exec(statement).all() - - -def create_user(db: Session, user: UserCreate): - fake_hashed_password = user.password + "notreallyhashed" - db_user = User(email=user.email, hashed_password=fake_hashed_password) - db.add(db_user) - db.commit() - db.refresh(db_user) - return db_user diff --git a/genotype_api/crud/__init__.py b/genotype_api/database/__init__.py similarity index 100% rename from genotype_api/crud/__init__.py rename to genotype_api/database/__init__.py diff --git a/genotype_api/database/crud/__init__.py b/genotype_api/database/crud/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/genotype_api/database/crud/create.py b/genotype_api/database/crud/create.py new file mode 100644 index 0000000..e66527d --- /dev/null +++ b/genotype_api/database/crud/create.py @@ -0,0 +1,67 @@ +import logging +from typing import List + +from fastapi import HTTPException +from sqlmodel import Session + +from genotype_api.database.models import ( + Analysis, + Sample, + User, + UserCreate, + Plate, + PlateCreate, +) +from sqlmodel.sql.expression import Select, SelectOfScalar + +SelectOfScalar.inherit_cache = True +Select.inherit_cache = True + +LOG = logging.getLogger(__name__) + + +def create_analysis(session: Session, analysis: Analysis) -> Analysis: + session.add(analysis) + session.commit() + session.refresh(analysis) + return analysis + + +def create_plate(session: Session, plate: PlateCreate) -> Plate: + db_plate = Plate.from_orm(plate) + db_plate.analyses = plate.analyses # not sure why from_orm wont pick up the analyses + session.add(db_plate) + session.commit() + session.refresh(db_plate) + LOG.info(f"Creating plate with id {db_plate.plate_id}.") + return db_plate + + +def create_sample(session: Session, sample: Sample) -> Sample: + """Creates a sample in the database.""" + + sample_in_db = session.get(Sample, sample.id) + if sample_in_db: + raise HTTPException(status_code=409, detail="Sample already registered") + session.add(sample) + session.commit() + session.refresh(sample) + return sample + + +def create_analyses_sample_objects(session: Session, analyses: List[Analysis]) -> List[Sample]: + """creating samples in an analysis if not already in db.""" + return [ + create_sample(session=session, sample=Sample(id=analysis_obj.sample_id)) + for analysis_obj in analyses + if not session.get(Sample, analysis_obj.sample_id) + ] + + +def create_user(db: Session, user: UserCreate): + fake_hashed_password = user.password + "notreallyhashed" + db_user = User(email=user.email, hashed_password=fake_hashed_password) + db.add(db_user) + db.commit() + db.refresh(db_user) + return db_user diff --git a/genotype_api/database/crud/delete.py b/genotype_api/database/crud/delete.py new file mode 100644 index 0000000..924722d --- /dev/null +++ b/genotype_api/database/crud/delete.py @@ -0,0 +1,30 @@ +import logging +from typing import Optional + +from sqlmodel import Session + +from genotype_api.database.models import Analysis, Plate +from sqlmodel.sql.expression import Select, SelectOfScalar + +SelectOfScalar.inherit_cache = True +Select.inherit_cache = True + +LOG = logging.getLogger(__name__) + + +def delete_analysis(session: Session, analysis_id: int) -> Analysis: + db_analysis = session.get(Analysis, analysis_id) + session.delete(db_analysis) + session.commit() + return db_analysis + + +def delete_plate(session: Session, plate_id: int) -> Optional[Plate]: + db_plate: Plate = session.get(Plate, plate_id) + if not db_plate: + LOG.info(f"Could not find plate {plate_id}") + return None + session.delete(db_plate) + session.commit() + LOG.info("Plate deleted") + return db_plate diff --git a/genotype_api/database/crud/read.py b/genotype_api/database/crud/read.py new file mode 100644 index 0000000..5615957 --- /dev/null +++ b/genotype_api/database/crud/read.py @@ -0,0 +1,110 @@ +import logging +from typing import List, Optional + +from sqlalchemy import func +from sqlmodel import Session, select + +from genotype_api.constants import TYPES +from genotype_api.database.models import Analysis, Sample, User, Plate +from sqlmodel.sql.expression import Select, SelectOfScalar + +SelectOfScalar.inherit_cache = True +Select.inherit_cache = True + +LOG = logging.getLogger(__name__) + + +def get_analyses_from_plate(plate_id: int, session: Session) -> List[Analysis]: + statement = select(Analysis).where(Analysis.plate_id == plate_id) + return session.exec(statement).all() + + +def get_analysis_type_sample( + sample_id: str, analysis_type: str, session: Session +) -> Optional[Analysis]: + statement = select(Analysis).where( + Analysis.sample_id == sample_id, Analysis.type == analysis_type + ) + return session.exec(statement).first() + + +def get_analysis(session: Session, analysis_id: int) -> Analysis: + """Get analysis""" + + statement = select(Analysis).where(Analysis.id == analysis_id) + return session.exec(statement).one() + + +def get_plate(session: Session, plate_id: int) -> Plate: + """Get plate""" + + statement = select(Plate).where(Plate.id == plate_id) + return session.exec(statement).one() + + +def get_incomplete_samples(statement: SelectOfScalar) -> SelectOfScalar: + """Returning sample query statement for samples with less than two analyses.""" + + return ( + statement.group_by(Analysis.sample_id) + .order_by(Analysis.created_at) + .having(func.count(Analysis.sample_id) < 2) + ) + + +def get_plate_samples(statement: SelectOfScalar, plate_id: str) -> SelectOfScalar: + """Returning sample query statement for samples analysed on a specific plate.""" + return statement.where(Analysis.plate_id == plate_id) + + +def get_commented_samples(statement: SelectOfScalar) -> SelectOfScalar: + """Returning sample query statement for samples with no comment.""" + + return statement.where(Sample.comment != None) + + +def get_status_missing_samples(statement: SelectOfScalar) -> SelectOfScalar: + """Returning sample query statement for samples with no comment.""" + + return statement.where(Sample.status == None) + + +def get_sample(session: Session, sample_id: str) -> Sample: + """Get sample or raise 404.""" + + statement = select(Sample).where(Sample.id == sample_id) + return session.exec(statement).one() + + +def get_samples(statement: SelectOfScalar, sample_id: str) -> SelectOfScalar: + """Returns a query for samples containing the given sample_id.""" + return statement.where(Sample.id.contains(sample_id)) + + +def get_user(session: Session, user_id: int): + statement = select(User).where(User.id == user_id) + return session.exec(statement).one() + + +def get_user_by_email(session: Session, email: str) -> Optional[User]: + statement = select(User).where(User.email == email) + return session.exec(statement).first() + + +def get_users(session: Session, skip: int = 0, limit: int = 100) -> List[User]: + statement = select(User).offset(skip).limit(limit) + return session.exec(statement).all() + + +def check_analyses_objects( + session: Session, analyses: List[Analysis], analysis_type: TYPES +) -> None: + """Raising 400 if any analysis in the list already exist in the database""" + for analysis_obj in analyses: + db_analysis: Analysis = get_analysis_type_sample( + session=session, + sample_id=analysis_obj.sample_id, + analysis_type=analysis_type, + ) + if db_analysis: + session.delete(db_analysis) diff --git a/genotype_api/database/crud/update.py b/genotype_api/database/crud/update.py new file mode 100644 index 0000000..845ca7c --- /dev/null +++ b/genotype_api/database/crud/update.py @@ -0,0 +1,21 @@ +from sqlmodel import Session + +from genotype_api.match import check_sample +from genotype_api.database.models import Sample +from sqlmodel.sql.expression import Select, SelectOfScalar + +SelectOfScalar.inherit_cache = True +Select.inherit_cache = True + + +def refresh_sample_status(sample: Sample, session: Session) -> Sample: + if len(sample.analyses) != 2: + sample.status = None + else: + results = check_sample(sample=sample) + sample.status = "fail" if "fail" in results.dict().values() else "pass" + + session.add(sample) + session.commit() + session.refresh(sample) + return sample diff --git a/genotype_api/database/models.py b/genotype_api/database/models.py new file mode 100644 index 0000000..9edf2a1 --- /dev/null +++ b/genotype_api/database/models.py @@ -0,0 +1,312 @@ +from collections import Counter +from datetime import datetime +from typing import Optional, List, Dict, Tuple + +from pydantic import constr, EmailStr, validator +from sqlalchemy import Index +from sqlmodel import SQLModel, Field, Relationship + +from genotype_api.constants import TYPES, SEXES, STATUS, CUTOFS +from genotype_api.models import SampleDetail, PlateStatusCounts + + +class GenotypeBase(SQLModel): + rsnumber: Optional[constr(max_length=10)] + analysis_id: Optional[int] = Field(default=None, foreign_key="analysis.id") + allele_1: Optional[constr(max_length=1)] + allele_2: Optional[constr(max_length=1)] + + +class Genotype(GenotypeBase, table=True): + __tablename__ = "genotype" + __table_args__ = (Index("_analysis_rsnumber", "analysis_id", "rsnumber", unique=True),) + id: Optional[int] = Field(default=None, primary_key=True) + + analysis: Optional["Analysis"] = Relationship(back_populates="genotypes") + + @property + def alleles(self) -> List[str]: + """Return sorted because we are not dealing with phased data.""" + + return sorted([self.allele_1, self.allele_2]) + + @property + def is_ok(self) -> bool: + """Check that the allele determination is ok.""" + return "0" not in self.alleles + + +class GenotypeRead(GenotypeBase): + id: int + + +class GenotypeCreate(GenotypeBase): + pass + + +class AnalysisBase(SQLModel): + type: TYPES + source: Optional[str] + sex: Optional[SEXES] + created_at: Optional[datetime] = datetime.now() + sample_id: Optional[constr(max_length=32)] = Field(default=None, foreign_key="sample.id") + plate_id: Optional[str] = Field(default=None, foreign_key="plate.id") + + +class Analysis(AnalysisBase, table=True): + __tablename__ = "analysis" + __table_args__ = (Index("_sample_type", "sample_id", "type", unique=True),) + id: Optional[int] = Field(default=None, primary_key=True) + + sample: Optional["Sample"] = Relationship(back_populates="analyses") + plate: Optional[List["Plate"]] = Relationship(back_populates="analyses") + genotypes: Optional[List["Genotype"]] = Relationship(back_populates="analysis") + + def check_no_calls(self) -> Dict[str, int]: + """Check that genotypes look ok.""" + calls = ["known" if genotype.is_ok else "unknown" for genotype in self.genotypes] + return Counter(calls) + + +class AnalysisRead(AnalysisBase): + id: int + + +class AnalysisCreate(AnalysisBase): + pass + + +class SampleSlim(SQLModel): + status: Optional[STATUS] + comment: Optional[str] + + +class SampleBase(SampleSlim): + sex: Optional[SEXES] + created_at: Optional[datetime] = datetime.now() + + +class Sample(SampleBase, table=True): + __tablename__ = "sample" + id: Optional[constr(max_length=32)] = Field(default=None, primary_key=True) + + analyses: Optional[List["Analysis"]] = Relationship(back_populates="sample") + + @property + def genotype_analysis(self) -> Optional[Analysis]: + """Return genotype analysis.""" + + for analysis in self.analyses: + if analysis.type == "genotype": + return analysis + + return None + + @property + def sequence_analysis(self) -> Optional[Analysis]: + """Return sequence analysis.""" + + for analysis in self.analyses: + if analysis.type == "sequence": + return analysis + + return None + + +class SampleRead(SampleBase): + id: constr(max_length=32) + + +class SampleCreate(SampleBase): + pass + + +class SNPBase(SQLModel): + ref: Optional[constr(max_length=1)] + chrom: Optional[constr(max_length=5)] + pos: Optional[int] + + +class SNP(SNPBase, table=True): + __tablename__ = "snp" + """Represent a SNP position under investigation.""" + + id: Optional[constr(max_length=32)] = Field(default=None, primary_key=True) + + +class SNPRead(SNPBase): + id: constr(max_length=32) + + +class UserBase(SQLModel): + email: EmailStr = Field(index=True, unique=True) + name: Optional[str] = "" + + +class User(UserBase, table=True): + __tablename__ = "user" + id: Optional[int] = Field(default=None, primary_key=True) + plates: Optional[List["Plate"]] = Relationship(back_populates="user") + + +class UserRead(UserBase): + id: int + + +class UserCreate(UserBase): + pass + + +class PlateBase(SQLModel): + created_at: Optional[datetime] = datetime.now() + plate_id: constr(max_length=16) = Field(index=True, unique=True) + signed_by: Optional[int] = Field(default=None, foreign_key="user.id") + signed_at: Optional[datetime] + method_document: Optional[str] + method_version: Optional[str] + + +class Plate(PlateBase, table=True): + __tablename__ = "plate" + id: Optional[int] = Field(default=None, primary_key=True) + user: Optional["User"] = Relationship(back_populates="plates") + analyses: Optional[List["Analysis"]] = Relationship(back_populates="plate") + + +class PlateRead(PlateBase): + id: str + user: Optional[UserRead] + + +class PlateCreate(PlateBase): + analyses: Optional[List[Analysis]] = [] + + +class UserReadWithPlates(UserRead): + plates: Optional[List[Plate]] = [] + + +class SampleReadWithAnalysis(SampleRead): + analyses: Optional[List[AnalysisRead]] = [] + + +class AnalysisReadWithGenotype(AnalysisRead): + genotypes: Optional[List[Genotype]] = [] + + +class SampleReadWithAnalysisDeep(SampleRead): + analyses: Optional[List[AnalysisReadWithGenotype]] = [] + detail: Optional[SampleDetail] + + @validator("detail") + def get_detail(cls, value, values) -> SampleDetail: + analyses = values.get("analyses") + if len(analyses) != 2: + return SampleDetail() + genotype_analysis = [analysis for analysis in analyses if analysis.type == "genotype"][0] + sequence_analysis = [analysis for analysis in analyses if analysis.type == "sequence"][0] + status = check_snps( + genotype_analysis=genotype_analysis, sequence_analysis=sequence_analysis + ) + sex = check_sex( + sample_sex=values.get("sex"), + genotype_analysis=genotype_analysis, + sequence_analysis=sequence_analysis, + ) + + return SampleDetail(**status, sex=sex) + + class Config: + validate_all = True + + +class AnalysisReadWithSample(AnalysisRead): + sample: Optional[SampleSlim] + + +def compare_genotypes(genotype_1: Genotype, genotype_2: Genotype) -> Tuple[str, str]: + """Compare two genotypes if they have the same alleles.""" + + if "0" in genotype_1.alleles or "0" in genotype_2.alleles: + return genotype_1.rsnumber, "unknown" + elif genotype_1.alleles == genotype_2.alleles: + return genotype_1.rsnumber, "match" + else: + return genotype_1.rsnumber, "mismatch" + + +class AnalysisReadWithSampleDeep(AnalysisRead): + sample: Optional[SampleReadWithAnalysisDeep] + + +class PlateReadWithAnalyses(PlateRead): + analyses: Optional[List[AnalysisReadWithSample]] = [] + + +class PlateReadWithAnalysisDetail(PlateRead): + analyses: Optional[List[AnalysisReadWithSample]] = [] + detail: Optional[PlateStatusCounts] + + @validator("detail") + def check_detail(cls, value, values): + analyses = values.get("analyses") + statuses = [str(analysis.sample.status) for analysis in analyses] + commented = sum(1 for analysis in analyses if analysis.sample.comment) + status_counts = Counter(statuses) + return PlateStatusCounts(**status_counts, total=len(analyses), commented=commented) + + class Config: + validate_all = True + + +class PlateReadWithAnalysisDetailSingle(PlateRead): + analyses: Optional[List[AnalysisReadWithSample]] = [] + detail: Optional[PlateStatusCounts] + + @validator("detail") + def check_detail(cls, value, values): + analyses = values.get("analyses") + statuses = [str(analysis.sample.status) for analysis in analyses] + commented = sum(1 for analysis in analyses if analysis.sample.comment) + status_counts = Counter(statuses) + return PlateStatusCounts(**status_counts, total=len(analyses), commented=commented) + + class Config: + validate_all = True + + +def check_snps(genotype_analysis, sequence_analysis): + genotype_pairs = zip(genotype_analysis.genotypes, sequence_analysis.genotypes) + results = dict( + compare_genotypes(genotype_1, genotype_2) for genotype_1, genotype_2 in genotype_pairs + ) + count = Counter([val for key, val in results.items()]) + unknown = count.get("unknown", 0) + matches = count.get("match", 0) + mismatches = count.get("mismatch", 0) + snps = ( + "pass" + if all([matches >= CUTOFS.get("min_matches") and mismatches <= CUTOFS.get("max_mismatch")]) + else "fail" + ) + nocalls = "pass" if unknown <= CUTOFS.get("max_nocalls") else "fail" + failed_snps = [key for key, val in results.items() if val == "mismatch"] + + return { + "unknown": unknown, + "matches": matches, + "mismatches": mismatches, + "snps": snps, + "nocalls": nocalls, + "failed_snps": failed_snps, + } + + +def check_sex(sample_sex, genotype_analysis, sequence_analysis): + """Check if any source disagrees on the sex""" + if not sample_sex or genotype_analysis.sex == SEXES.UNKNOWN: + return "fail" + sexes = {genotype_analysis.sex, sequence_analysis.sex, sample_sex} + if {SEXES.MALE, SEXES.FEMALE}.issubset(sexes): + return "fail" + return "pass" diff --git a/genotype_api/database.py b/genotype_api/database/session_handler.py similarity index 100% rename from genotype_api/database.py rename to genotype_api/database/session_handler.py diff --git a/genotype_api/file_parsing/excel.py b/genotype_api/file_parsing/excel.py index ff47493..a88b682 100644 --- a/genotype_api/file_parsing/excel.py +++ b/genotype_api/file_parsing/excel.py @@ -6,7 +6,7 @@ import openpyxl from genotype_api.exceptions import SexConflictError -from genotype_api.models import Analysis, Genotype +from genotype_api.database.models import Genotype, Analysis from openpyxl.workbook import Workbook from openpyxl.worksheet.worksheet import Worksheet diff --git a/genotype_api/file_parsing/vcf.py b/genotype_api/file_parsing/vcf.py index e265d88..9d08d90 100644 --- a/genotype_api/file_parsing/vcf.py +++ b/genotype_api/file_parsing/vcf.py @@ -2,7 +2,7 @@ from typing import Dict, Iterable, List, TextIO -from genotype_api.models import Analysis, Genotype as DBGenotype +from genotype_api.database.models import Genotype as DBGenotype, Analysis from pydantic import BaseModel diff --git a/genotype_api/match.py b/genotype_api/match.py index 897d664..203e94d 100644 --- a/genotype_api/match.py +++ b/genotype_api/match.py @@ -2,11 +2,9 @@ import logging from genotype_api.models import ( - Sample, SampleDetail, - check_sex, - check_snps, ) +from genotype_api.database.models import Sample, check_snps, check_sex log = logging.getLogger(__name__) diff --git a/genotype_api/models.py b/genotype_api/models.py index 1562898..44bbc10 100644 --- a/genotype_api/models.py +++ b/genotype_api/models.py @@ -1,12 +1,7 @@ -from collections import Counter -from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple, Type +from typing import List, Optional -from pydantic import BaseModel, EmailStr, constr, validator -from sqlalchemy import Index -from sqlmodel import Field, Relationship, SQLModel - -from genotype_api.constants import CUTOFS, SEXES, STATUS, TYPES +from pydantic import BaseModel, validator +from sqlmodel import Field class PlateStatusCounts(BaseModel): @@ -63,308 +58,6 @@ class Config: validate_all = True -class GenotypeBase(SQLModel): - rsnumber: Optional[constr(max_length=10)] - analysis_id: Optional[int] = Field(default=None, foreign_key="analysis.id") - allele_1: Optional[constr(max_length=1)] - allele_2: Optional[constr(max_length=1)] - - -class Genotype(GenotypeBase, table=True): - __tablename__ = "genotype" - __table_args__ = (Index("_analysis_rsnumber", "analysis_id", "rsnumber", unique=True),) - id: Optional[int] = Field(default=None, primary_key=True) - - analysis: Optional["Analysis"] = Relationship(back_populates="genotypes") - - @property - def alleles(self) -> List[str]: - """Return sorted because we are not dealing with phased data.""" - - return sorted([self.allele_1, self.allele_2]) - - @property - def is_ok(self) -> bool: - """Check that the allele determination is ok.""" - return "0" not in self.alleles - - -class GenotypeRead(GenotypeBase): - id: int - - -class GenotypeCreate(GenotypeBase): - pass - - -class AnalysisBase(SQLModel): - type: TYPES - source: Optional[str] - sex: Optional[SEXES] - created_at: Optional[datetime] = datetime.now() - sample_id: Optional[constr(max_length=32)] = Field(default=None, foreign_key="sample.id") - plate_id: Optional[str] = Field(default=None, foreign_key="plate.id") - - -class Analysis(AnalysisBase, table=True): - __tablename__ = "analysis" - __table_args__ = (Index("_sample_type", "sample_id", "type", unique=True),) - id: Optional[int] = Field(default=None, primary_key=True) - - sample: Optional["Sample"] = Relationship(back_populates="analyses") - plate: Optional[List["Plate"]] = Relationship(back_populates="analyses") - genotypes: Optional[List["Genotype"]] = Relationship(back_populates="analysis") - - def check_no_calls(self) -> Dict[str, int]: - """Check that genotypes look ok.""" - calls = ["known" if genotype.is_ok else "unknown" for genotype in self.genotypes] - return Counter(calls) - - -class AnalysisRead(AnalysisBase): - id: int - - -class AnalysisCreate(AnalysisBase): - pass - - -class SampleSlim(SQLModel): - status: Optional[STATUS] - comment: Optional[str] - - -class SampleBase(SampleSlim): - sex: Optional[SEXES] - created_at: Optional[datetime] = datetime.now() - - -class Sample(SampleBase, table=True): - __tablename__ = "sample" - id: Optional[constr(max_length=32)] = Field(default=None, primary_key=True) - - analyses: Optional[List["Analysis"]] = Relationship(back_populates="sample") - - @property - def genotype_analysis(self) -> Optional[Analysis]: - """Return genotype analysis.""" - - for analysis in self.analyses: - if analysis.type == "genotype": - return analysis - - return None - - @property - def sequence_analysis(self) -> Optional[Analysis]: - """Return sequence analysis.""" - - for analysis in self.analyses: - if analysis.type == "sequence": - return analysis - - return None - - -class SampleRead(SampleBase): - id: constr(max_length=32) - - -class SampleCreate(SampleBase): - pass - - -class SNPBase(SQLModel): - ref: Optional[constr(max_length=1)] - chrom: Optional[constr(max_length=5)] - pos: Optional[int] - - -class SNP(SNPBase, table=True): - __tablename__ = "snp" - """Represent a SNP position under investigation.""" - - id: Optional[constr(max_length=32)] = Field(default=None, primary_key=True) - - -class SNPRead(SNPBase): - id: constr(max_length=32) - - -class UserBase(SQLModel): - email: EmailStr = Field(index=True, unique=True) - name: Optional[str] = "" - - -class User(UserBase, table=True): - __tablename__ = "user" - id: Optional[int] = Field(default=None, primary_key=True) - plates: Optional[List["Plate"]] = Relationship(back_populates="user") - - -class UserRead(UserBase): - id: int - - -class UserCreate(UserBase): - pass - - -class PlateBase(SQLModel): - created_at: Optional[datetime] = datetime.now() - plate_id: constr(max_length=16) = Field(index=True, unique=True) - signed_by: Optional[int] = Field(default=None, foreign_key="user.id") - signed_at: Optional[datetime] - method_document: Optional[str] - method_version: Optional[str] - - -class Plate(PlateBase, table=True): - __tablename__ = "plate" - id: Optional[int] = Field(default=None, primary_key=True) - user: Optional["User"] = Relationship(back_populates="plates") - analyses: Optional[List["Analysis"]] = Relationship(back_populates="plate") - - -class PlateRead(PlateBase): - id: str - user: Optional[UserRead] - - -class PlateCreate(PlateBase): - analyses: Optional[List[Analysis]] = [] - - -class UserReadWithPlates(UserRead): - plates: Optional[List[Plate]] = [] - - -class SampleReadWithAnalysis(SampleRead): - analyses: Optional[List[AnalysisRead]] = [] - - -class AnalysisReadWithGenotype(AnalysisRead): - genotypes: Optional[List[Genotype]] = [] - - -class SampleReadWithAnalysisDeep(SampleRead): - analyses: Optional[List[AnalysisReadWithGenotype]] = [] - detail: Optional[SampleDetail] - - @validator("detail") - def get_detail(cls, value, values) -> SampleDetail: - analyses = values.get("analyses") - if len(analyses) != 2: - return SampleDetail() - genotype_analysis = [analysis for analysis in analyses if analysis.type == "genotype"][0] - sequence_analysis = [analysis for analysis in analyses if analysis.type == "sequence"][0] - status = check_snps( - genotype_analysis=genotype_analysis, sequence_analysis=sequence_analysis - ) - sex = check_sex( - sample_sex=values.get("sex"), - genotype_analysis=genotype_analysis, - sequence_analysis=sequence_analysis, - ) - - return SampleDetail(**status, sex=sex) - - class Config: - validate_all = True - - -class AnalysisReadWithSample(AnalysisRead): - sample: Optional[SampleSlim] - - -def compare_genotypes(genotype_1: Genotype, genotype_2: Genotype) -> Tuple[str, str]: - """Compare two genotypes if they have the same alleles.""" - - if "0" in genotype_1.alleles or "0" in genotype_2.alleles: - return genotype_1.rsnumber, "unknown" - elif genotype_1.alleles == genotype_2.alleles: - return genotype_1.rsnumber, "match" - else: - return genotype_1.rsnumber, "mismatch" - - -def check_snps(genotype_analysis, sequence_analysis): - genotype_pairs = zip(genotype_analysis.genotypes, sequence_analysis.genotypes) - results = dict( - compare_genotypes(genotype_1, genotype_2) for genotype_1, genotype_2 in genotype_pairs - ) - count = Counter([val for key, val in results.items()]) - unknown = count.get("unknown", 0) - matches = count.get("match", 0) - mismatches = count.get("mismatch", 0) - snps = ( - "pass" - if all([matches >= CUTOFS.get("min_matches") and mismatches <= CUTOFS.get("max_mismatch")]) - else "fail" - ) - nocalls = "pass" if unknown <= CUTOFS.get("max_nocalls") else "fail" - failed_snps = [key for key, val in results.items() if val == "mismatch"] - - return { - "unknown": unknown, - "matches": matches, - "mismatches": mismatches, - "snps": snps, - "nocalls": nocalls, - "failed_snps": failed_snps, - } - - -def check_sex(sample_sex, genotype_analysis, sequence_analysis): - """Check if any source disagrees on the sex""" - if not sample_sex or genotype_analysis.sex == SEXES.UNKNOWN: - return "fail" - sexes = {genotype_analysis.sex, sequence_analysis.sex, sample_sex} - if {SEXES.MALE, SEXES.FEMALE}.issubset(sexes): - return "fail" - return "pass" - - -class AnalysisReadWithSampleDeep(AnalysisRead): - sample: Optional[SampleReadWithAnalysisDeep] - - -class PlateReadWithAnalyses(PlateRead): - analyses: Optional[List[AnalysisReadWithSample]] = [] - - -class PlateReadWithAnalysisDetail(PlateRead): - analyses: Optional[List[AnalysisReadWithSample]] = [] - detail: Optional[PlateStatusCounts] - - @validator("detail") - def check_detail(cls, value, values): - analyses = values.get("analyses") - statuses = [str(analysis.sample.status) for analysis in analyses] - commented = sum(1 for analysis in analyses if analysis.sample.comment) - status_counts = Counter(statuses) - return PlateStatusCounts(**status_counts, total=len(analyses), commented=commented) - - class Config: - validate_all = True - - -class PlateReadWithAnalysisDetailSingle(PlateRead): - analyses: Optional[List[AnalysisReadWithSample]] = [] - detail: Optional[PlateStatusCounts] - - @validator("detail") - def check_detail(cls, value, values): - analyses = values.get("analyses") - statuses = [str(analysis.sample.status) for analysis in analyses] - commented = sum(1 for analysis in analyses if analysis.sample.comment) - status_counts = Counter(statuses) - return PlateStatusCounts(**status_counts, total=len(analyses), commented=commented) - - class Config: - validate_all = True - - class MatchCounts(BaseModel): match: Optional[int] = 0 mismatch: Optional[int] = 0 diff --git a/genotype_api/security.py b/genotype_api/security.py index 7df050d..5bb59c7 100644 --- a/genotype_api/security.py +++ b/genotype_api/security.py @@ -7,10 +7,10 @@ from starlette.requests import Request -from genotype_api.database import get_session -from genotype_api.models import User +from genotype_api.database.session_handler import get_session +from genotype_api.database.models import User from genotype_api.config import security_settings -from genotype_api.crud.users import get_user_by_email +from genotype_api.database.crud.read import get_user_by_email from jose import jwt import requests