From 9c2de8c0a49ae4a44511e64fdb24d966f38ea600 Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Mon, 18 Dec 2023 15:11:38 +0100 Subject: [PATCH] Add FHIR tutorial and simplify code (#626) * Removed unused dependencies Signed-off-by: zethson * Tutorials update Signed-off-by: zethson * Readd fhiry Signed-off-by: zethson * Skip jupyter_core Signed-off-by: zethson * Fix import Signed-off-by: zethson * Don't fail session-info Signed-off-by: zethson * Revert session info change Signed-off-by: zethson --------- Signed-off-by: zethson --- .github/workflows/run_notebooks.yml | 2 ++ .pre-commit-config.yaml | 6 ------ docs/_static/tutorials/fhir.jpg | Bin 0 -> 23811 bytes docs/conf.py | 3 ++- docs/tutorials/index.md | 1 + docs/tutorials/notebooks | 2 +- ehrapy/{util => }/_doc_util.py | 0 ehrapy/core/meta_information.py | 4 +--- ehrapy/io/_read.py | 15 ++++++++++++- ehrapy/plot/_scanpy_pl_api.py | 4 +--- ehrapy/preprocessing/_imputation.py | 26 +++++++++++------------ ehrapy/preprocessing/_quality_control.py | 4 +--- ehrapy/tools/_scanpy_tl_api.py | 3 +-- ehrapy/util/__init__.py | 0 pyproject.toml | 6 +++--- 15 files changed, 40 insertions(+), 36 deletions(-) create mode 100644 docs/_static/tutorials/fhir.jpg rename ehrapy/{util => }/_doc_util.py (100%) delete mode 100644 ehrapy/util/__init__.py diff --git a/.github/workflows/run_notebooks.yml b/.github/workflows/run_notebooks.yml index b64bbcb9..42d19880 100644 --- a/.github/workflows/run_notebooks.yml +++ b/.github/workflows/run_notebooks.yml @@ -13,6 +13,8 @@ jobs: "docs/tutorials/notebooks/ehrapy_introduction.ipynb", "docs/tutorials/notebooks/mimic_2_introduction.ipynb", "docs/tutorials/notebooks/mimic_2_survival_analysis.ipynb", + "docs/tutorials/notebooks/mimic_2_fate.ipynb", + "docs/tutorials/notebooks/mimic_2_causal_inference.ipynb", "docs/tutorials/notebooks/mimic_3_demo.ipynb", # "docs/tutorials/notebooks/medcat.ipynb", ] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 02a200d4..04140f6f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,12 +10,6 @@ repos: rev: v3.1.0 hooks: - id: prettier - # Newer versions of node don't work on systems that have an older version of GLIBC - # (in particular Ubuntu 18.04 and Centos 7) - # EOL of Centos 7 is in 2024-06, we can probably get rid of this then. - # See https://github.com/scverse/cookiecutter-scverse/issues/143 and - # https://github.com/jupyterlab/jupyterlab/issues/12675 - language_version: "17.9.1" - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.1.6 hooks: diff --git a/docs/_static/tutorials/fhir.jpg b/docs/_static/tutorials/fhir.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cdb99cac325b1dabb85894d1b72f94842c474190 GIT binary patch literal 23811 zcmce-b#z=!t~l6kGcz+YGcz+YGcz;B%*@O&Gc(1G?Koy;h$*&XJ27p4^WOXIeDh}h z*t2I>-N!y%DoIt6N>^7`_hap24}c;kB`XC00|NlSKp()z6~IBo#@WN!-NxC4gq?{6 zz#}HB2mt~33zP}K^Z?-al+px1008tQ0D1uUM`}pS9ZcPvJuJ;_JncysNo+hkT=AG#CsB ziw_$A3;8!J6gUtJ`eOrt01hHW0Y?Gp_3?jb|G)e%WLli|q(qUfOT9Y$@E-+f3^TCiNdd5zw`Hb;DRlh+k^%$8pS~wG1h%_~vpP2k7 zB2ry#opd7ds~vK1>kGltaQ{ia;HDF8ItKfC`*Qc}|3nr*rn9<6Fdy$XpQe3Hw0Q7V zR5;LX$ixsY8R?ne(ZK$n=vZf13aVUBf&v%sGlI(~q}}tbR|JqtWnQlgxfmm0N&W+! zkoa~E6~zGZSR?CD?Hi4b&;9Pxj_So*Q;I%^-glXQPX)yqqxay%p$jz#bM?9bHAQYy z5D6`>X*~6EbK2-MP&?4r{JAJO_xHayfZZfG?r(5{NP`am`k}3Q`RCmZ`+>vx;uF3P z0B>!h-)ia~rMRL}snq5X(X?Xcf3pPey@dEQSOi<^hs z7leV}_|i0dagPULq3aW|Uu30Uz@3dvlXmf1KC%37ZaBnd3i zGUee%`kY!f;rUO7hG*%TIPP5O%k8g)2^*e0+Qq-0DyHfmm{}e^05njS_*{wL4^@pX$q0OOpeSI_&N1CN47+P5&YXtbWq? z3&3|iM!m1~Xy$6Ws%^B0ALmXe{L8aJgrYcu|5-}acClGYa*2a-UPI4nein5Rp8XHx zfV(8^qPV5s_v?N78R0i$hSa_KzD1RKL7v9+T81Q-{~`v86$?rl|FqvYKclF)KRESQ z4F_B=Gscy=BFKnw13?VE^F^+_gdC}yd_NpK&l@8zeemls^UYF+j?hXn& zg9uKQp60&}EHkq&tgfxjP0I)T`MV%Q{8BWE_6#CwYi0cb#mI|ye^0lR`VWB6kwAk5 z-~2*&{<%@BzJKn_>*p+8ZFGI^X&&~)A2;;#)=`rR5A(uLb#Q{;JB_UAc~7rm2N&8q zggF0TgL|(acZ}p!uHMh*NGJbY;IniJ+L-UW_dcuh0T+qG=eNH; zvcDh-7SWy$(0g5MLv>`yQxeqc+Pyq7qw-Ou9x9FzYjq_FlYIcl@u&YVxeHc(+X}U= z=shp#vUS2XN;xsg3(Sq|B<|4sdkBTfe^6k*Csw?h(U1N-gTE8rb=*^+WR6V!ki->f zUem7bX}NX(hFnYNVlg>?|NJL>>6F%WR$ux1rl&j8O&zB$BxPfsSO)aTinV-An>m+P zc0TQ<+RKF_x*+#lAicOK_RW09FaCetCICWq!5U9sb1C4I+r6Y++;P=-amUeJpb>w` zef{N4anMxZ%KGrm$q9>!PoAJQ@TGjQy}0xDxrd=vYJ{53s@nZakl8Q1O@f{u8)vU> zDY6Uq3;WAl)N4N+(BAMvZXViHgI-@ewf}wsrFZM)?r%g7?jJQ1>I)`SJ!QT7zy8F? z`raJ~)jvovakVj-hUN&NrBGh)>%Ree&pExkvh^}JwQ+oSi>2xFmVz+{G9joZKB(cTl{KWtN?B$O`Ve3kM1|#o1W86wI5;vUF`0slZCa(ZF`4; zA2UC0)|b0l{7#d2zuluApE*e8eQxr2QZ0Y~lqa1s@AI-So`L2}&|&(wOI9SeC_AG6 zJI^KK4`H@}FAYb01sHgV_N_=? zg$9NTrvh90tz8Y{sGW3L@Va~d@J)BIi)BQ;dAsxMCJ&01d_rq1ncmPJE&w-IC;$7F zq325}BLkL@?CUCL73+JeG72;N$DRnjW(w+?6WjYzwd|^Dm-c+^AnwWY`|^C}csNzR zzH}eIn`gZ5j&!g?#E_ZdYQbtj_oHI+tIpmWaRZ?vV@GK(ld%9@x#_CAp<|z=$C+Qx z`9G@}0FLwR5WQ*CJBv5+nEB+f^Czd?W${m2KNfFcKO?I@*HoTfzrOv}@aTJz$FJ|v zuccq}jPEqLs_(Q|TQQsdal#PA;}|}=txqNO1xq=mDX83MJZp#O^199WjVhJM^?3gH z@0UuUK$Ln9X$E?SO1OC0oS7ixpPUDjl2y4RabbV6 zw9Lis$0wh;f68B8STtks+P?32B&fOmaeV(`scj1{kCyqZWr>(i{D(38;o0wd8q`3E z)$~4#=U_*>Uf=OwhO38{iGvjS0WW_Hl~97F3+74wCI$c?pTeUOU(pHp^DLcw{FHB` z9u#cJtyR=7WOT8ytL^TEN)hC1ZSrSx==l$867A#)vD8>Sob3Ih&#M z$lLtJXZfTjS8T8S`?G&b007_|HrakLnY{U10RInj|6dLVgA#Z~6aeV%0|bzuJpRA< zfdRoGAW_kvP|!(8Rmj-bG00gtOek15LqI7#3@FnFg9Ls6wzZiLwCZJ&4<-Tost^o! z@3=cZuzx`>rm8(TaU*nXk%@i%01Ojf!oAcb5q?6u7iA=+7aB91kqcxjSbBwhANoG^ zHL0A+YrC}vL!0IUkhEv;(8G!R>XTF}kVzn4e?@SeylWjC8oUJ>bS<|$nOMw9iFlv9 zW%4V)`}EFJKK)1c$eR@Y4*i9AsOS<=sa28Kk(>U)wzO!w*tq!>KJW<9=!D~{ExnL- zo*v6~+{Sd^`U3z6jvh>@-|p>|a#;^DFR%579Zw-1yk)MPw&dyH#NS#rJZH|&7F}v} z@D~nw>Z1}+J?JXads>=HvqKxedK(B_Hp@O6C)b`C5qfy+wy0DlbP>|F4S(=GW{Bo8 zs6+b9*r}fCc5)+YpU9q=TQ23E$@@32nas*G$8TB@?7k{PL_?>OJ}T*j(W(pYS3U&}3=X1dP8o4PhqwdcfFMB`G6q4hFv=VjdxM?{p@9P6QFc`wLua&4zS-IQwhHrn!S zS%#)Xp4$9Q$_raPY6BJ)jmS2tQZY>@LpgejXH;bwj$V$PeJ2j;Mb?^SeD z$p^r@W3fJmi!4Ya=xgCbICPq-eqDm&7(iY1(CZl^<1Ueqgj`!)DbH(r9P+St;I``qeho> z4{IP}DYAJ>NBL^W(+-1cDeuI=oXnAtk=9vl`=6f~B!XYc31lo9q@?B9$`Ck0=gTX~ zx$LHxg1{t6F5Acf=KU}E*?-I@0_A)!rN|Y zj*g0%D;t3p?RmBEFJv7`s0uaGN&=CC7?}K@(;0uQ1OyeU?>yeE@0h!4qp=iZ>QS$8 z@QBPrb09xsFW^PHWFTdor-&UR0BuDp#bQmzb_)N3)=Tog^joaG$vvnM)IBcr&v={Q*m>g>1c}|WuF2^D<|DZ2EsAO9bwT5`{acVs)TU}b9*$`XqAxAlA4I%1UauGhFW@(qRP%qBNF z>2rR-|2M#i9;%=XQ1}Pn4@@0fNlF@|IWTbCF_SRrkp6NLG&PmA=QXvqcu)?`@WR5t z@gcWsYk=ZIOH->NNxqTfeu`*}8FUU$xfIofUdz8ej*KcI8`9i53Mcz8J6;8;$$H~gi= z2$6=a`}yn zB>fp_f5~IZ>mzz~y6YxD)#WY%ohURyc}9VDzEcUF$DQE$MOnoYSnqvlmgx}~UHW0( ztGJ)A(-WGJktFX6_3tN(>b_t%U%xHK@vCN!_t&B!p8AnCyYE_b$b}I&^k3$PEt*Z_AHK=DMxVktek4`F;J&m?m6Y^ zmeDIfLp2RwJ@Ff)rBHwN@36N`K3^W1D_znj*vd?+_8Z*zKUv#>sHzcxF<<>cGJckA z?ioJRGxCzkj*F&^FKR1pt_!APULzIcVxgu?4ApjT`ds11k~MoUxA2jdad%WtjdPU0 zEz;i^CjoyNGSE|vJzk#7)=^be(e$?Xatyj;t*4;kPHo6_6Ro%5eUC_0U!u93mt-7N z$@~DoVOW(_RL(*B5ALl$^bt#wdu_n=!!e|muj`BOHw{kZU1GGo?NjQo_j%naYb>3| zCURK}C{o&SR!b=%K=>8jI-@x1QYbjhplZgI`mqMv`PH{yaod>&HmeG^wpra78-!19 z@3rT(P*9fYCDg4kKuv7=`bR}eIl`BlN;?-K>Ru+Q7x#&J^U&QUcDjV47bduY`n9C| z=+vO~>OBfIT-2zB2!kd(`SlV2yMPcIJ*(Pp$6ditnEU2sC9wzeQhYM(FTz4)! z^(w<&w=?uv9QzVq{ZXo8auUa7gb5ktSkxQ|h zr$v-2jnn6tBEH`-mq@M$D@>Pfv6$jo_3Ifr8m@rBxoWEHs-hTcpipVK`g?iJbtNUW zvZ!mW8Ja9f1)5C5#cS*)Ik6`gb22&k_S$|%E*7DZ3?h-t+H5rA9h_>NT?B``ZTX5W zs?BvjxwQcWQ1-Lr<_w<91e}QD)s`bY;x@mmWah{;V3)+1Vfbldn4x|IM;1u$)ES0< z4=+kXY41QOQMx^Ll#jua$%6&QLxloMVF>37ft)fFfflfsF zYRZ`6d)y&M>9H;;= zaBv_vBs2sN{2%%BU*&TMR5VCd6m(JyGBy!WC~`3sRZ~|I_RwYy7m3B&f27r*BDyfx z2SAGtjSS6?IL+2$yHPnHI4C*}9|KxB)t~#Fe~{j=;^?~-mz&Ks|54#Q*Y~VH8K0Nj zYh>{|Yi*t+Uen~`GGr!7!LaCBh~}>cPLDh|-b%eDK1n=5addq)F2qjho-TVks50@*8~r z@^J7#s7QIHydt&10I$Y2M-e9>pWe>*X1JFI2Nws-9{8ZFknqjJxxk!|yxHcWSHMpr zV482ri&$bpUX$^YHfGfVTcUu}dmks>Y~T_mKjRKR?QAAlZ6Re=n(ldG9;9*Z)w1qTB3br6V6KQYl261&CEM>lvG z!F7MpuTL12eEkDiujUnN?l@`8DJ@NDkj<*5CN7SxeDqmag)?=_N9v2%k^{d)(d650 zZH*&88|~MCn{Gw0(qVOk3h+;D?A9u{5Af?FT4)=hxa9s=UU=bVYhrv3pnbu~tkjYA9G|d?VJZ9GpyhUO)am}l$W~OOw(^G|Y>H8im6Lw+_yJFpqHon<_sRmRRd<8BX}wOQ z0y!)9D(|g6Y}ww|;keT+dn*b4Gar3#wnl3U6?U&l?S-K_ZORw%s^bYup*qUcov|jVCsKHRq3P-X{e=mmOhKt4K>v4n%c+L6abPwY2K_4Xn3f6nYHuWPd#s# zGN9|YJXZ4G*DCU=5q8DaF~m05jW?&(H8kWbslD(%xeI7>X5fZXp|n!eNDF}Ev0x~> zmR;8J!KA*XP00B}yZJLHHpj&}J5g+Ok-ucR7PmY6x>omRB=1-HYQ%E5veEH`sda45 zP&S2UY#oCdi@DW%MOWJ{RXGYQ9o>p(d`xCP{-4O?7GQ4XU&OlFzu#LgIG%#FBo_0R zrO%Gf6s)sj_<8J2!ET$HZY%lg{#H)ztty(`7?*e@$X}c87X8_2VJBVcDL((g4+^I| z{|Kj`N1ITPpbEub@IT=c6ir!0LE+TY6$*nwMKzS1L#(9PWq6TuKxW!#`88f|td4+#1QO&&b z5!kw$vX4o(va4(NdbK8Aj<>>5%^~X8N+fgund6~s%hap`FPS(H4##EHL&p?;s@*Z( z;nHKlCqey8)k9&>}a)mH3H2&;+^$1$t4{3>U?_oc3`y2v^SS)Ga;)1}yoQB%!~ zp#`p|3~O)4Qg#hnTU$?3NNK0H)Y)c@iW|&QpK&`{mmyO&ak}`~)@tsk*0nDgF-<;i zTJlj1?a83#XnG6tJ1TA=4u+$HBcfed>?m7hy2NZOb9Xn0OfF_!On}iLuS-@JRAlMR zu;}fIotC-A(sXk={I$lD2PvsUVohYJIy&G@ByJg~_47GV6cnKtGBbaS1u!+eaF$rL zE#B_iwLFGn$*t-|bjv$e&Iuv^CMdN{8UCy?MrK87$LXJG#+#8k?R-~6Tnb8f+@GC( z-S%9KW^36izqVqPP-!CRQU~F)38Lp4IB6*C2ynu4=rGddK{*r7)|@ynFuN6&ED7{~ zq4>>!fzWylijU}TS;WHdSAbWl-Z_i5RzE4a(*W@|b;X(QN*vcZwOohZbU1+rCOdJd z%l1!qW;iZa*>A46YVk(KP1?Rd&pki>Z{3LaIqXu(3I7$1C!wcK_=lgu`uW)~s38 z2J9&g-=V`B=a`L*yg9<86tDY*azEu`Y;)FXQthl16enH#Y6tkCLSHW2!hxlIMRd{jr|*lj-4o-(Nxq`5G&I_5#GRS#M6-MvO1G zOuY0dH$KU8fUF~mu0!|ry|##&-IeF?HL}h&u{7L!%5ToHK?WnP z`qMHHTCVFlu9-V{1?7BEFP|+S3mJXdro{T&obdFC*1G2!!nR&V z2~x+8BTa8XHTuUqor{7A6P?}tZDJ)Rt5vOPT_~h=jEl%L4qKWLX1kPm!KQI>06d+= zs6-VzR*T3m*=d^qY$W}di~JSLN^yUi^;a79IQg;e4C+woz~D7WD8=g*yU)uX|a@6oa3$(JcQI)?3JGPip@$ zEC0Urlv9Qbusbhjo_kDcvD22qR=RkvIiRhbhjb_1ZsC8G=;OZ?{nHpY64I#Q#LAbeEG#7fMlyfWTLXJ;vn|2TG zEtj<~E4XZy@d~hPlSN)nywbh~fswn;yuv8Ai3ujwpU32Xc`AR?vF+FHb+ejr)4H{L zI4&yZ>rl)v%&dA`f*#L#h0<8I-X~P}FGadIvZr z?C)zpBE+k~`MK4BZ1LnveNb;Dz(6 zA8XVjP$Qh9GbA%*vSeke2jB7|ms{TVt~URR{#VmsGKBHm>XYd-W_Fk~1{DhKp5hyY zgS)OAnb{8jN{-qDSbcm$4Ebn`r(+yW9R$bns!tr*!#TtWBa~c^Tj6l*icNZKw#GbQ zn7s96cI;I)$CvGtIg+fY`^%ivF-U*UgAd8EsNa{}%b$T33)53iEwyxZv?rxRS z=*(JV>Y42Nq;6Hdp$yd2p)RF8$}_pO$VATR?x@y97V>sw8Z29+z~|P>B^a`$)l=y~ zqNRkZof~yi!SZH;59Mou(PDr_&8EfP-~C4W04O0eD4&d4D2ZbVbbE}nYp1JNOs}gy z(Vcdx>>3#vlOutpL4>NAQ01Lsvg);0UHqf|Z z)dIeK_6V!6(m@7{tDvQb;Hqnu)0wquPZJI!r0u4me{Z3DSGi03Q#dpYOhjX6$uS?H z_4%v|)8@7qhDN6Vfy9g&Y!=>Cbo5$dbg3k0OUKbxZ{-c^V_{+$| zU6;{}@|c#kn3NSzDhAu)ZL1iD3d1ie*RziKm#M4$h0XVFQvo@@x=`Nk?D`1Y*p-`h zp%kOi%f{Yxn7l{)*q2;`fv&AgeywxWDN$XM^w>0@&b;=c41dhKQ$#c@E{t-_WrBOKkIIYjdR2n)_(dVvPL}0rTDEc;gSYz=_isbMuRe|G zOwN#$WV2RCyV>o>IGnThEYUNZ1ua@LR?(v`o=8{k+F4JA*9KpC>!^$Oa1B9IY+bhX zdJW<5?l_SyF~9RoT0z$u85e8{;*bHQMEhgidAB?I+ORb}>S|m#RjVk;>$L9%vn}Xb z%dX8X1lQG-x}fO!MAcfk8R?^b3SO587I5qe*l9sL#+hf%hnc-%g-IeJvq#DXJWnDyF#WsG8Om{#A|Zq-O*!|-nh8Fa=jKkR+Q;^iRsC-vlR6a z`W=0-nORl3jsTxv`7xG~WG#>UstyAV(X8`Q<8);`Ws;GmsWl=5ReTLyP-d`jZoc!&6Gjzoz`f}`Yg z71ArW?(*j)#ul5W*DPnUSc-vZB73esv5Sm<{0k%M38(yxfsp@P}>2`$Dq>QyA9+!|K|sntg4lMEhz~ zWwXdKtIcs0`_Ol(=T;m+)fY*OF&5^Sgtf6II7)cgTCDtU=Id3~>yJq*h($Y4i|-UQ zR?}@cY!g?0g^fFm%z2tnDUy4}1P3$y@Y?lw$2c(OI0LHv@g|R|F|l>%Uv28uF|lcJ zarPY)JmVo}5ERTqPBW8jF?fhGViLwC?gm1(ITMa>R_N_G6CtHw5+Iyu-Vn21?>_*b zB#s^h00h-H!9m5%|8DvPLj|CK5;#>;5*63bl#*tXeU{=WGu?@H@7^ucfVE2jxz&Fr`pgZ?Mk#i*OY`7UD5X*ry z)xPmmge3qdgds`PSNIc^DU4kP$7i;H#Bg}3))WXbm^5z~P?n`lpPeP{a5H2-1 zIK~_R;ozYGU=jW!Z5}59L7?%{KhX}eki|64vq!R+=UJ;Ky;s31d%&?q^tkTtNj^fQN-JuE*i*;+4%uD zmN!G7zM)fMke~#5WfRK3LgxSUJhx;>FtzA;A$axxZVe`6fe)6R5IYN?=0m@WB-k`Y5xHypun%%2oqktuDgs*r}2) zpPUu{z^Q%!Y{TJ#M$lxfIeLv%)y_B*PLr?Cc)tJ*9Si9Q^%035LdgLvq)k@by*Di% z06y@0?zX#2CsRV3M5Xo*K;EEqLtCG>m`ZfGu-efPM}I;LsF4J3Iw1nFPgOL<1Lm+8 zoJNr(C~EO;GbzikMDkg<2fG~4g{OGRhoyVkn+2RDbBvwn4(VYgCl)IE7GDH^%^-*I z+!{Bf>@Ct7rA1jJoxS=c5+F+hv#GRLy{m%o0r(_7r2`Hmamc?>913IK5@W~~>7=?v zuz}NuUE`%eqa+}Oyw^S2$VS>VPeW83o);Sub#{0R0RX@O640Pt7q`$p{kBk71;>m{!CSPo=`3)`MqqT;6Dc+f#}C}5;;Z5rjtLvqLti-# zF;)ruz@)ec?sxG5mrlnnKFUZ$S=(oK#E?qa6@(h>r>@X^Jg6`d zqDj}7@exM1@H&Qi7NY^}APNLZ2z}V`80jL#!~+RTbXSP?>{+paS7_o2cpAfRakv-3 z2MBJ49oC=s#rWehz~f#&+lDTGLYk-Hs<(>!-gOLB5ek=?N)C=7MMhw~1 zk>VJ1JIUcY;tgaN#^cZ%9DF)efpxH51dVO7+ci?W50j$)b8%4}YP6Jssw5^4v00@% zmcOmkA>h4aL5cWq{H zd7}r-LdyjV00H=h!-|43&R~n7Dqj(H#|Z-tc(4a-z#-9;G6eE;$~OzgzNr{^3?MDs z`NnaOwj5u0M|Iefi(YuN-tJ&2$xp(HTDbqEbb%&?0S7}IE@%UU<06!eg^IfnNK zft5_BW9Vo=VAN*Y_cK1u-y4DRayYCe;wZmBYY8zmP>xiv0LbEOixI`s1jkTvU%co> zgrak9^p25+ac8<`S#hE51$)9YNT(4Gl@@wL_S>5lmwZtcOO;QlDRA#zv$M<}!f0|~ z$555z9B-Zl;!;>FXeD?j6q`VV;Hd}EKa*giS|)u94e{){oSW?<*s*=2krp*<*;xp~ zNrr%g*%dDLfTBc{N;lyA7E4nig605aIwLO;+=CDu0kMnkyAk;lF^$t)w`u7az;np&p7tesT_UC8^u06 z#66{}bsf7SVE&#V+p0)*y$r694Y{ZnkBatDbO_AMxhPfrm#f9rG7q+=$g<}Km;BjE zIk+5zB&R$Zz$O{63Ydhjt!%UP9FWRD-*A}obhIeQC1MK>7kLk?Y}-3KeJvQi-NwF^ z;vow$WCBQp4v=(mI>>y{HI+j}Fvx8CYrMie!BrVZZPJHuH|L{DV=}YLs90D+XZeuv z5?9qqFuXr_@JOsI-6eL!2cy5g#Um?UA9&Z4nT;0jlyR1JA?_ozVyKEXO+}2ksDd0o zu!8+JIIK6tj0xQe9>CL`1!6A1tX1`byPo6}Io zfh?A$Y!(YEHDqlZbj%(#=8qHz)1T4b8zzfkPz@F&n*2H6Bor=|rr4j(`i=_eO8|@-goIzTI4`BzJYD9F91TuM zeY-8dknN6kA43(K9_(%9bnQ_RBv7IlH&nEL5>@2c3`7pKU|uF;KZfmK(x66ygX5XFC|$9VwU!G63x!^)*fLt1HO91!W4~ z6?Ejm!e?b`p!A!;d1k&=#@TH3WDgL9-b!MSh|`Lga zjF=We=ts~g0s+$HPZLZogm0s>@ziS~U-gLNY0~#fBcR$sOS#HbC1}>vb7@KqvrC}U z?|i~6!Qc$>wJ$+>tOyZ0GQ=Lx0MYX{@I{TNg-i6BQ8A6UvR=~K#HGd3+WSHZtVO7b zJ{Eb(<%h$OU>7goBBPVDg65kcXZHj|gBDQ*VJL1g`(^wC(2igu&osqE z26UNYex(tknV^gZ=_#BGT^UspJv>iAMd2Rf*@I28jkZnsS)k&tya+d%ioJfVhZF5Iis$f6et8+J*JcX3P5K;-#89k7NpwFuUu^>ple4`N) zk&XBc5?_(pG!Bzs3Y(o}hr?=KPCs0Jqy7>BBZ}q#4TX|`D*xpz%v7sOS*?c$={ve% zl6s>EnF^9ojF2fE_!CTWiyvZ(#V`U49Hx=2&6PP6gGZzfG_+p?gS}!Y4@)^rG;6k^ z?Xdu^Ik<(p_6}=WMahnM73MMW+gP9(J|}5S zRr7Ek+f)$0UkZvbF`wRi^aEYf%P*5c(mHeGQL3!by@>(U3mcWB;N@&5dcHgaKvfDW zB++_0ox)XJ}W;?*_?r-zbDM28I0DEhS zZ;W*-b^`p00@dfJ49tcXJX&qUTKA)Yqx}G8W{VAf_o9OdAqZgX6MLuwNiPilbtGUx z9|tgV0D9lA3H$)@#-iIy+QyfqgE&G_Ef)ef^a>OD4eguC^;HG0I&ZEi8_T{^`9ug1VVWK>m}KL zLp`!4`7H8p#z8~CaXjegE|PzT1QX{xBBjtC{dYJ;-plB*%l6+*{~HNL;$*A>fUgFo z5B)DB#D4nACF%?sMT6SNzar7Us%$kuP@xWS6?2;W3lfR2{L3hIdg!I2Ew~iS-;oFp zp?v_^FrJy*rlq;!kiQ|(X25y@VU8bwV(@(>-hXZTXCFo0NAE>s;dXr54gCE#Lcrba zJLsMp;D{jW>{;(RN$l}AG$a}K55;?D24*9}2j@VSzu^FG?q0^t$ij;Q#&dxC$&kMx zp?P3+Qt8Q79B&3+k_LVz{tZb!8yuoNSM1olQ>-=_{M7Q#NC-}S<9;|izJ5aEG0;eKJL0TRuH{4)-~vI^%GT_Z?}&@wVwPrmy%92ig-a&WmeJq(5k z3<8_y?}Px+Q9G~63;UbFlKW`&p}!G=&e)y)y08Qi&urPx|Ly7!G@~vahX_I4!fpRb z2Vw(3a@!ji)cWV&dKLhI=Mafy;a`mWe}u#Q|8cfKwH7*1?*kA50_uM%Er0bsp#D{A zK{0hL5eZFc9$r-0za{zU@_!o0P(A>e$nG!;u^uFQ3&9bvzFkKQ_$$#T5MnNKdGjMq z{b8Tlc~^Hi$mF?$BZ9gFKShkw@G3h^#~nbs-$rtB1vTF#mSebj4D<3MhQH!U%VeLC zd5M69r2@3!xrGgb5YM>WUc4;M(&Av39(<`3xkZVMf#TFMJI169!-!xZ8qx4d!!M)| z_g$UN0zMF#u!8~&#+;wnEm)1LM#|q~iBa%#(?Wqh$m)R%6$Ha5sHR)w5lAA`$%R{> ze#UYjKyL}R+vpZs9U&O4ilwsqfi|2?CMVBwj-%S50}Ko{59~WbGPF503Fqr4rn&Ku zKE_rqcunA`NpJ=B7zMNmIzc2s3z3uhXRaLUa}2_9cRDsO7%06a59l@H5Q2yYI45<2 ziK}q{vbqRUuq37qL54Fftx5=yk|h8blXYKZ0B5ye@SaP{pY~FUBGoS}v&TY(KOvaI zp8>WISm`mQaVSn$F@_Lb5GlI|fLB*@Qvdnukx0&T%g_BXDrATZYlz&J1BxaV6A?k(6D^(4{ zsIe1`;u9E(y~!_G8d4}EP&m+{XshRhT;Lgo=kRb43+6Bj{ydTUv9qf9x5ctyU?&is z>=Jlpvz$Ver81sW?lnc_UN$hb>5s*F=R;f}KpEBDF0SKUN#mL$x)2-^DB>Yc!^E9u z)i4Y4rK_fbm~O#O8E7-qw9X-WIcpd@*#rtyA^W*O>AvlmK`9hjbZF1x&_yS|NW2A@ z@&TS702X4hQH%H4%=7nsR!I`oyecF14JsA55`1_X^%JhABq%h%uSx3HN*@)tPdJEk zxz0v{O6Zw~+koK|Ksf|NSd2Hju#A_l$hQd;VRcFz-oqY&zCvuCPv#K_@(oew6jRm1 zd^Tv>Y~mWSlr!ML(fYf$vM3#9!telr-}?qoO+O+_xxw79%@Q&tF?7Hp$ia~=N?2Zm z2BRX8daS5S)7=?D=Mz!Tlu*K!g}>>E6xn1Ul*PD@;3|bKPN~y9&t8Nfsj^iU@Im$Q z2(x40aZ@V5%(IX5Yo*%i(cU)3A>U*Y+c2rVrjVdn%%^Dm-9RCMtZ z2Nt#c7LlncrO0MBR+wS{1joH$3VIYw)q5gK#DNV&tA{m>S%_OX z3L2#Z!4Sr6mAl}K78Sr_yi>S6aE0SWZ-7Z4og z|I~f~pyDqp=|2L}U!~t+k$(S-?>zUAoQp}1ebqO==z28~-4@BYgw0+3^9|X&x;MXMI2f`@ zJ-Cp(3nYjAyIZpzNu;-a*`2j}!l~syD_P}_!crR0c3tg)tn1{p6?|OMui^bs$ckG2 z9ToX{7ACZx;}ej7EZh+0RPOl(s*%t{ShmTm79DP6L5@dP7N7@nEkjqy zG4hJ^o(a$d>RrvlEfHLd+kQ2m0k+lgC451Qe|PR$Y08BQIPQcqO*tt@!+u4ANgEuG zXxLx<)eQFRAE+_a`{{`>!lb5p09Y9&O0S0am_z3bqKQ;WX9DCLW*$kLrdiY7Zu|~K2*A?d29am94{pL>uUa7(1;0FU&NL4>9 z^lY!HM7FaN30py-Vh#uY&BRRO|d#s#e!H|~tk&?=tyw?%z7HX5LZ7z}%> zvJ$NJf09mAR=63$v~yszJ%XU6-!W&!#(z=4;kk1N&AY4I3UBy7GD!hibDw zH?TuU+cdmxxo7r`6T@nTv{=7Sd0dre|5l*z>qYXGtV>X~9nL&5^rQEB_Gx$}5q+|e z97_k~8S4(MSXWtG=&qSWfZ*UB{1*j}Q8OROIraq0p}p@mL(Nya2N@a*zJUSShh$<- z<(P4%uN*RB}oiAwFbcyQS@^;ji)bH*5=gmm#3bBRJLw+2hA&5cx3yF%Kxj0?~ZDs zd;U&B5<+jG7bCql5s(@nL3-~sN=Kw9O^Wm?T@Xctgx&=yQXXl76cqvn1f&W=kgC$; zq5PuH_q^wwzjpWB-JLt-bMKkCGtt#Ll!KbzYR_Z3)~X~e&-}TGsp4g__qcgJBIoJJ zg1Me@=E&7LgV*xnV@2LEjm?|(e@dAoOkk*>iMvVgYvpP>d8CaX;1Q$rxLrZ*O1Ll`4+>~It$|P@N#Q$`_RG+1~hfPjBf1_GN+U%`H(fWorj7#x{*BF7B z;U9Az4{wiN+BEpU(}hRGJ)IZvo4(CRgvKSH;(xg>68!zY+}EFCSrYtRm7&)NFufIn zAmN`$-^9v3M*9EDeUb2QH<$fR3$9W9E1Low5Y+CbXIorSEDq&rsonM!MinHPMej!S zv*~>p5Jr1nwi;U{?O=e%kKb34sO9a2!7zD4z#QSev3~3q1o?ZfCvuIz|1O4DL z_mscb#9G0cir>!VLTsoBqYxh8QM#Jg)&-;yFQ+d+B&Nu1AZ!&~mMnaOMf{KXOSA!s z1~RA#EP2h#iu2?Q^<{U*ny{Hlm?vejuO_zVNzFt=uzQ6JJw@=eYIVM)oz7g!JVliz z;_HJ;KuikF7iNrKh>&i}Bsi8eBfnS^hHi^ln6ZQLCNmfq8s%)N?^A&%g>nMFuFxwt zxx9y`jSGvPlHurhiTNuh6XiKjB}4km$}@ndU#r;s@ZB%^00Y;)-SaTIRSPziCRB+? z^g-n-FF>~eUs%r3+0R6~hv_xy`h3{5f2m+}I+i?d1 z=Rekya~xdAPBb=jaSX{_^uaLvjK_${h+T7n%3v33ilRXfA_zyA%LNRT%`x(1y^pL7iH4*pkKO4Lbqm0 zZzNhi8J(;(`sCJms&eOI$Qq|}b@{ofXS8}Q*s5H0#+<9xKXz|Rlb979WQC0dWAAkt zcWKpOWxHxScZrbujkxyn$y&)OKus@a3yb)Ohu^XH*Y!6UFH@zti?us!pRKx}KTX;j zs18lXh^Ltgq(qL7V0BVkRy8rW1j%))XT>*`a8uaoXE3t`B|rE{QKw2hbsR&Nq;<*M zX=-Z!w@5CaAIwLZnI% z{U`-tw7czG&xmwpl=w@7E1l#1NfH@OwfeELjRP{<5ky<4SY*D|`g0sp zrynszMG6pbj_>mX1Mv;Nz&_>)ia)Ut-@ltS-U|GIGeF05@txw@q@PUQ{d#7IphsmZ zU>|C7g#Z32e-^vKpc~;CRZjy%a?nXLcFqz-0pAPyXXHg;0$)dmjOwL^OF|=pyv}zZ zg81z)uISi-j&Cz{ix2b{!aJ%f#lNdM5%Tjz7Rq>g4Y^7HMO(FgbjU{5J`U!9rjG1m zoVLE46_?(;TK}S)fp~^Q6)N!To?Q3TlLqIuXaK0!6AJpcraDHzjoOQuKEC32uC<5e zwB`*X4`D7G&r#2NPrNk|2k>p^GXQIEMXut|7}=J^zLYv8uj}#A6+iB3nkbaAhoMm~ zJ2yEF!LiMcyl9Vr7*_l_(Rz$FxRQlO>xAIlO#rivMs^A_e$DFAc2f$96ro)4GVe0) z=7WO29(uldO=^sGOps1`KwUKVW&zD(cT%^im|rzS5~4Esu0vnr{Ehk)DS5 zQX3U#$(D7DPJ6vvq72Z`6&8GY>&D#U7)!mH*MYu|=!f&*sY3T`JPLnL9DR2w)pI*( z=|@z0QC{$z3Ntn`OG<8g)SkEGqHkDV_iPdlE5{3mWY#8bp*8YPH#4SA4?tMTHW1F? zO~Jak+rDSL>%7k<@dpqQDLAxeA zig{|`eB9JzGysT^)6hw9?kB}}L~CAkV53DFp$ABWC+ldiq%()?fZ%Hb9@(QxF4t>8 zrw1a}z6g9EJ_-s9zk@m68oaSw9=>%#7%rx4-VH*2f0}4@!nA=l7*(rP*V-d$C&}c} zc~!+w1u^RJ#cLkKKVy9;fGKcmSioadpz$jykI!oChI3yvB*~&uheErx!T^XS1J%1o zb~|VM`xaUc>j@RK@GT>{$Wu5=s0yGLejO+}Rk*K8cav|W^(b>a&(hX`IMR#ka|<^l zZ$>vFM39!z6_2%MmYj0`q1%c`XW8TusuvOZ0UhZk{I-vEX-j07GD6RMXdt>KDV-LQv^x~2|La_ ze{g@p9^|zCd1t_jceNuFqg)4HV2n$H$4^GOs3l4-|LR z3?&aLYjYfKE>Sbe9`-lw5#T06qTbPN(~&RL{`&Yyhs{sfMQ0RD`^}l1?(L{U3-?wx zZkPCTg}z<+;`6)mel!oV{o5eND`2!Zhs1u$v_T>3FXC|Ma^tz$yoLE^pj`igebeT? zZNB*SzS^PYT0)Sqx}8DHIHqCBh1VxkEWy8lxxd{k)LI+m?wQ~I5vxPJp(7yy^|H6i&kAqd3bUq|3R2@_u)EYXtWo&*; z9f_A$|71GXa0ytv^Y`0@GR%p#eo!whnOV!cs)I5v8CI90K@JEvKpL7t?(!v$JpX&U zHRX!KUsl+uuQQ0&auHPt@z0?Mw4L>qNKv@n)?kwV;$3jGIZqm z(_Pv58qL$nM^hykG|lg?|KaL?Grc|_{7mv6$rl2`lUWxGnw}gKp6LrLWnM2>&qWToALpyB$vcD&vP#G=q)DKfjZT*P-NK0CNp9FKrj&HJQH(gnuiI@!k9J-!*lIQ@;_ZP`l>vM-PSSjyX;vKT=5R zy7VKun#{1wFb11=0qv1(;;$hF(6r&zZ$VP)V~OK=XBO7>jrUDWO?AOd5S8jsKF%dv zX##YX6q@!hQ$tcOd#3Q#)vS}ichx`_nK-P=8xb$bKH?%l-<3kHfA8}&--(~Y9j6xl{ekZx^gY_W%_AZvT(Lg z2>{7&(*@*!P|>T3>uyX85T2XAnS!4?wxZDn0H!9hc;o24GAS6;s%{wJ`;CyqmKOCN z=TijAZ5!XI&R#fdR{zc1UxiiG|L+HsFPVR+TiCG}LFg8=WAnJRuTx(+gg%v^*^q7M z%GSfK*S#9X#8#LG#^IA0jy~|5u6-T}$moob{+TKEzE#As^HedO58z91lk9mH zsTL?*!QE%5PLdVpevyo!n2kW7HA?XnxlUF;@&Ign>rR2juEZec*ZxB-u!Wm zE;?SfinXL#$#KIoa+vonbCf=e<49ug!EeavzkF3&)fYlh#*6Xd9-ik!9)TFKupF7R z8V*G_DQnqtbqK?bRf`G5rjj<`=8N^i{&w@GUb8Ks!?Mh?4F@Jk-vf_BQ-fkgi|aIU zY}s3!XV%;TkoDjj5j}Pw1juqOukIYQSV*$rNi0LhtF6JC-WgNe)sMhr$roFH4ZG+9 z#`dB4bP5*DYJK-=@T5Xj(!Mk;p-gaCADV@SeXda-xYJd^Fgbo)!k`1nCd!5k`q?EKHLJm2vyLrbhU26|a2YzRyML3@=zH@_?!w6Y6nH^1i4S zv!+_}6Yx|qe$|SpcWGw&iDXj$R>b-|p=}scdXjzMf4k=0-U${=4?&~RNT$g`R6oU- z^G(4)umSIER|tw1z}PUwm5sQq@mmJ_)<(IiH~0*`0+wx0jMrf2Kw>R^X<1w~dLois znNCD8WLKeVIpx6@wdnyBiNoI}ImCz{xyT*X1_spOpivee4}+uB{OLGBp42;Y5Fkc1 zSv1H3M9QmP6>?yj{HWVid^5CzHdGQ%YEhfcU-#Z=NP;(x;n_k|J%7SJcU7PALrR5z z>|*Mg&kZHO*1?{Lsi9n<7epFRwC)7STPi2(ac)93Z=k8nx45qLd~YYj2#Xs&B4Wj2 zi>%BQ>+Y-vy=S3qvC{~D<1PA!)TB$*qm0KmsNc+=dH%G51S}W^#rqP}49HmHZZ>Wd zHERM-cugxUfNK+AMu7vi%5+A?(#e!*lkH1DQ1Uk?J=zu6s$<2g3VQ`HQCZIbSm!wT zaoKyHCkuf|81|7S!%w9N)C|uXH%XZC2UcdgP7feR$~I9oluEt4ji7OUtIE+r@J`8O zKlI}l;!zBI=SsYYdsh9E{O!%i$rI9+iK#rZ0RO1BX^C#SEPf?pf8G&UCjRxjm}u+5ElKR{l!A+E#86ik5VLFR1D&l-J?N$x6@}kzsS87 z=y2|*l=yB{I!ZjiPi1E)PtXK;R8N;tz=kKF3w+rHg^Y!#t?X8Q;oD7LSP~^t_ zZS1al=Hdh4M0zWs_D!AW6jyJH-TOFdO@{eOn^9-WbX`m3=`V79dX3iEYI9)nkS`^y zWIWayRfa>crLBLRtwz}*xN64)(z(xe3Frl3A@*?kc>Yqc62{H9FNhjf5w)!?Rp+1c91Hzl76VGAVIB0JgDx0D2Zd+lG= zfkAbU>CR=Bm`gx3%*%TEeOr12X2Uyx_owsQCW@SjeesHMLwb}h7hzK%YJX5t*Dlx# zoCF_<7{6Z;t*r+szU^n^@6Hk&NMGIl;04z+A1=vgR_TBLr-=u=OVCTe3E(z`6(|8S zd_3{8-B-`ry2qQ%URbEMdu!a&@4axyYiOBgRCm-q?I_JEf~rc*qZ##CeXfu(rpXN14>LA zqZ_=<7wGEsY55g~bZ8ZkHDB>mP4u0<-fOZeaJ(gqU}eSpBn3#fS9AUdxN!;iDEqN4 zq^hEE^x^kEJ|Xs+! zS@g((L8#_4%Yu~3CIcS|%V-f#znIzw2EtBz6}2U<52kIXB>Se57{b^+D+H<)_w1iN zAT>we{Kd%yen^Au0ZI*geg2 z)w>Bk@Q|!S7N8`nl(!n{muI%q-LRW+rvTGLvkX-AP;ej2Sc3Q!(G$9H*Z{X+au{fKvXeAv9i~-=}SQdSLOw-$}BT{Rh#ye;h`ZP+4A*2 z>W1eEbXmK_Y-u7H#B3Ut(?ZIAvl^#;y{3WX!7J=pFuf5Fq%P;LOf!{+{@clB!ea+M zauvqX4I{F0xqA;wD3tfsU)K!`;Pt0hO8^YR6J3S1hv;R6Hu)nQqkgXGK*~3rLoOoVT@APjqDVGdT##cMG2bFb_fZA#>Co55 z%fa%+nEDSB0i=TG*{*hhl(2Z=C&^n!sgGy?eZL2|@cB&Bby@9QHfzdjWnan03b33G zlRtL7wE^-ja=0O169lB$zuep}{wYV#VEBYQrNkuw$oz!HHM12u-lws1gu|HFP{xtk KD3Icsv;PNeIVqw5 literal 0 HcmV?d00001 diff --git a/docs/conf.py b/docs/conf.py index 22e0d505..94109563 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,7 +11,7 @@ # General information about the project. project = "ehrapy" -copyright = "2021, Lukas Heumos, Theislab" +copyright = "2021-2024, Lukas Heumos, Theislab" author = "Lukas Heumos" github_repo = "ehrapy" @@ -181,4 +181,5 @@ "tutorials/notebooks/medcat": "_static/tutorials/nlp.png", "tutorials/notebooks/ml_usecases": "_static/tutorials/machine_learning.png", "tutorials/notebooks/ontology_mapping": "_static/tutorials/ontology.png", + "tutorials/notebooks/fhir": "_static/tutorials/fhir.png", } diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md index 5bec25db..9514931b 100644 --- a/docs/tutorials/index.md +++ b/docs/tutorials/index.md @@ -25,6 +25,7 @@ For questions about the usage of ehrapy use [Github Discussions]. notebooks/medcat notebooks/ml_usecases notebooks/ontology_mapping + notebooks/fhir ``` diff --git a/docs/tutorials/notebooks b/docs/tutorials/notebooks index c7ac5d6f..5560f808 160000 --- a/docs/tutorials/notebooks +++ b/docs/tutorials/notebooks @@ -1 +1 @@ -Subproject commit c7ac5d6f4e2ad9e9a0774a89690486776551fb39 +Subproject commit 5560f8088a33fa5519b97b1bfd4c96616cf1fc75 diff --git a/ehrapy/util/_doc_util.py b/ehrapy/_doc_util.py similarity index 100% rename from ehrapy/util/_doc_util.py rename to ehrapy/_doc_util.py diff --git a/ehrapy/core/meta_information.py b/ehrapy/core/meta_information.py index f288d20c..79c42c66 100644 --- a/ehrapy/core/meta_information.py +++ b/ehrapy/core/meta_information.py @@ -1,10 +1,7 @@ from __future__ import annotations import sys -from contextlib import closing from datetime import datetime -from io import StringIO -from typing import IO, Optional import session_info from rich import print @@ -27,6 +24,7 @@ def print_versions(): # pragma: no cover "builtins", "stdlib_list", "importlib_metadata", + "jupyter_core" # Special module present if test coverage being calculated # https://gitlab.com/joelostblom/session_info/-/issues/10 "$coverage", diff --git a/ehrapy/io/_read.py b/ehrapy/io/_read.py index bba86500..9e95bca0 100644 --- a/ehrapy/io/_read.py +++ b/ehrapy/io/_read.py @@ -362,11 +362,18 @@ def read_fhir( Uses https://github.com/dermatologist/fhiry to read the FHIR file into a Pandas DataFrame which is subsequently transformed into an AnnData object. + Be aware that FHIR data can be nested and return lists or dictionaries as values. + In such cases, one can either: + 1. Transform the data into an awkward array and flatten it when needed. + 2. Extract values from all lists and dictionaries to store single values in the fields. + 3. Remove all lists and dictionaries. Only do this if the information is not relevant to you. + Args: dataset_path: Path to one or multiple FHIR files. format: The file format of the FHIR data. One of 'json' or 'ndjson'. Defaults to 'json'. columns_obs_only: These columns will be added to obs only and not X. - columns_x_only: These columns will be added to X only and all remaining columns to obs. Note that datetime columns will always be added to .obs though. + columns_x_only: These columns will be added to X only and all remaining columns to obs. + Note that datetime columns will always be added to .obs though. return_df: Whether to return one or several Pandas DataFrames. cache: Whether to write to cache when reading or not. Defaults to False. download_dataset_name: Name of the file or directory in case the dataset is downloaded @@ -379,6 +386,12 @@ def read_fhir( Examples: >>> import ehrapy as ep >>> adata = ep.io.read_fhir("/path/to/fhir/resources") + + Be aware that most FHIR datasets have nested data that might need to be removed. + In such cases consider working with DataFrames. + >>> df = ep.io.read_fhir("/path/to/fhir/resources", return_df=True) + >>> df.drop(columns=[col for col in df.columns if any(isinstance(x, (list, dict)) for x in df[col].dropna())], inplace=True) + >>> df.drop(columns=df.columns[df.isna().all()], inplace=True) """ _check_columns_only_params(columns_obs_only, columns_x_only) file_path: Path = Path(dataset_path) diff --git a/ehrapy/plot/_scanpy_pl_api.py b/ehrapy/plot/_scanpy_pl_api.py index 4f61330e..4d1048e2 100644 --- a/ehrapy/plot/_scanpy_pl_api.py +++ b/ehrapy/plot/_scanpy_pl_api.py @@ -8,9 +8,8 @@ import scanpy as sc from scanpy.plotting import DotPlot, MatrixPlot, StackedViolin -from scanpy.plotting._tools.scatterplots import _wraps_plot_scatter -from ehrapy.util._doc_util import ( +from ehrapy._doc_util import ( _doc_params, doc_adata_color_etc, doc_common_groupby_plot_args, @@ -19,7 +18,6 @@ doc_panels, doc_scatter_basic, doc_scatter_embedding, - doc_scatter_spatial, doc_show_save_ax, doc_vbound_percentile, doc_vboundnorm, diff --git a/ehrapy/preprocessing/_imputation.py b/ehrapy/preprocessing/_imputation.py index 11fb448f..7e23d488 100644 --- a/ehrapy/preprocessing/_imputation.py +++ b/ehrapy/preprocessing/_imputation.py @@ -24,7 +24,7 @@ def explicit_impute( adata: AnnData, replacement: (str | int) | (dict[str, str | int]), impute_empty_strings: bool = True, - warning_threshold: int = 30, + warning_threshold: int = 70, copy: bool = False, ) -> AnnData: """Replaces all missing values in all columns or a subset of columns specified by the user with the passed replacement value. @@ -124,7 +124,7 @@ def simple_impute( var_names: Iterable[str] | None = None, strategy: Literal["mean", "median", "most_frequent"] = "mean", copy: bool = False, - warning_threshold: int = 30, + warning_threshold: int = 70, ) -> AnnData: """Impute missing values in numerical data using mean/median/most frequent imputation. @@ -198,7 +198,7 @@ def knn_impute( var_names: Iterable[str] | None = None, n_neighbours: int = 5, copy: bool = False, - warning_threshold: int = 30, + warning_threshold: int = 70, ) -> AnnData: """Imputes missing values in the input AnnData object using K-nearest neighbor imputation. @@ -265,7 +265,7 @@ def knn_impute( adata.X[::, column_indices] = enc.inverse_transform(adata.X[::, column_indices]) except ValueError as e: if "Data matrix has wrong shape" in str(e): - print("[bold red]Check that your matrix does not contain any NaN values!") + print("[bold red]Check that your matrix does not contain any NaN only columns!") raise if _check_module_importable("sklearnex"): # pragma: no cover @@ -306,7 +306,7 @@ def miss_forest_impute( max_iter: int = 10, n_estimators=100, random_state: int = 0, - warning_threshold: int = 30, + warning_threshold: int = 70, copy: bool = False, ) -> AnnData: """Impute data using the MissForest strategy. @@ -421,7 +421,7 @@ def miss_forest_impute( adata.X[::, non_num_indices] = enc.inverse_transform(adata.X[::, non_num_indices]) except ValueError as e: if "Data matrix has wrong shape" in str(e): - print("[bold red]Check that your matrix does not contain any NaN values!") + print("[bold red]Check that your matrix does not contain any NaN only columns!") raise if _check_module_importable("sklearnex"): # pragma: no cover @@ -442,7 +442,7 @@ def soft_impute( adata: AnnData, var_names: Iterable[str] | None = None, copy: bool = False, - warning_threshold: int = 30, + warning_threshold: int = 70, shrinkage_value: float | None = None, convergence_threshold: float = 0.001, max_iters: int = 100, @@ -587,7 +587,7 @@ def iterative_svd_impute( adata: AnnData, var_names: Iterable[str] | None = None, copy: bool = False, - warning_threshold: int = 30, + warning_threshold: int = 70, rank: int = 10, convergence_threshold: float = 0.00001, max_iters: int = 200, @@ -735,7 +735,7 @@ def _iterative_svd_impute( def matrix_factorization_impute( adata: AnnData, var_names: Iterable[str] | None = None, - warning_threshold: int = 30, + warning_threshold: int = 70, rank: int = 40, learning_rate: float = 0.01, max_iters: int = 50, @@ -872,7 +872,7 @@ def _matrix_factorization_impute( def nuclear_norm_minimization_impute( adata: AnnData, var_names: Iterable[str] | None = None, - warning_threshold: int = 30, + warning_threshold: int = 70, require_symmetric_solution: bool = False, min_value: float | None = None, max_value: float | None = None, @@ -992,7 +992,7 @@ def _nuclear_norm_minimization_impute( def mice_forest_impute( adata: AnnData, var_names: Iterable[str] | None = None, - warning_threshold: int = 30, + warning_threshold: int = 70, save_all_iterations: bool = True, random_state: int | None = None, inplace: bool = False, @@ -1075,7 +1075,7 @@ def mice_forest_impute( adata.X[::, column_indices] = enc.inverse_transform(adata.X[::, column_indices]) except ValueError as e: if "Data matrix has wrong shape" in str(e): - print("[bold red]Check that your matrix does not contain any NaN values!") + print("[bold red]Check that your matrix does not contain any NaN only columns!") raise if var_names: @@ -1119,7 +1119,7 @@ def _miceforest_impute( adata.X = kernel.complete_data(dataset=0, inplace=inplace) -def _warn_imputation_threshold(adata: AnnData, var_names: Iterable[str] | None, threshold: int = 30) -> dict[str, int]: +def _warn_imputation_threshold(adata: AnnData, var_names: Iterable[str] | None, threshold: int = 75) -> dict[str, int]: """Warns the user if the more than $threshold percent had to be imputed. Args: diff --git a/ehrapy/preprocessing/_quality_control.py b/ehrapy/preprocessing/_quality_control.py index 9363919c..7daac9cd 100644 --- a/ehrapy/preprocessing/_quality_control.py +++ b/ehrapy/preprocessing/_quality_control.py @@ -205,10 +205,8 @@ def _var_qc_metrics(adata: AnnData, layer: str = None) -> pd.DataFrame: var_metrics.loc[non_categorical_indices, "max"] = np.nanmax( np.array(mtx[:, non_categorical_indices], dtype=np.float64), axis=0 ) - except TypeError: + except (TypeError, ValueError): print("[bold yellow]TypeError! Setting quality control metrics to nan. Did you encode your data?") - except ValueError: - print("[bold yellow]ValueError! Setting quality control metrics to nan. Did you encode your data?") return var_metrics diff --git a/ehrapy/tools/_scanpy_tl_api.py b/ehrapy/tools/_scanpy_tl_api.py index 3b1157f4..2780eb00 100644 --- a/ehrapy/tools/_scanpy_tl_api.py +++ b/ehrapy/tools/_scanpy_tl_api.py @@ -1,5 +1,4 @@ -from collections.abc import Iterable, Mapping, Sequence -from types import MappingProxyType +from collections.abc import Iterable, Sequence from typing import Any, Literal, Optional, Union import numpy as np diff --git a/ehrapy/util/__init__.py b/ehrapy/util/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/pyproject.toml b/pyproject.toml index 2b1cdda5..ca92930c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,12 +33,12 @@ classifiers = [ "Intended Audience :: Science/Research", "Natural Language :: English", "Operating System :: MacOS :: MacOS X", - "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering :: Bio-Informatics", "Topic :: Scientific/Engineering :: Visualization", ] @@ -60,8 +60,8 @@ dependencies = [ "lifelines", "missingno", "thefuzz[speedup]", - "fhiry", - "dowhy" + "dowhy", + "fhiry" ] [project.optional-dependencies]