From f1a43d5c18d6a60de1558ab37e82425469c64ad0 Mon Sep 17 00:00:00 2001
From: VictorSanh
Date: Wed, 31 Oct 2018 18:48:17 -0400
Subject: [PATCH] Initial Commit

---
 .gitignore                                     |  16 +
 .gitmodules                                    |   3 +
 HMTL_architecture.png                          | Bin 0 -> 101982 bytes
 README.md                                      |  63 ++-
 configs/coref_ace.json                         | 150 ++++++
 configs/coref_conll.json                       | 150 ++++++
 configs/emd.json                               | 120 +++++
 configs/emd_coref_ace.json                     | 200 +++++++
 configs/emd_relation.json                      | 173 ++++++
 configs/hmtl_coref_ace.json                    | 307 +++++++++++
 configs/hmtl_coref_conll.json                  | 307 +++++++++++
 configs/ner.json                               | 120 +++++
 configs/ner_emd.json                           | 172 ++++++
 configs/ner_emd_coref_ace.json                 | 252 +++++++++
 configs/ner_emd_relation.json                  | 226 ++++++++
 configs/relation.json                          | 124 +++++
 evaluate.py                                    | 203 +++++++
 fine_tune.py                                   | 157 ++++++
 hmtl/__init__.py                               |   7 +
 hmtl/common/__init__.py                        |   3 +
 hmtl/common/util.py                            |  53 ++
 hmtl/dataset_readers/__init__.py               |   6 +
 hmtl/dataset_readers/coref_ace.py              | 180 +++++++
 .../dataset_readers/dataset_utils/__init__.py  |   3 +
 hmtl/dataset_readers/dataset_utils/ace.py      | 282 ++++++++++
 hmtl/dataset_readers/mention_ace.py            |  75 +++
 hmtl/dataset_readers/ner_ontonotes.py          | 107 ++++
 hmtl/dataset_readers/relation_ace.py           |  80 +++
 hmtl/models/__init__.py                        |  21 +
 hmtl/models/coref_custom.py                    | 204 +++++++
 hmtl/models/hmtl.py                            | 207 ++++++++
 hmtl/models/layerCoref.py                      | 126 +++++
 hmtl/models/layerEmdCoref.py                   | 155 ++++++
 hmtl/models/layerEmdRelation.py                | 129 +++++
 hmtl/models/layerNer.py                        |  99 ++++
 hmtl/models/layerNerEmd.py                     | 127 +++++
 hmtl/models/layerNerEmdCoref.py                | 183 +++++++
 hmtl/models/layerNerEmdRelation.py             | 155 ++++++
 hmtl/models/layerRelation.py                   | 100 ++++
 hmtl/models/relation_extraction.py             | 274 ++++++++++
 hmtl/modules/__init__.py                       |   4 +
 hmtl/modules/seq2seq_encoders/__init__.py      |   3 +
 hmtl/modules/seq2seq_encoders/stacked_gru.py   | 129 +++++
 hmtl/modules/text_field_embedders/__init__.py  |   3 +
 .../shortcut_connect_text_field_embedder.py    |  63 +++
 hmtl/tasks/__init__.py                         |   3 +
 hmtl/tasks/task.py                             |  96 ++++
 hmtl/training/__init__.py                      |   3 +
 hmtl/training/metrics/__init__.py              |   4 +
 .../metrics/conll_coref_full_scores.py         |  35 ++
 hmtl/training/metrics/relation_f1_measure.py   | 109 ++++
 hmtl/training/multi_task_trainer.py            | 380 +++++++++++++
 hmtl/training/sampler_multi_task_trainer.py    | 501 ++++++++++++++++++
 html_senteval.py                               | 166 ++++++
 requirements.txt                               | 102 ++++
 scripts/data_setup.sh                          |  32 ++
 scripts/machine_setup.sh                       |  35 ++
 train.py                                       | 237 +++++++++
 58 files changed, 7222 insertions(+), 2 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 .gitmodules
 create mode 100644 HMTL_architecture.png
 create mode 100644 configs/coref_ace.json
 create mode 100644 configs/coref_conll.json
 create mode 100644 configs/emd.json
 create mode 100644 configs/emd_coref_ace.json
 create mode 100644 configs/emd_relation.json
 create mode 100644 configs/hmtl_coref_ace.json
 create mode 100644 configs/hmtl_coref_conll.json
 create mode 100644 configs/ner.json
 create mode 100644 configs/ner_emd.json
 create mode 100644 configs/ner_emd_coref_ace.json
 create mode 100644 configs/ner_emd_relation.json
 create mode 100644 configs/relation.json
 create mode 100644 evaluate.py
 create mode 100644 fine_tune.py
 create mode 100644 hmtl/__init__.py
 create mode 100644 hmtl/common/__init__.py
 create mode 100644 hmtl/common/util.py
 create mode 100644 hmtl/dataset_readers/__init__.py
 create mode 100644 hmtl/dataset_readers/coref_ace.py
 create mode 100644 hmtl/dataset_readers/dataset_utils/__init__.py
 create mode 100644 hmtl/dataset_readers/dataset_utils/ace.py
 create mode 100644 hmtl/dataset_readers/mention_ace.py
 create mode 100644 hmtl/dataset_readers/ner_ontonotes.py
 create mode 100644 hmtl/dataset_readers/relation_ace.py
 create mode 100644 hmtl/models/__init__.py
 create mode 100644 hmtl/models/coref_custom.py
 create mode 100644 hmtl/models/hmtl.py
 create mode 100644 hmtl/models/layerCoref.py
 create mode 100644 hmtl/models/layerEmdCoref.py
 create mode 100644 hmtl/models/layerEmdRelation.py
 create mode 100644 hmtl/models/layerNer.py
 create mode 100644 hmtl/models/layerNerEmd.py
 create mode 100644 hmtl/models/layerNerEmdCoref.py
 create mode 100644 hmtl/models/layerNerEmdRelation.py
 create mode 100644 hmtl/models/layerRelation.py
 create mode 100644 hmtl/models/relation_extraction.py
 create mode 100644 hmtl/modules/__init__.py
 create mode 100644 hmtl/modules/seq2seq_encoders/__init__.py
 create mode 100644 hmtl/modules/seq2seq_encoders/stacked_gru.py
 create mode 100644 hmtl/modules/text_field_embedders/__init__.py
 create mode 100644 hmtl/modules/text_field_embedders/shortcut_connect_text_field_embedder.py
 create mode 100644 hmtl/tasks/__init__.py
 create mode 100644 hmtl/tasks/task.py
 create mode 100644 hmtl/training/__init__.py
 create mode 100644 hmtl/training/metrics/__init__.py
 create mode 100644 hmtl/training/metrics/conll_coref_full_scores.py
 create mode 100644 hmtl/training/metrics/relation_f1_measure.py
 create mode 100644 hmtl/training/multi_task_trainer.py
 create mode 100644 hmtl/training/sampler_multi_task_trainer.py
 create mode 100644 html_senteval.py
 create mode 100644 requirements.txt
 create mode 100755 scripts/data_setup.sh
 create mode 100755 scripts/machine_setup.sh
 create mode 100644 train.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2b19e98
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,16 @@
+**.DS_Store
+**/.DS_Store
+
+*.pyc
+*.pyo
+
+__pycache__/
+
+data/*
+serialization_dirs/
+nohup_logs/
+.env/
+
+*.ipynb
+*.vscode
+.ipynb_checkpoints
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..a318351
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "SentEval"]
+	path = SentEval
+	url = https://github.com/facebookresearch/SentEval
diff --git a/HMTL_architecture.png b/HMTL_architecture.png
new file mode 100644
index 0000000000000000000000000000000000000000..6eda142430c2b8141f627c8af254232ee7be345f
GIT binary patch
literal 101982
[binary image data omitted]
zoyCa!<+kXb z;oKzADl5@aC3f+*%~;;HxiW}7p_bQLl#Tdru*3q{dGBPpFSW+(%g^}ThOToDtHQ`| zX^8c)0;;Dxm{7w)&YKJX7sfAKYDS!2hwr_0)ILOdg9P9?&wz?i)m%fP-SO%RIKGrPv#j1a3LPU$4o^q(6Ki!v{ z0S4TJ*Ow00@o@1p{N+uJp_htgQZ@x`U9w=4I>cSYt$)CS6 zICcYC6ItaLmX^Vo@2`IP=8eED0P#2mIAa*hzFSxlA{$1*0<_A64e~W*mN*0}@gNkY zg{97D^k}owGb)gR0*2!$|KS&j8eEDgTXfG+vf1x|748XCtgmt!$NhOt1wd0urLG2a zmRODz26af3Jg|vGJ>fv-ap)*#<2E<4+De-+yeE^mOAHI4Is^pdIoEIQ)5djBdEsx| z7TB$+ZTN7%v*c3|`$GkMe?YG20Zl@l0&bpSFj`F>QmOB!PKBJqrs{98nW;GdauVT| zQuiwS2T!nC{N|J1jbzJ1EPd{!wJR)+C~`fd{Pr4XOm!%umS`euuswDf!o+A^_tr4c zEHOL)Rkfe8;hM2q<3-FsLlOa+dZ-0I_&pt>Q|~}=0&$vJ^253*9aI#{t~-JV?Pe)} zB=Os@f){2*0=>Q>XfPLDxU0ec98#!V#wis@7zrpkhkM@dcv-6dM%-wl5gpUnHy8th z>~8hHnL2O*rV)Yg{J7V4f6y$?x|=aWpmBm=snKO;(?B%_@aUGYGis2m`shcI8lec6UEkL0oihFgS^K! z$jQ6GW}1QecO`6)N)U8EiBgc(-g)w&PI96Qx+g9N$owlmZd0RLR=*wcfa3}f zcs4He?`36M@F2MV@1_5j72pe3fAgQc0RDI3{&xlbzg>Y4TYsP}?grlXJL@ty-(~(m zy!@p}6NFtqjE#qMJkR2U+`fjPxM_f+#A1)4BewDf`V~dMU^fHw^h6;Pp!YM`Ojkt$ z4Ij+X*X1R+xQXr==sr<@zT3D0{5Lu0!SMH2D;;Fe7@)!9Ydf#J2^>^JDnR3q1*-G% zT(3dr3}l5#5O{g$YuV68#ygo%ku>*Ja*Mf!YF?XZ4TrS>8w3~LHQvUB4}@d7fUC(2 zOLNazjOYY12q=~3_H#k6({d-!P^REAUj!SMAxHv98?!--$L+kSVKJ5;3HWpj zbHS=9FW!JGPJ^7!GUfq?o(3QwC+~_eTrbU^>vK9y?_(uHj^8b-W|^S%Y(93iAkj^- za4>J-#%r$afZ!U%!mbD%Kzi^v7SHW1LEJF{`XIt|OtZIqyxXYst1TIew98zSIO~`1 zcSkd;zC3>|2qBge@u$d@5>9d(&yvM;Q(TNuSBV5n>T`mU$&8%r>@L7qz1ZTF16qR` zNB;iWgVki)IM5zy;UwzSHB_0211Qi>uep4M-;dKlDS$vP4>k`}v7}YIjeE_}cPWJ+ zB}+J9JBar{rXO-bn&l-=OYG*N%vRh+ck?Enp@7M+&<*Sndhq$o2JjrmL@55ljiR z5nqp0GQ_|VgA>W2il;i%5i9j!rOVDVIcv^<>t%zb8i`o!=gWFCS^ zV1Wwm{fv7BFW=#sqI#vxeGF9ySu86f7JvXElA;#!Phrc zq3&Yw;ewx`9O>5s8V?rE%}2@$-uZ07lcSM~rADgq!<86m10FI7;Id zF!QGok#9NZEqHVYqpmSqT~1*3EA6I!eh9yvc>F+#>Ex*USdjawG$X|kk(S-WU2?3& z;AG_jK&5*@^;4^#1XKUq^RyQ0A+SOMtYqpE&gRp1V07gS*9m4B7~Mt99Y)2MSfH>2 zM)y>IpMfiDt+5-*Z^E$wO8A8ygzGOX34;Ut@Uu^FK5*u^MHaRUSqsT-_4eaTTBH?D-z7~&)Vf1g2nzA36j=Q)dBb3n@Lb3l01Bk9)8 ze*aFBH^d6=yn+vEO}xme_nF(g-Yb~7#^t~MH+56kf(AeM>(x8<6#EjqY0;1;cbVx& z8ZsOV(gA&;qr$F8ZW+6gL9kXnjhW?SBtB?$1>P4qkVPTLj*Vmk)`P%otd>HB%!w}b z_%R5Yxhe)Zc7JXb7PV44V_mpo9xtc1*4;fMeGo}e@EDUuYt>Pjak@B-FI> zOigVAWv;e&Fre0?wlXx@9 zKWtPdN;|`%C({DlSw1yKI~D<6midX*--lc zQ}(fE$dH_AvX$)$7p0E&7ItZW(_LBn^hw{Bn>%cHW0kd(e;8da?TaGGKS#`@M4HR1 zVBMKEzeB~a_A4Tt*8C|BVL~XgW*bBixWiN46q*0g=KpGyFK$Hn zY$5V^cZzdnvV>$*S9a{_n6M%WIlHE?RVp5C3`c3k;tKbma$8M_sNq#17@; z;|*C>y=TcO)QC-~e^E`n8QoPDp5U9{aG+>UCt$BLcXxM&*@}!kLWZQXw;A_RvLL!h zvC#;@sJ$Cg?j3F1aqO$rdm`HK>f%k2bVsln!kvJ{xO9KXF`95Ma8YOkddrgmU37Ex zb}8TCYWF*y3;V0xIpcBBiVvcKCl$<(?vpdM;a_{X4tc8$E*Wu=`L=o;{&G1XDTCCS zoot8izx<2b!7(6DV>d;(ktpC&Rh;sizPQHY&Wy!XJ*~BOR@n=$^^DL>OoX+5HFtg? 
znqCWWIHUO4<0xR_5O}=%(8Y+vedlAMh2&P74?1Q~oHkgGgq`v92k}8X(b~HE!lFRN zr1KMwC3@xi#zRBAu|L`0p5DU#K(NfslVWj0t!P}(SeNS&pIQw&l(a@{SiA3Pccxr| zBR)~E8mXyGlF#U|q%ICoyVqsIdC$UapHZy@Np8&IB)dboH4#_>!@)KJ6rPZeaU;-R z*d0m=ttbz&D%U{KK%2MVfMkdTK}3O*Wm|^O7V=aVdd|c~G#yjU1(u!7v4xyUOZ+lQ zYj!p{33<9S*3CbMoD_iprgf0V`?w;Sg;kA846H_|{xf z|7dM_(HZZ;awU7QSuH3yD3QVLvfwdrvs@WcbgKLUU3U#o+mBC5?mYNif9;b0WK>TK zrjuy|zccHV|8bx3pJE(@_(g?ze>w6kV5&-Mj_8wH>pTG{51WbO@uH=Q{O4Sw zRE}50Qmb*d{1q)6*+@vQV;Bt@+aCdH*-D8SJE2a2s3UkKum80<5AGCZM|*_3ULlI5$3({&8l)$sJ5)25&d1?0aT1PDClRO_8JU`M^O1 zZhnCBK2gAUopHM_Pi-Hgp4j#TLGQ$+HoEpQ+RSvl8Wbl>zUdePLaiNYlwN7J4&4~f ztFZPBwsf~0=R7W48^w}$^&~xV{n3RzmjZK7@2KLX#fk8GXHSwXprMIEu+o(As;^%A z(?yX{*$p2KCu}P1d(B~AAsgCjiif|xEhnGMAC=#FxHY?(TnluI(qEET>ym3b)@bXrr)A2qt2DEuzmae7# z$c)R72=?Q)%wUSNUR<#O3tGVAC*-UU3KeX$7?tZ*Gt7gvpr&qf+mjYw4P_qm>Dvl- z&WjukY8={mwn_3dLM@G&D?j;YpOmO>J*V1yLS!b_`^5%?GNm_uBsI=2uST^_e|xer z91pd}u71F?a%SRsp*kRXNQkV4g`XXvt;jDfPq-a1*c%o;{XSPnm;tM%9ltM^{wnl& z!xi&L-3Q7mE5IU@IKXeZvbJSW%a6F30h@buc~C@nXHZ%Yw#J@mBKbN=1d})?ME>?e#&Zn015{LOq!+FCM zH!BQu>4rr?q$3n9v4;@+x#}Gu0p373zLywiULc;@wcy(hB(rLyxXUqc*8y z60VEU40bM~qdBJg?=a3gdNnanCe=8OnVQ}d8G=;mOb3i7lUUN=boK^M3Uw)InGWse z{%!YZfiz@(nbSYGr9v{Ni59DMFfBW%|tHeLEy7C5AKUziNp3 z-!Ci;()=Vj60C44z|{{cN%}jfL4IcIJ5wd;U0-D=3~TF3_l?B!r7T)7TdR()4_Q0(5H70ZZ@g6qq~K&&e<0Rkm6#B)K@kFC_JieEU*0M@0lmQJP>;G=pRqr2Ld z#Oc}ZwD`DqBR*}wb$*9X;9w_1D*V?a=%DdPSf$wqp6=VVy!d9Jmz&27Jz&_lvL+v$ z(psW?XoA(N+HyrZ>AFS5Yuq&$&D87no8I*G!eox*j^4rWVp%`gN10~p?1j9Wea=pK z!atJwF%2eFW)t(vRbP-mluAhQL@FGyYMLH|CaTpNxbNdw4tq^F zC7dEu#p(SgMSQNdY*loPe3p&tGe=j9FULam5L~rT=Nyt^Zxgii0-sfFqH}|3Hc`La z!V-l@@KvgZf=&Hlcb@6^boIg67u)5~z;6uDc@YQKvOKeyFiN%2STIzdmTlNeI#BA%Wyq>}v_t0cXia;A zW2RPdC%Q;Jb#pwmb#ES)2A=nxmEI7a4fj?5^Hk8=B96mhsf|dubA5E_Uj0fiCuYeT z_4DxHG}TpaZ*O5(WSBNJkwt3pIZE;EDEiLw+`Q`fR`HB;SH7+L#rXC8y-`ZvRfg0m ztaV|BY}DTDR|!xg-{0zjmc4{)8>;g&QWZ8^Ra#T^W#7D9)n7ehXEx3w7;vTfep!sT zJR?)dSiNvFMWS)4ra`gFFEnQ z9USA7f}}Sc>IF&iBttlb3|501_aZAud|d}~eaYEkl-_l}1f1*u~8zA~q- z2`wM`B{KY|!3?Ne5Me=nLK@~BZKs3`Gt-4HGo|C7lQ5g3;kj6f=kP%Xj8Ki$*2R$1 z>0e`a&qWDcxDVpWvFtMDt}BYeCQ0B#^p0nrHFc8F0!iGPPY?C`iOeJ8F3*WhwYQWK zrd>ClwMo*y`nu;);~5J3F4S2{R&%|YR#p}>L7Oe)M$b;#4j_Q7Z2(Ta-ml%QHvS`w z3a9Yd8k4*$cRkhB{~<9EeNNO)0*TVea2w8w8cq2jW{DZy8uJRd`>`=g$}^9sM+y2{IP7lGw#!zD zdEMcIHIJDV^Fe#Dz2+@1`dP*wwN{1Ui91rGTb^qD&)&ANg^SV25a2AZ8wm&$lXxZH zq}IR&t$f{c z>!H)7bDqm57Ngu9X-F|%M002Ev$bjwwthTWug;<05wX4Tjsbjz+NFJXQOt}TwaCrq zRV~%kRXb88`1`h5+q{Evg7TnPF$|I;nYGS&d)ACLJ(M01Vq$xeMg8ZlVz$)%y#Ca+ z6y=7WeGdwFlFmp|DV3Pd|8gu{rE(}i(+vtLZL1FxgiS5i3(b$^77jWmKhmo(@61qb zJm)#@Y#@y6(fr*`a#^IEpv&#u+wd)UImEWb*{DW5FV-~oqS<60JiouOh1jxHjx#59 zHn?4XSA6}NbylpL!aq)(?|ijaa^P2wthr0jbQbxljo@uP6Wlw0<+n+c(q5UM{pS8# zjloLq{T)|_6&W%;YVrS=-I0goOb5YgjWaDJ$PhRI9kyVmyrT^}9JA-*yJ7257v#v1 zcz1tT{#irI-z<`RivY#xs(Y8!)uoAF+Fe(zP7|JIwFNj0Llb>ZT?2VjU7$g&+0|oZ znxiecdAlbZXvW{grP|#{ZKIt&+*N&Q^rIb&N0SZ(eU$-UQ5plz!HT06(IYVfr~70G zu@54Z&ZQDDpDIDAe4b-j^h~)s99|UWG<0scnI-<~EFg=D>*OAS3zp4T zf<}+b=uIDIHZP&6Mp>m?lFoZN>9KK55rCagIiEmLDJcPaalXK(-K(GOy-OI>lsQwO z`{-r}h`!w2GiRg*3NM(5hYQyBd^Rm$_ahEVTYe;xD|6MHaER_TsH>@sV7k`H5k;I0 za<`J;fKGF|wu-TG?|=3rbrl+a*mvnys3qca-0y0=U{ZCyW&h5e5bEv~E`v`NrC+wa zX%NkC^!VW%;l96Oy-KoDrn&nr@AMxSm!sOkq>J|2^EQYh8tcz?q6%t zs$M!#1shK_lA*-``*|Osoz02PkLKOzEAwyrsh{vQ$O(7VR~bs&&|XM%N3^tm>pab! 
zf86xr!a6Jfe%~Q}W9B5mD#$q?0!HmMM2?R<8)!^9x1OIoCqFwlnoKd7nwX5wEXlE< zF6^x4xL&Z$!T+&f@)CJ*SAXka^hPE*JXB%mzc+_{CA@pb$)MYP*c*+3Kgsep%BHDX zPMg+?E%7C(+U^_% z`n}rd(nIXGYT85hbD{y@{9a3Z%w=8GqG`Bz?%i-@6qI;byuqMwV&I$lfK9EvzHBO< z{#DP$oyVuXO(Pk9gFri#1L3RUIJL6XihDOb!aFpvPIR5_LoT?qyx+IdnjE0!xSCPW z5Ld@|PPOlr4=uc9d9G{}ekmoa69$d-ajQX<)9)$Hi1E^!ebIKTHkQ2MKZU#etR;=| zTYu&Y1#?IZyFM#@qxhq$EsJ>9DDH79P;ZxU!bUTiVE!JbL+Y& zY%(2Z)wL+?AMwo5EYcVj?{~C3m)2`BndDqcst1kO6?QDg1F+lVHl!wH8e6EiNIP-a z;uAy7eyF9g97K!>K>uLhoNuktjdWeyk3KrPNbl?(>5je}A2b_iFgQZSV(#1tn9w)sw7bObBrornR591m1e^Xs^@AKK-f}`Ugzvke4_g zw!P|k1x_%gU^P#PK}{yyCF8;K2SWraYY9(y;q7I9fV>xQ1G^IvSEi?&-jPiE0aEip z+=n2kj^zQ4B+XY)oCZuHlIl$l(Ig<{0d;&M;`Ni|eJe4Lc7r6e4yLT~ zZ(=P_h^_{iA>YG)pe9gz0N~E`j}uS|nT0bk|Mw^0w^ltr&{rn-)BpT`Pf+OC)r}?r zcld8n&s%U5nk&n^{Gb0X3JUEf#oc)SE(szo2YMxV?lp|@Z~qU*`XkA6V)@hm5PRtm zEZ3;WztqO@Nf6aOSRP=+Ib3z-gK|rOMI)WkF3w(N(bu|LZ1`jW$2rq!wDvZdd64B4+B!}Nzjx+ z)YR0Lt7>*k(VMXlt6Z~VDnZs)cd4Sh&Ni5rHoqwt6L-m8H`|j-^*Az{`Pl-mcwqWX zVjC>mI~e&Wee6>sg8}P#g98V1j@(iTLVs!Ery6zT)~cU^rFDbuGb+l>-O5uK&XV~q za=(%O8ZlZ1?Ma6VrM;uBX;CUqnq^9oqAEX@nb=Q@JRtDjzV}SZv&LG7OP`c$Anjs_ zeKP4Yf&B2za~b{*U*J)1C;;uT9^oaov>skTjwbZy%8T`$k1c zfq@B5C+b}zdjGjsex2YpBLB}^>fl-2bUiB(nsU!QpxyES+O0%&)>t+!P~$n2C}5!j zWzUcs-WruvyhqZl=fJe*ZQJU~ei*7AM^XQ}Vgg5We796foR|AyVC)%aXz5_S##!k; z!*Ah?2HGw7;HR9g9llMg24WB~chE|hFfb)Fg(+jKhbY248l zFOZ&MP!Kgcl@o0+*5EvKjM9>}s<0lnbgvp@J6Jm1#}qp(cNzCMgo!wu?_9g$E062a z%LSg3?{67$A&;sH?!S&K&|IZLoY0ywxuaIyCVw&35Pv}c>b74;<*V5gph+e;H7dj+6O#ZE>^?$WZ7RgC9DEB`-k2 z&y*vUa?C)7>@bTN(*B%x$V!!AAaG|bnkbD0adhMs^(H51Q0I4X(o;%asKrYv?6Mmb zRy)Tz?KAfQA+tG=PuqCB9VZU!kHbg>buTCKm~H#1=FH+To8~Jq0M+LS#AOG$gyKNo z)YDVs^_J1=?C-Hbz0ob`tpHD95iqVwg}9|#cO2Po$X>$JgBQzgjtLNzEQ=MZi~?XVpaTLTi>>qL)$4b#r$d3&i!vPSD8{~ zM-9#+wQEZbM;&D6@TM%O+ZH-&59-eg@>Xs-%KrV@xQnt~0;r{%I!#hDU8AKEb*$mR z@^!Eamm4LO8sMAhnQ%ivPMgd+AxFRW`rs4;FnlD z-FlnOyKA93pz5V8c<{-(|5TB(VXVpCgyd1UeVBf|m-`ycrdW5%j|;P&2e3!);p(cf zzkX$a?rs>RmmFj4zRg|^yNR%b%x@%p$iWqd!K2Bu(c5B)jqfWC4`z7=2n}{bT8a&Y zHzu#tkyU0^L;RNyeL;DxXB)>?aXk3pauZxn0aO}3j!ad_c&sSxT}IDiBys)#5aH8N zGQYPSjlM^^yt36O%xoF1Nd0gMTLgg9Ww;bqHCwUw)m2pSDY&taIXlxmp3u8d&q$&|Vz$O8Sffu^|D)h-3v`iJ%d$AKPPW#>GOf#5_gH;CqVbr7;MtZ<u)BZ?+K-abpXemX5Ph7nXR zceiXn#eq@w*yM((2i}RaT=jszC%Y30Ql2e}Uu$sF6$QaP^b@nK;~gfq5+=|lJc?}I zd(a;Jdnvq+fo3HIl}YNY?5EkJ^Jeq|v*VY#;pY|MfbEZ~EU(EZAaoB3kdWNprVY=B@ZALu5 z^_n-=0$w$4JVQP0!(?+_9O~t=@%^H|lLSKiH^T4o^eK^MaMrJMzgxu0o0IVltHt4W z-3bi>6L};Ul_Z7^s(v{v+x>OE&y`4{bhDbQ(KQWJ@iSK=DXXyp%X6mHss?6TJb);x-KI{QDF`L;Seh`Y zI1MCNte^-7T~|Yc2Miv)Xn$R6vrw--PkH*8$KXv%`xcViP%oL*eMb){#D0Z226n$- z>%T%J)oN3FkBN_dzZE2O7MNxMH(cd1vL3iljv*Kj>rK%tT3Jwr<&SlX1SaZ^U^7d6 zl}W=Waqlk&ejrkeh3iwE3m3)mHq?C=xf*Cx!?bfBD1^VdcqP(HNVId_zBH2IB9WSu zx@!%swf`<|n9VGtbd)-e$POXDRS4r(*LQlfl%OhZUmD#T9n~oq?XFrjKeBx=F!Cq3 zbl}L+gWa%7*#~11W#HZ2!StnUt$1|nF?G`!lZA1>F<5wc*%`X36msvYPrG{$g0E;n z3@PDpX)G%iO}+uQ7H>qonNXGop&uZ01WNvTW5OWy=317P?#(HFflhX6So9*ln$&=; zR)pEi1Zw&M?8#vW;`mwFFGI=@J05XTZ@^gmOZ{EwVIf}jJJnbVc zy5&v<(Wt&(-+sDSyl$*3NchurID574d@xnzpDRaS4Lij?>TKc?*tKQ}pjy>I?9^zY*P{=hqTd+$5$Ahmdo zeAgWc@zQt`4mofLDAJ#{)pC9hw-TTb?se*EiH?Ho?g_)eE8TpqHo>|`U47WblwUn+ zW)5#cRqho4!MY;9QsT02>U6Y~kkNL_1ZOpGgU?V`~GPJgZ5O{o+RJFRn6yE zE|!Y_Z6oP=^*Z4OP7D5!e=l={<}u{7a4Z#ZADqifhA&s|QN0~q8{mny7{lu^DXjNR0If0uq_22Q zUyY1QlkdgD-ylZ#gg52G1kW1G3dgs)s|9kYSCBgMe)>DHavfuDcL0!=MWVD`b%Yig zQ9~%0Ii~6v@qGI5Zff;PHgG_MfxOFgn$=iAuU7UB^ZM|)U#d=F%=%?B9fwe2$4!B`Wv_1E)&AO) z#GCPDMjWEhy)6Ub+~}}%yC0$K%@T@xJ2{>BhP-kOp!|9^pR(gI+Luu`K{z?;88SDw z1$v`*1%0STCaQ@!wYd2hmN+oVKR?MHJ*T-e8@OwUIMRZ+iXFvVYvb-YbPgE4(f(b3 
diff --git a/README.md b/README.md
+
+## About
+
+HMTL is a Hierarchical Multi-Task Learning model which combines a set of four carefully selected semantic tasks (namely Named Entity Recognition, Entity Mention Detection, Relation Extraction and Coreference Resolution). The model achieves state-of-the-art results on Named Entity Recognition, Entity Mention Detection and Relation Extraction. Using [SentEval](https://github.com/facebookresearch/SentEval), we show that as we move from the bottom to the top layers of the model, the model tends to learn more complex semantic representations.
+
+For more details, we refer to our AAAI paper (LINK Arxiv).
+
+We release here the code for _training_, _fine-tuning_ and _evaluating_ HMTL. We hope that this code will be useful for building your own Multi-Task models (hierarchical or not). The code is written in __Python__ and powered by __PyTorch__.
+
+## Dependencies and installation
+
+The main dependencies are:
+- [AllenNLP](https://github.com/allenai/allennlp)
+- [PyTorch](https://pytorch.org/)
+- [SentEval](https://github.com/facebookresearch/SentEval) (only for evaluating the embeddings)
+
+The code works with __Python 3.6__. A stable version of the dependencies is listed in `requirements.txt`.
+
+You can quickly set up a working environment by calling the script `./scripts/machine_setup.sh`. It installs Python 3.6, creates a clean virtual environment, and installs all the required dependencies (listed in `requirements.txt`). Please adapt the script to your needs.
+
+## Example usage
+
+We base our implementation on the [AllenNLP library](https://github.com/allenai/allennlp). For an introduction to this library, you should check [these tutorials](https://allennlp.org/tutorials).
+
+An experiment is described in a _json_ configuration file (see `configs/*.json` for examples). The configuration file mainly describes the datasets to load and the model to create, along with all the hyper-parameters of the model.
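Before reading the full files, it can help to see the layout they all share. The following is a condensed sketch only: the `"..."` entries are placeholders for blocks that are written out in full in the configs below, and the values shown are those of `configs/ner.json`.

```json
{
    "task_ner": {
        "task_description": {
            "task_name": "ner",
            "validation_metric_name": "f1-measure-overall",
            "validation_metric_decreases": false,
            "evaluate_on_test": true
        },
        "data_params": {
            "dataset_reader": {"type": "ner_ontonotes", "...": "..."},
            "train_data_path": "./data/conll-2012/v4/data/train/",
            "validation_data_path": "./data/conll-2012/v4/data/development/",
            "test_data_path": "./data/conll-2012/v4/data/test/",
            "datasets_for_vocab_creation": ["train"]
        }
    },

    "model": {
        "type": "ner",
        "text_field_embedder": {"...": "..."},
        "ner": {
            "encoder": {"type": "lstm", "...": "..."},
            "tagger": {"label_namespace": "ontonotes_ner_labels", "...": "..."}
        }
    },

    "iterators": {
        "iterator": {"type": "basic", "batch_size": 32}
    },

    "multi_task_trainer": {
        "type": "sampler_multi_task_trainer",
        "sampling_method": "proportional",
        "optimizer": {"type": "adam", "lr": 0.001},
        "scheduler": {"type": "reduce_on_plateau", "...": "..."}
    }
}
```

Multi-task variants simply declare several `task_<name>` blocks (e.g. `task_ner`, `task_emd`, `task_relation`, `task_coref`) together with the corresponding sub-modules under `model`, as in `configs/hmtl_coref_conll.json`.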
+
+Once you have set up your configuration file (and defined custom classes if needed), you can simply launch a training run with the following command and arguments:
+
+```bash
+python train.py --config_file_path configs/hmtl_coref_conll.json --serialization_dir my_first_training
+```
+
+Once the training has started, you can follow its progress in the terminal or open [TensorBoard](https://www.tensorflow.org/guide/summaries_and_tensorboard):
+
+```bash
+tensorboard --logdir my_first_training/log
+```
+
+## Evaluating the embeddings with SentEval
+
+We used [SentEval](https://github.com/facebookresearch/SentEval) to assess the linguistic properties learned by the model. `hmtl_senteval.py` gives an example of how we can create an interface between SentEval and HMTL.
+
+## Data
+
+To download the pre-trained embeddings we used in HMTL, you can simply call the script `./scripts/data_setup.sh`.
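For orientation, these are the locations under `data/` that the configuration files in `configs/` reference. The GloVe and ELMo files are the pre-trained embeddings mentioned above (presumably what the setup script fetches), while the corpora described in the next paragraph have to be added by hand.

```
data/
├── glove/
│   └── glove.6B.100d.txt.gz
├── elmo/
│   ├── 2x4096_512_2048cnn_2xhighway_options.json
│   └── 2x4096_512_2048cnn_2xhighway_weights.hdf5
├── conll-2012/                  # OntoNotes 5.0 / CoNLL-2012 splits (train, development, test)
├── conll-2012_single_file/      # single-file *.v4_gold_conll versions used for coreference
└── ace2005/                     # ACE 2005 splits plus single-file *.gold_conll versions
```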
+ +## References + +``` +@article{ +} +``` diff --git a/configs/coref_ace.json b/configs/coref_ace.json new file mode 100644 index 0000000..d3e5326 --- /dev/null +++ b/configs/coref_ace.json @@ -0,0 +1,150 @@ +{ + "task_coref":{ + "task_description":{ + "task_name": "coref", + "validation_metric_name": "coref_f1", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "coref_ace", + "max_span_width": 8, + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path":"./data/ace2005/single_file_train_rahman.gold_conll", + "validation_data_path": "./data/ace2005/single_file_dev_rahman.gold_conll", + "test_data_path": "./data/ace2005/single_file_test_rahman.gold_conll", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "model": { + "type": "coref_custom", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "coref": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.2 + }, + "tagger": { + "mention_feedforward": { + "input_dim": 2008, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "antecedent_feedforward": { + "input_dim": 6044, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + [".*scorer._module.weight", {"type": "xavier_normal"}], + ["_distance_embedding.weight", {"type": "xavier_normal"}], + ["_span_width_embedding.weight", {"type": "xavier_normal"}], + ["_context_layer._module.weight_ih.*", {"type": "xavier_normal"}], + ["_context_layer._module.weight_hh.*", {"type": "orthogonal"}] + ], + "lexical_dropout": 0.5, + "feature_size": 20, + "max_span_width": 8, + "spans_per_word": 0.4, + "max_antecedents": 70, + "eval_on_gold_mentions": false + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + }, + "iterator_coref": { + "type": "bucket", + "sorting_keys": [["text", "num_tokens"]], + "padding_noise": 0.0, + "batch_size": 1 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + "threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} \ No newline at end of file diff --git a/configs/coref_conll.json b/configs/coref_conll.json new file mode 100644 index 0000000..d538bac --- /dev/null +++ 
b/configs/coref_conll.json @@ -0,0 +1,150 @@ +{ + "task_coref":{ + "task_description":{ + "task_name": "coref", + "validation_metric_name": "coref_f1", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "coref", + "max_span_width": 8, + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path":"./data/conll-2012_single_file/train.english.v4_gold_conll", + "validation_data_path": "./data/conll-2012_single_file/dev.english.v4_gold_conll", + "test_data_path": "./data/conll-2012_single_file/test.english.v4_gold_conll", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "model": { + "type": "coref_custom", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "coref": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.2 + }, + "tagger": { + "mention_feedforward": { + "input_dim": 2008, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "antecedent_feedforward": { + "input_dim": 6044, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + [".*scorer._module.weight", {"type": "xavier_normal"}], + ["_distance_embedding.weight", {"type": "xavier_normal"}], + ["_span_width_embedding.weight", {"type": "xavier_normal"}], + ["_context_layer._module.weight_ih.*", {"type": "xavier_normal"}], + ["_context_layer._module.weight_hh.*", {"type": "orthogonal"}] + ], + "lexical_dropout": 0.5, + "feature_size": 20, + "max_span_width": 8, + "spans_per_word": 0.4, + "max_antecedents": 70, + "eval_on_gold_mentions": false + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + }, + "iterator_coref": { + "type": "bucket", + "sorting_keys": [["text", "num_tokens"]], + "padding_noise": 0.0, + "batch_size": 1 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + "threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} \ No newline at end of file diff --git a/configs/emd.json b/configs/emd.json new file mode 100644 index 0000000..a921499 --- /dev/null +++ b/configs/emd.json @@ -0,0 +1,120 @@ +{ + "task_ner":{ + "task_description":{ + "task_name": "ner", + "validation_metric_name": "f1-measure-overall", + 
"validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader":{ + "type": "mention_ace", + "label_namespace": "ace_mention_labels", + "lazy": false, + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": "./data/ace2005/dev/", + "test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "model": { + "type": "ner", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "ner": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ace_mention_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + "threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} diff --git a/configs/emd_coref_ace.json b/configs/emd_coref_ace.json new file mode 100644 index 0000000..c5b829f --- /dev/null +++ b/configs/emd_coref_ace.json @@ -0,0 +1,200 @@ +{ + "task_emd":{ + "task_description":{ + "task_name": "emd", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "mention_ace", + "label_namespace": "ace_mention_labels", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": "./data/ace2005/dev/", + "test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_coref":{ + "task_description":{ + "task_name": "coref", + "validation_metric_name": "coref_f1", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "coref_ace", + "max_span_width": 8, + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path":"./data/ace2005/single_file_train.gold_conll", + 
"validation_data_path": "./data/ace2005/single_file_dev.gold_conll", + "test_data_path": "./data/ace2005/single_file_test.gold_conll", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "model": { + "type": "emd_coref", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "emd": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ace_mention_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + }, + + "coref": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.2 + }, + "tagger": { + "mention_feedforward": { + "input_dim": 2136, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "antecedent_feedforward": { + "input_dim": 6428, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + [".*scorer._module.weight", {"type": "xavier_normal"}], + ["_distance_embedding.weight", {"type": "xavier_normal"}], + ["_span_width_embedding.weight", {"type": "xavier_normal"}], + ["_context_layer._module.weight_ih.*", {"type": "xavier_normal"}], + ["_context_layer._module.weight_hh.*", {"type": "orthogonal"}] + ], + "lexical_dropout": 0.5, + "feature_size": 20, + "max_span_width": 8, + "spans_per_word": 0.4, + "max_antecedents": 70, + "eval_on_gold_mentions": false + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + }, + "iterator_coref": { + "type": "bucket", + "sorting_keys": [["text", "num_tokens"]], + "padding_noise": 0.0, + "batch_size": 1 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + "threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} \ No newline at end of file diff --git a/configs/emd_relation.json b/configs/emd_relation.json new file mode 100644 index 0000000..ed3fa90 --- /dev/null +++ b/configs/emd_relation.json @@ -0,0 +1,173 @@ +{ + "task_emd":{ + "task_description":{ + "task_name": "emd", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "mention_ace", + "label_namespace": "ace_mention_labels", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": 
"elmo_characters" + } + } + }, + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": "./data/ace2005/dev/", + "test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_relation":{ + "task_description":{ + "task_name": "relation", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "relation_ace", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": "./data/ace2005/dev/", + "test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "model": { + "type": "emd_relation", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "emd": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ace_mention_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + }, + + "relation": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 64, + "num_layers": 3, + "dropout": 0.2 + }, + "tagger": { + "d": 64, + "l": 64, + "n_classes": 6, + "activation": "relu" + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + }, + "iterator_relation": { + "type": "basic", + "batch_size": 4 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + "threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} \ No newline at end of file diff --git a/configs/hmtl_coref_ace.json b/configs/hmtl_coref_ace.json new file mode 100644 index 0000000..57a6fdf --- /dev/null +++ b/configs/hmtl_coref_ace.json @@ -0,0 +1,307 @@ +{ + "task_ner":{ + "task_description":{ + "task_name": "ner", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader":{ + "type": "ner_ontonotes", + "label_namespace": "ontonotes_ner_labels", + "coding_scheme": "BIOUL", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/conll-2012/v4/data/train/", + 
"validation_data_path": "./data/conll-2012/v4/data/development/", + "test_data_path": "./data/conll-2012/v4/data/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_emd":{ + "task_description":{ + "task_name": "emd", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "mention_ace", + "label_namespace": "ace_mention_labels", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": "./data/ace2005/dev/", + "test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_relation":{ + "task_description":{ + "task_name": "relation", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "relation_ace", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": "./data/ace2005/dev/", + "test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_coref":{ + "task_description":{ + "task_name": "coref", + "validation_metric_name": "coref_f1", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "coref_ace", + "max_span_width": 8, + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path":"./data/ace2005/single_file_train_rahman.gold_conll", + "validation_data_path": "./data/ace2005/single_file_dev_rahman.gold_conll", + "test_data_path": "./data/ace2005/single_file_test_rahman.gold_conll", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "model": { + "type": "hmtl", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "ner": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ontonotes_ner_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + }, + + "emd": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ace_mention_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + }, + + "relation": { + 
"encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 64, + "num_layers": 3, + "dropout": 0.2 + }, + "tagger": { + "d": 64, + "l": 64, + "n_classes": 6, + "activation": "relu" + } + }, + + "coref": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.2 + }, + "tagger": { + "mention_feedforward": { + "input_dim": 2136, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "antecedent_feedforward": { + "input_dim": 6428, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + [".*scorer._module.weight", {"type": "xavier_normal"}], + ["_distance_embedding.weight", {"type": "xavier_normal"}], + ["_span_width_embedding.weight", {"type": "xavier_normal"}], + ["_context_layer._module.weight_ih.*", {"type": "xavier_normal"}], + ["_context_layer._module.weight_hh.*", {"type": "orthogonal"}] + ], + "lexical_dropout": 0.5, + "feature_size": 20, + "max_span_width": 8, + "spans_per_word": 0.4, + "max_antecedents": 70, + "eval_on_gold_mentions": false + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + }, + "iterator_relation": { + "type": "basic", + "batch_size": 4 + }, + "iterator_coref": { + "type": "bucket", + "sorting_keys": [["text", "num_tokens"]], + "padding_noise": 0.0, + "batch_size": 1 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + "threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} \ No newline at end of file diff --git a/configs/hmtl_coref_conll.json b/configs/hmtl_coref_conll.json new file mode 100644 index 0000000..8aa732b --- /dev/null +++ b/configs/hmtl_coref_conll.json @@ -0,0 +1,307 @@ +{ + "task_ner":{ + "task_description":{ + "task_name": "ner", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader":{ + "type": "ner_ontonotes", + "label_namespace": "ontonotes_ner_labels", + "coding_scheme": "BIOUL", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/conll-2012/v4/data/train/", + "validation_data_path": "./data/conll-2012/v4/data/development/", + "test_data_path": "./data/conll-2012/v4/data/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_emd":{ + "task_description":{ + "task_name": "emd", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "mention_ace", + "label_namespace": "ace_mention_labels", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": "./data/ace2005/dev/", + 
"test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_relation":{ + "task_description":{ + "task_name": "relation", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "relation_ace", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": "./data/ace2005/dev/", + "test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_coref":{ + "task_description":{ + "task_name": "coref", + "validation_metric_name": "coref_f1", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "coref", + "max_span_width": 8, + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path":"./data/conll-2012_single_file/train.english.v4_gold_conll", + "validation_data_path": "./data/conll-2012_single_file/dev.english.v4_gold_conll", + "test_data_path": "./data/conll-2012_single_file/test.english.v4_gold_conll", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "model": { + "type": "hmtl", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "ner": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ontonotes_ner_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + }, + + "emd": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ace_mention_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + }, + + "relation": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 64, + "num_layers": 3, + "dropout": 0.2 + }, + "tagger": { + "d": 64, + "l": 64, + "n_classes": 6, + "activation": "relu" + } + }, + + "coref": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.2 + }, + "tagger": { + "mention_feedforward": { + "input_dim": 2136, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "antecedent_feedforward": { + "input_dim": 6428, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + 
[".*scorer._module.weight", {"type": "xavier_normal"}], + ["_distance_embedding.weight", {"type": "xavier_normal"}], + ["_span_width_embedding.weight", {"type": "xavier_normal"}], + ["_context_layer._module.weight_ih.*", {"type": "xavier_normal"}], + ["_context_layer._module.weight_hh.*", {"type": "orthogonal"}] + ], + "lexical_dropout": 0.5, + "feature_size": 20, + "max_span_width": 8, + "spans_per_word": 0.4, + "max_antecedents": 70, + "eval_on_gold_mentions": false + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + }, + "iterator_relation": { + "type": "basic", + "batch_size": 4 + }, + "iterator_coref": { + "type": "bucket", + "sorting_keys": [["text", "num_tokens"]], + "padding_noise": 0.0, + "batch_size": 1 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + "threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} \ No newline at end of file diff --git a/configs/ner.json b/configs/ner.json new file mode 100644 index 0000000..d1655ea --- /dev/null +++ b/configs/ner.json @@ -0,0 +1,120 @@ +{ + "task_ner":{ + "task_description":{ + "task_name": "ner", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader":{ + "type": "ner_ontonotes", + "label_namespace": "ontonotes_ner_labels", + "coding_scheme": "BIOUL", + "lazy": false, + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/conll-2012/v4/data/train/", + "validation_data_path": "./data/conll-2012/v4/data/development/", + "test_data_path": "./data/conll-2012/v4/data/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + "model": { + "type": "ner", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "ner": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ontonotes_ner_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + 
"scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + "threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} diff --git a/configs/ner_emd.json b/configs/ner_emd.json new file mode 100644 index 0000000..b5ee471 --- /dev/null +++ b/configs/ner_emd.json @@ -0,0 +1,172 @@ +{ + "task_ner":{ + "task_description":{ + "task_name": "ner", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader":{ + "type": "ner_ontonotes", + "label_namespace": "ontonotes_ner_labels", + "coding_scheme": "BIOUL", + "lazy": false, + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/conll-2012/v4/data/train/", + "validation_data_path": "./data/conll-2012/v4/data/development/", + "test_data_path": "./data/conll-2012/v4/data/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_emd":{ + "task_description":{ + "task_name": "emd", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "mention_ace", + "label_namespace": "ace_mention_labels", + "lazy": false, + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + "train_data_path": "./data/ace2005/train", + "validation_data_path": "./data/ace2005/dev", + "test_data_path": "./data/ace2005/test", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "model": { + "type": "ner_emd", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "ner": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ontonotes_ner_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + }, + + "emd": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ace_mention_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + 
"threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} \ No newline at end of file diff --git a/configs/ner_emd_coref_ace.json b/configs/ner_emd_coref_ace.json new file mode 100644 index 0000000..b47df4f --- /dev/null +++ b/configs/ner_emd_coref_ace.json @@ -0,0 +1,252 @@ +{ + "task_ner":{ + "task_description":{ + "task_name": "ner", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader":{ + "type": "ner_ontonotes", + "label_namespace": "ontonotes_ner_labels", + "coding_scheme": "BIOUL", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/conll-2012/v4/data/train/", + "validation_data_path": "./data/conll-2012/v4/data/development/", + "test_data_path": "./data/conll-2012/v4/data/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_emd":{ + "task_description":{ + "task_name": "emd", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "mention_ace", + "label_namespace": "ace_mention_labels", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": "./data/ace2005/dev/", + "test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_coref":{ + "task_description":{ + "task_name": "coref", + "validation_metric_name": "coref_f1", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "coref_ace", + "max_span_width": 8, + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path":"./data/ace2005/single_file_train.gold_conll", + "validation_data_path": "./data/ace2005/single_file_dev.gold_conll", + "test_data_path": "./data/ace2005/single_file_test.gold_conll", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "model": { + "type": "ner_emd_coref", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "ner": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ontonotes_ner_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + }, + + "emd": { + "encoder": { + "type": "lstm", + "bidirectional": true, + 
"input_size": 1316, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ace_mention_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + }, + + "coref": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 200, + "num_layers": 1, + "dropout": 0.2 + }, + "tagger": { + "mention_feedforward": { + "input_dim": 2136, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "antecedent_feedforward": { + "input_dim": 6428, + "num_layers": 2, + "hidden_dims": 150, + "activations": "relu", + "dropout": 0.3 + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + [".*scorer._module.weight", {"type": "xavier_normal"}], + ["_distance_embedding.weight", {"type": "xavier_normal"}], + ["_span_width_embedding.weight", {"type": "xavier_normal"}], + ["_context_layer._module.weight_ih.*", {"type": "xavier_normal"}], + ["_context_layer._module.weight_hh.*", {"type": "orthogonal"}] + ], + "lexical_dropout": 0.5, + "feature_size": 20, + "max_span_width": 8, + "spans_per_word": 0.4, + "max_antecedents": 70, + "eval_on_gold_mentions": false + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + }, + "iterator_coref": { + "type": "bucket", + "sorting_keys": [["text", "num_tokens"]], + "padding_noise": 0.0, + "batch_size": 1 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + "threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} \ No newline at end of file diff --git a/configs/ner_emd_relation.json b/configs/ner_emd_relation.json new file mode 100644 index 0000000..08d924c --- /dev/null +++ b/configs/ner_emd_relation.json @@ -0,0 +1,226 @@ +{ + "task_ner":{ + "task_description":{ + "task_name": "ner", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader":{ + "type": "ner_ontonotes", + "label_namespace": "ontonotes_ner_labels", + "coding_scheme": "BIOUL", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/conll-2012/v4/data/train/", + "validation_data_path": "./data/conll-2012/v4/data/development/", + "test_data_path": "./data/conll-2012/v4/data/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "task_emd":{ + "task_description":{ + "task_name": "emd", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "mention_ace", + "label_namespace": "ace_mention_labels", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": "./data/ace2005/dev/", + "test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + 
"task_relation":{ + "task_description":{ + "task_name": "relation", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader": { + "type": "relation_ace", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": "./data/ace2005/dev/", + "test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "model": { + "type": "ner_emd_relation", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "ner": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ontonotes_ner_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + }, + + "emd": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 64, + "num_layers": 2, + "dropout": 0.2 + }, + "tagger": { + "label_namespace": "ace_mention_labels", + "constraint_type": "BIOUL", + "dropout": 0.2 + } + }, + + "relation": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1316, + "hidden_size": 64, + "num_layers": 3, + "dropout": 0.2 + }, + + "tagger": { + "d": 64, + "l": 64, + "n_classes": 6, + "activation": "relu" + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + }, + "iterator_relation": { + "type": "basic", + "batch_size": 4 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + "threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} \ No newline at end of file diff --git a/configs/relation.json b/configs/relation.json new file mode 100644 index 0000000..570dbf7 --- /dev/null +++ b/configs/relation.json @@ -0,0 +1,124 @@ +{ + "task_relation":{ + "task_description":{ + "task_name": "relation", + "validation_metric_name": "f1-measure-overall", + "validation_metric_decreases": false, + "evaluate_on_test": true + }, + + "data_params":{ + "dataset_reader":{ + "type": "relation_ace", + "lazy": false, + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + }, + "token_characters":{ + "type": "characters" + }, + "elmo": { + "type": "elmo_characters" + } + } + }, + + + "train_data_path": "./data/ace2005/train/", + "validation_data_path": 
"./data/ace2005/dev/", + "test_data_path": "./data/ace2005/test/", + + "datasets_for_vocab_creation": ["train"] + } + }, + + "model": { + "type": "relation", + + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "pretrained_file": "./data/glove/glove.6B.100d.txt.gz", + "embedding_dim": 100, + "trainable": true + }, + "elmo": { + "type": "elmo_token_embedder", + "options_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "./data/elmo/2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0, + "requires_grad": false + }, + "token_characters": { + "type": "character_encoding", + "embedding": { + "embedding_dim": 16 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 16, + "num_filters": 64, + "ngram_filter_sizes": [3] + }, + "dropout": 0.1 + } + } + }, + + "relation": { + "encoder": { + "type": "lstm", + "bidirectional": true, + "input_size": 1188, + "hidden_size": 64, + "num_layers": 3, + "dropout": 0.2 + }, + "tagger": { + "d": 64, + "l": 64, + "n_classes": 6, + "activation": "relu" + } + } + }, + + "iterators": { + "iterator": { + "type": "basic", + "batch_size": 32 + }, + "iterator_relation": { + "type": "basic", + "batch_size": 4 + } + }, + + "multi_task_trainer": { + "type": "sampler_multi_task_trainer", + "sampling_method": "proportional", + "patience": 10, + "num_epochs": 100, + "min_lr": "1e-7", + "grad_norm": 5.0, + "grad_clipping": 10.0, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "scheduler": { + "type": "reduce_on_plateau", + "mode": "min", + "factor": 0.5, + "patience": 5, + "threshold": 0.0001, + "threshold_mode": "abs", + "verbose": true + } + } +} \ No newline at end of file diff --git a/evaluate.py b/evaluate.py new file mode 100644 index 0000000..46e7748 --- /dev/null +++ b/evaluate.py @@ -0,0 +1,203 @@ +# coding: utf-8 + +""" +The ``evaluate.py`` file can be used to +evaluate a trained model against a dataset +and report any metrics calculated by the model. +It requires a configuration file and a directory in +which to write the results. + +.. code-block:: bash + + $ python evaluate.py --help + usage: evaluate.py [-h] -s SERIALIZATION_DIR [-g] + + optional arguments: + -h, --help show this help message and exit + -s SERIALIZATION_DIR, --serialization_dir SERIALIZATION_DIR + Directory in which to save the model and its logs. + -g, --gold_mentions Whether or not evaluate using gold mentions in + coreference +""" + +import argparse +import os +import json +import itertools +import re +from copy import deepcopy +import tqdm +from typing import List, Dict, Any, Iterable +import torch + +from allennlp.models.model import Model +from allennlp.data import Instance +from allennlp.data.iterators import DataIterator +from allennlp.common.checks import check_for_gpu +from allennlp.common.params import Params +from allennlp.nn import util +from allennlp.data import Vocabulary + +from hmtl.tasks import Task +from hmtl.common import create_and_set_iterators + +import logging +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.INFO) +logger = logging.getLogger(__name__) + + +def evaluate(model: Model, + instances: Iterable[Instance], + task_name: str, + data_iterator: DataIterator, + cuda_device: int) -> Dict[str, Any]: + """ + Evaluate a model for a particular task (usually after training). 
+ + Parameters + ---------- + model : ``allennlp.models.model.Model``, required + The model to evaluate + instances : ``Iterable[Instance]``, required + The (usually test) dataset on which to evalute the model. + task_name : ``str``, required + The name of the task on which evaluate the model. + data_iterator : ``DataIterator`` + Iterator that go through the dataset. + cuda_device : ``int`` + Cuda device to use. + + Returns + ------- + metrics : ``Dict[str, Any]`` + A dictionary containing the metrics on the evaluated dataset. + """ + check_for_gpu(cuda_device) + with torch.no_grad(): + model.eval() + + iterator = data_iterator(instances, + num_epochs = 1, + shuffle = False) + logger.info("Iterating over dataset") + generator_tqdm = tqdm.tqdm(iterator, + total = data_iterator.get_num_batches(instances)) + + eval_loss = 0 + nb_batches = 0 + for batch in generator_tqdm: + batch = util.move_to_device(batch, cuda_device) + nb_batches += 1 + + eval_output_dict = model.forward(task_name = task_name, tensor_batch = batch) + loss = eval_output_dict["loss"] + eval_loss += loss.item() + metrics = model.get_metrics(task_name = task_name) + metrics["loss"] = float(eval_loss/nb_batches) + + description = ', '.join(["%s: %.2f" % (name, value) for name, value in metrics.items()]) + " ||" + generator_tqdm.set_description(description, refresh = False) + + metrics = model.get_metrics(task_name = task_name, reset = True, full = True) + metrics["loss"] = float(eval_loss/nb_batches) + return metrics + + +if __name__ == "__main__": + ### Evaluate from args ### + + # Parse arguments + parser = argparse.ArgumentParser() + parser.add_argument("-s", + "--serialization_dir", + required = True, + help = "Directory in which to save the model and its logs.", + type = str) + parser.add_argument("-g", + "--gold_mentions", + action = "store_true", + required = False, + default = False, + help = "Whether or not evaluate using gold mentions in coreference") + args = parser.parse_args() + + + params = Params.from_file(params_file = os.path.join(args.serialization_dir, "config.json")) + + + ### Instantiate tasks ### + task_list = [] + task_keys = [key for key in params.keys() if re.search("^task_", key)] + + for key in task_keys: + logger.info("Creating %s", key) + task_params = params.pop(key) + task_description = task_params.pop("task_description") + task_data_params = task_params.pop("data_params") + + task = Task.from_params(params = task_description) + task_list.append(task) + + _, _ = task.load_data_from_params(params = task_data_params) + + + ### Load Vocabulary from files ### + vocab = Vocabulary.from_files(os.path.join(args.serialization_dir, "vocabulary")) + logger.info("Vocabulary loaded") + + + ### Load the data iterators ### + task_list = create_and_set_iterators(params = params, task_list = task_list, vocab = vocab) + + + ### Regularization ### + regularizer = None + + + ### Create model ### + model_params = params.pop("model") + model = Model.from_params(vocab = vocab, params = model_params, regularizer = regularizer) + + + ### Real evaluation ### + cuda_device = params.pop("multi_task_trainer").pop_int("cuda_device", -1) + + metrics = {task._name: {} for task in task_list} + for task in task_list: + if not task._evaluate_on_test: continue + + logger.info("Task %s will be evaluated using the best epoch weights.", task._name) + assert task._test_data is not None, "Task {} wants to be evaluated on test dataset but no there is no test data loaded.".format(task._name) + + logger.info("Loading the best epoch weights 
for task %s", task._name) + best_model_state_path = os.path.join(args.serialization_dir, "best_{}.th".format(task._name)) + best_model_state = torch.load(best_model_state_path) + best_model = model + best_model.load_state_dict(state_dict = best_model_state) + + test_metric_dict = {} + + for pair_task in task_list: + if not pair_task._evaluate_on_test: continue + + logger.info("Pair task %s is evaluated with the best model for %s", pair_task._name, task._name) + test_metric_dict[pair_task._name] = {} + test_metrics = evaluate(model = best_model, + task_name = pair_task._name, + instances = pair_task._test_data, + data_iterator = pair_task._data_iterator, + cuda_device = cuda_device) + + for metric_name, value in test_metrics.items(): + test_metric_dict[pair_task._name][metric_name] = value + + metrics[task._name]["test"] = deepcopy(test_metric_dict) + logger.info("Finished evaluation of task %s.", task._name) + + metrics_json = json.dumps(metrics, indent = 2) + with open(os.path.join(args.serialization_dir, "evaluate_metrics.json"), "w") as metrics_file: + metrics_file.write(metrics_json) + + logger.info("Metrics: %s", metrics_json) \ No newline at end of file diff --git a/fine_tune.py b/fine_tune.py new file mode 100644 index 0000000..8cb4964 --- /dev/null +++ b/fine_tune.py @@ -0,0 +1,157 @@ +# coding: utf-8 + +""" +The ``fine_tune.py`` file is used to continue training (or `fine-tune`) a model on a `different +dataset` than the one it was originally trained on. It requires a saved model archive file, a path +to the data you will continue training with, and a directory in which to write the results. + +. code-block:: bash + + $ python fine_tune.py --help + usage: fine_tune.py [-h] -s SERIALIZATION_DIR -c CONFIG_FILE_PATH -p + PRETRAINED_DIR -m PRETRAINED_MODEL_NAME + + optional arguments: + -h, --help show this help message and exit + -s SERIALIZATION_DIR, --serialization_dir SERIALIZATION_DIR + Directory in which to save the model and its logs. + -c CONFIG_FILE_PATH, --config_file_path CONFIG_FILE_PATH + Path to parameter file describing the new multi-tasked + model to be fine-tuned. + -p PRETRAINED_DIR, --pretrained_dir PRETRAINED_DIR + Directory in which was saved the pre-trained model. + -m PRETRAINED_MODEL_NAME, --pretrained_model_name PRETRAINED_MODEL_NAME + Name of the weight file for the pretrained model to + fine-tune in the ``pretrained_dir``. 
+""" + +import argparse +import itertools +import os +import json +import re +from copy import deepcopy +import torch +from typing import List, Dict, Any, Tuple +import logging +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.INFO) + +from hmtl.tasks import Task +from hmtl.training.multi_task_trainer import MultiTaskTrainer +from hmtl.common import create_and_set_iterators +from evaluate import evaluate +from train import train_model + +from allennlp.models.model import Model +from allennlp.data import Vocabulary +from allennlp.data.iterators import DataIterator +from allennlp.commands.train import create_serialization_dir +from allennlp.common.params import Params +from allennlp.common.checks import ConfigurationError +from allennlp.nn import RegularizerApplicator + +logger = logging.getLogger(__name__) + + +if __name__ == "__main__": + # Parse arguments + parser = argparse.ArgumentParser() + parser.add_argument("-s", + "--serialization_dir", + required = True, + help = "Directory in which to save the model and its logs.", + type = str) + parser.add_argument("-c", + "--config_file_path", + required = True, + help = "Path to parameter file describing the new multi-tasked model to be fine-tuned.", + type = str) + parser.add_argument("-p", + "--pretrained_dir", + required = True, + help = "Directory in which was saved the pre-trained model.", + type = str) + parser.add_argument("-m", + "--pretrained_model_name", + required = True, + help = "Name of the weight file for the pretrained model to fine-tune in the ``pretrained_dir``.", + type = str) + args = parser.parse_args() + + + params = Params.from_file(params_file = args.config_file_path) + serialization_dir = args.serialization_dir + create_serialization_dir(params, serialization_dir, False) + + serialization_params = deepcopy(params).as_dict(quiet=True) + with open(os.path.join(serialization_dir, "config.json"), "w") as param_file: + json.dump(serialization_params, param_file, indent = 4) + + + ### Instantiate tasks ### + task_list = [] + task_keys = [key for key in params.keys() if re.search("^task_", key)] + + for key in task_keys: + logger.info("Creating %s", key) + task_params = params.pop(key) + task_description = task_params.pop("task_description") + task_data_params = task_params.pop("data_params") + + task = Task.from_params(params = task_description) + task_list.append(task) + + _, _ = task.load_data_from_params(params = task_data_params) + + + ### Load Vocabulary from files and save it to the new serialization_dir ### + # PLEASE NOTE that here, we suppose that the vocabulary is the same for the pre-trained model + # and the model to fine-tune. The most noticeable implication of this hypothesis is that the label specs + # between the two datasets (for pre-training and for fine-tuning) are exactly the same. 
+ vocab = Vocabulary.from_files(os.path.join(args.pretrained_dir, "vocabulary")) + logger.info("Vocabulary loaded from %s", os.path.join(args.pretrained_dir, "vocabulary")) + + vocab.save_to_files(os.path.join(serialization_dir, "vocabulary")) + logger.info("Save vocabulary to file %s", os.path.join(serialization_dir, "vocabulary")) + + + ### Load the data iterators for each task ### + task_list = create_and_set_iterators(params = params, task_list = task_list, vocab = vocab) + + + ### Load Regularizations ### + regularizer = RegularizerApplicator.from_params(params.pop('regularizer', [])) + + + ### Create model ### + model_params = params.pop("model") + model = Model.from_params(vocab = vocab, params = model_params, regularizer = regularizer) + + + logger.info("Loading the pretrained model from %s", os.path.join(args.pretrained_dir, args.pretrained_model_name)) + try: + pretrained_model_state_path = os.path.join(args.pretrained_dir, args.pretrained_model_name) + pretrained_model_state = torch.load(pretrained_model_state_path) + model.load_state_dict(state_dict = pretrained_model_state) + except: + raise ConfigurationError("It appears that the configuration of the pretrained model and " + "the model to fine-tune are not compatible. " + "Please check the compatibility of the encoders and taggers in the " + "config files.") + + + ### Create multi-task trainer ### + multi_task_trainer_params = params.pop("multi_task_trainer") + trainer = MultiTaskTrainer.from_params(model = model, + task_list = task_list, + serialization_dir = serialization_dir, + params = multi_task_trainer_params) + + + ### Launch training ### + metrics = train_model(multi_task_trainer = trainer, + recover = False) + if metrics is not None: + logging.info("Fine-tuning is finished ! Let's have a drink. It's on the house !") \ No newline at end of file diff --git a/hmtl/__init__.py b/hmtl/__init__.py new file mode 100644 index 0000000..e0807c6 --- /dev/null +++ b/hmtl/__init__.py @@ -0,0 +1,7 @@ +# coding: utf-8 + +from hmtl.dataset_readers import * +from hmtl.modules import * +from hmtl.models import * +from hmtl.tasks import * +from hmtl.training import * \ No newline at end of file diff --git a/hmtl/common/__init__.py b/hmtl/common/__init__.py new file mode 100644 index 0000000..183c8db --- /dev/null +++ b/hmtl/common/__init__.py @@ -0,0 +1,3 @@ +# coding: utf-8 + +from hmtl.common.util import create_and_set_iterators \ No newline at end of file diff --git a/hmtl/common/util.py b/hmtl/common/util.py new file mode 100644 index 0000000..dc16492 --- /dev/null +++ b/hmtl/common/util.py @@ -0,0 +1,53 @@ +# coding: utf-8 + +""" +Various utilities that don't fit anwhere else. +""" + +from typing import List, Dict, Any, Tuple + +from allennlp.common.params import Params +from allennlp.data import Vocabulary +from allennlp.data.iterators import DataIterator + +from hmtl.tasks import Task + + + +def create_and_set_iterators(params: Params, + task_list: List[Task], + vocab: Vocabulary) -> List[Task]: + ''' + Each task/dataset can have its own specific data iterator. If not precised, + we use a shared/common data iterator. + + Parameters + ---------- + params: ``Params`` + A parameter object specifing an experiment. + task_list: ``List[Task]`` + A list containing the tasks of the model to train. + + Returns + ------- + task_list: ``List[Task]`` + The list containing the tasks of the model to train, where each task has a new attribute: the data iterator. 
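    As an illustration, the ``iterators`` section consumed here follows the layout used
    in the configuration files of this repository, e.g.::

        "iterators": {
            "iterator": {"type": "basic", "batch_size": 32},
            "iterator_coref": {"type": "bucket",
                               "sorting_keys": [["text", "num_tokens"]],
                               "padding_noise": 0.0,
                               "batch_size": 1}
        }

    where ``iterator`` is the shared default and any ``iterator_<task_name>`` entry
    overrides it for the corresponding task.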
+ ''' + ### Charge default iterator ### + iterators_params = params.pop("iterators") + + default_iterator_params = iterators_params.pop("iterator") + default_iterator = DataIterator.from_params(default_iterator_params) + default_iterator.index_with(vocab) + + ### Charge dataset specific iterators ### + for task in task_list: + specific_iterator_params = iterators_params.pop("iterator_" + task._name, None) + if specific_iterator_params is not None: + specific_iterator = DataIterator.from_params(specific_iterator_params) + specific_iterator.index_with(vocab) + task.set_data_iterator(specific_iterator) + else: + task.set_data_iterator(default_iterator) + + return task_list \ No newline at end of file diff --git a/hmtl/dataset_readers/__init__.py b/hmtl/dataset_readers/__init__.py new file mode 100644 index 0000000..fbf8504 --- /dev/null +++ b/hmtl/dataset_readers/__init__.py @@ -0,0 +1,6 @@ +# coding: utf-8 + +from hmtl.dataset_readers.ner_ontonotes import NerOntonotesReader +from hmtl.dataset_readers.mention_ace import MentionACEReader +from hmtl.dataset_readers.relation_ace import RelationACEReader +from hmtl.dataset_readers.coref_ace import CorefACEReader \ No newline at end of file diff --git a/hmtl/dataset_readers/coref_ace.py b/hmtl/dataset_readers/coref_ace.py new file mode 100644 index 0000000..054783f --- /dev/null +++ b/hmtl/dataset_readers/coref_ace.py @@ -0,0 +1,180 @@ +# coding: utf-8 + +import logging +import collections +from typing import Any, Dict, List, Optional, Tuple, DefaultDict, Set + +from overrides import overrides + +from allennlp.common import Params +from allennlp.common.file_utils import cached_path +from allennlp.data.dataset_readers.dataset_reader import DatasetReader +from allennlp.data.fields import Field, ListField, TextField, SpanField, MetadataField, SequenceLabelField +from allennlp.data.instance import Instance +from allennlp.data.tokenizers import Token +from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer +from allennlp.data.dataset_readers.dataset_utils import enumerate_spans + +from hmtl.dataset_readers.dataset_utils import ACE + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +def canonicalize_clusters(clusters: DefaultDict[int, List[Tuple[int, int]]]) -> List[List[Tuple[int, int]]]: + """ + The CoNLL 2012 data includes 2 annotatated spans which are identical, + but have different ids. This checks all clusters for spans which are + identical, and if it finds any, merges the clusters containing the + identical spans. + """ + merged_clusters: List[Set[Tuple[int, int]]] = [] + for cluster in clusters.values(): + cluster_with_overlapping_mention = None + for mention in cluster: + # Look at clusters we have already processed to + # see if they contain a mention in the current + # cluster for comparison. + for cluster2 in merged_clusters: + if mention in cluster2: + # first cluster in merged clusters + # which contains this mention. + cluster_with_overlapping_mention = cluster2 + break + # Already encountered overlap - no need to keep looking. + if cluster_with_overlapping_mention is not None: + break + if cluster_with_overlapping_mention is not None: + # Merge cluster we are currently processing into + # the cluster in the processed list. 
+ cluster_with_overlapping_mention.update(cluster) + else: + merged_clusters.append(set(cluster)) + return [list(c) for c in merged_clusters] + + +@DatasetReader.register("coref_ace") +class CorefACEReader(DatasetReader): + """ + A dataset reader to read the coref clusters from an ACE dataset + previously pre-procesed to fit the CoNLL-coreference format. + + Parameters + ---------- + max_span_width: ``int``, required. + The maximum width of candidate spans to consider. + token_indexers : ``Dict[str, TokenIndexer]``, optional + This is used to index the words in the document. See :class:`TokenIndexer`. + Default is ``{"tokens": SingleIdTokenIndexer()}``. + lazy : ``bool``, optional (default = False) + Whether or not the dataset should be loaded in lazy way. + """ + def __init__(self, + max_span_width: int, + token_indexers: Dict[str, TokenIndexer] = None, + lazy: bool = False) -> None: + super().__init__(lazy) + self._max_span_width = max_span_width + self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()} + + @overrides + def _read(self, file_path: str): + # if `file_path` is a URL, redirect to the cache + file_path = cached_path(file_path) + + ace_reader = ACE() + for sentences in ace_reader.dataset_document_iterator(file_path): + clusters: DefaultDict[int, List[Tuple[int, int]]] = collections.defaultdict(list) + + total_tokens = 0 + for sentence in sentences: + for typed_span in sentence.coref_spans: + # Coref annotations are on a _per sentence_ + # basis, so we need to adjust them to be relative + # to the length of the document. + span_id, (start, end) = typed_span + clusters[span_id].append((start + total_tokens, + end + total_tokens)) + total_tokens += len(sentence.words) + + canonical_clusters = canonicalize_clusters(clusters) + yield self.text_to_instance([s.words for s in sentences], canonical_clusters) + + @overrides + def text_to_instance(self, # type: ignore + sentences: List[List[str]], + gold_clusters: Optional[List[List[Tuple[int, int]]]] = None) -> Instance: + # pylint: disable=arguments-differ + """ + Parameters + ---------- + sentences : ``List[List[str]]``, required. + A list of lists representing the tokenised words and sentences in the document. + gold_clusters : ``Optional[List[List[Tuple[int, int]]]]``, optional (default = None) + A list of all clusters in the document, represented as word spans. Each cluster + contains some number of spans, which can be nested and overlap, but will never + exactly match between clusters. + + Returns + ------- + An ``Instance`` containing the following ``Fields``: + text : ``TextField`` + The text of the full document. + spans : ``ListField[SpanField]`` + A ListField containing the spans represented as ``SpanFields`` + with respect to the document text. + span_labels : ``SequenceLabelField``, optional + The id of the cluster which each possible span belongs to, or -1 if it does + not belong to a cluster. As these labels have variable length (it depends on + how many spans we are considering), we represent this a as a ``SequenceLabelField`` + with respect to the ``spans ``ListField``. 
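        For example (illustrative values), if ``gold_clusters`` is ``[[(0, 1), (5, 5)]]``,
        the candidate spans ``(0, 1)`` and ``(5, 5)`` are both labelled ``0`` and every
        other enumerated span is labelled ``-1``.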
+ """ + flattened_sentences = [self._normalize_word(word) + for sentence in sentences + for word in sentence] + + metadata: Dict[str, Any] = {"original_text": flattened_sentences} + if gold_clusters is not None: + metadata["clusters"] = gold_clusters + + text_field = TextField([Token(word) for word in flattened_sentences], self._token_indexers) + + cluster_dict = {} + if gold_clusters is not None: + for cluster_id, cluster in enumerate(gold_clusters): + for mention in cluster: + cluster_dict[tuple(mention)] = cluster_id + + spans: List[Field] = [] + span_labels: Optional[List[int]] = [] if gold_clusters is not None else None + + sentence_offset = 0 + for sentence in sentences: + for start, end in enumerate_spans(sentence, + offset=sentence_offset, + max_span_width=self._max_span_width): + if span_labels is not None: + if (start, end) in cluster_dict: + span_labels.append(cluster_dict[(start, end)]) + else: + span_labels.append(-1) + + spans.append(SpanField(start, end, text_field)) + sentence_offset += len(sentence) + + span_field = ListField(spans) + metadata_field = MetadataField(metadata) + + fields: Dict[str, Field] = {"text": text_field, + "spans": span_field, + "metadata": metadata_field} + if span_labels is not None: + fields["span_labels"] = SequenceLabelField(span_labels, span_field) + + return Instance(fields) + + @staticmethod + def _normalize_word(word): + if word == "/." or word == "/?": + return word[1:] + else: + return word diff --git a/hmtl/dataset_readers/dataset_utils/__init__.py b/hmtl/dataset_readers/dataset_utils/__init__.py new file mode 100644 index 0000000..6610b37 --- /dev/null +++ b/hmtl/dataset_readers/dataset_utils/__init__.py @@ -0,0 +1,3 @@ +# coding: utf-8 + +from hmtl.dataset_readers.dataset_utils.ace import ACE, ACESentence \ No newline at end of file diff --git a/hmtl/dataset_readers/dataset_utils/ace.py b/hmtl/dataset_readers/dataset_utils/ace.py new file mode 100644 index 0000000..b29003a --- /dev/null +++ b/hmtl/dataset_readers/dataset_utils/ace.py @@ -0,0 +1,282 @@ +# coding: utf-8 + +from typing import DefaultDict, List, Optional, Iterator, Set, Tuple +from collections import defaultdict +import codecs +import os +import logging + +from allennlp.data.dataset_readers.dataset_utils import iob1_to_bioul + +from nltk import Tree + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + +TypedSpan = Tuple[int, Tuple[int, int]] # pylint: disable=invalid-name +TypedStringSpan = Tuple[str, Tuple[int, int]] # pylint: disable=invalid-name + +class ACESentence: + """ + A class representing the annotations available for a single ACE CONLL-formatted sentence. + + Parameters + ---------- + words : ``List[str]`` + This is the tokens as segmented/tokenized with spayc. + mention_tags : ``List[str]`` + The BIO tags for Entity Mention Detection in the sentence. + relations : ``List[Tuple[str, List[str]]]`` + The relations tags for Relation Extraction in the sentence. + last_head_token_relations : ``List[Tuple[str, List[str]]]`` + The relations tags between last tokens for ARG1 and ARG2 for Relation Extraction in the sentence. + coref_spans : ``Set[TypedSpan]`` + The spans for entity mentions involved in coreference resolution within the sentence. + Each element is a tuple composed of (cluster_id, (start_index, end_index)). Indices + are `inclusive`. 
+ """ + def __init__(self, + words: List[str], + mention_tags: List[str], + relations: List[Tuple[str, List[str]]], + last_head_token_relations: List[Tuple[str, List[str]]], + coref_spans: Set[TypedSpan]): + self.words = words + self.mention_tags = mention_tags + self.relations = relations + self.last_head_token_relations = last_head_token_relations + self.coref_spans = coref_spans + + +class ACE: + """ + This DatasetReader is designed to read in the ACE (2005 or 2004) which + have been previously formatted in the format used by the CoNLL format + (see for instance OntoNotes dataset). + """ + def dataset_iterator(self, file_path: str) -> Iterator[ACESentence]: + """ + An iterator over the entire dataset, yielding all sentences processed. + """ + for conll_file in self.dataset_path_iterator(file_path): + yield from self.sentence_iterator(conll_file) + + @staticmethod + def dataset_path_iterator(file_path: str) -> Iterator[str]: + """ + An iterator returning file_paths in a directory + containing CONLL-formatted files. + """ + logger.info("Reading ACE CONLL-like sentences from dataset files at: %s", file_path) + for root, _, files in list(os.walk(file_path)): + for data_file in files: + if not data_file.endswith("like_conll"): + continue + + yield os.path.join(root, data_file) + + def dataset_document_iterator(self, file_path: str) -> Iterator[List[ACESentence]]: + """ + An iterator over CONLL-formatted files which yields documents, regardless + of the number of document annotations in a particular file. + """ + with codecs.open(file_path, 'r', encoding='utf8') as open_file: + conll_rows = [] + document: List[ACESentence] = [] + for line in open_file: + line = line.strip() + if line != '' and not line.startswith('#'): + # Non-empty line. Collect the annotation. + conll_rows.append(line) + else: + if conll_rows: + document.append(self._conll_rows_to_sentence(conll_rows)) + conll_rows = [] + if line.startswith("#end document"): + yield document + document = [] + if document: + # Collect any stragglers or files which might not + # have the '#end document' format for the end of the file. + yield document + + def sentence_iterator(self, file_path: str) -> Iterator[ACESentence]: + """ + An iterator over the sentences in an individual CONLL formatted file. + """ + for document in self.dataset_document_iterator(file_path): + for sentence in document: + yield sentence + + def _conll_rows_to_sentence(self, conll_rows: List[str]) -> ACESentence: + sentence: List[str] = [] + mention_tags: List[str] = [] + + span_labels: List[List[str]] = [] + current_span_labels: List[str] = [] + + # Cluster id -> List of (start_index, end_index) spans. + clusters: DefaultDict[int, List[Tuple[int, int]]] = defaultdict(list) + # Cluster id -> List of start_indices which are open for this id. 
+ coref_stacks: DefaultDict[int, List[int]] = defaultdict(list) + + for index, row in enumerate(conll_rows): + conll_components = row.split() + + word = conll_components[1] + + if not span_labels: + span_labels = [[] for _ in conll_components[2:-1]] + current_span_labels = [None for _ in conll_components[2:-1]] + self._process_span_annotations_for_word(annotations = conll_components[2:-1], + span_labels = span_labels, + current_span_labels = current_span_labels) + + #Process coref + self._process_coref_span_annotations_for_word(conll_components[-1], + index, + clusters, + coref_stacks) + + sentence.append(word) + + + mention_tags = iob1_to_bioul(span_labels[0]) + + #Process coref clusters + coref_span_tuples: Set[TypedSpan] = {(cluster_id, span) + for cluster_id, span_list in clusters.items() + for span in span_list} + + + #Reformat the labels to only keep the the last token of the head + #Cf paper, we model relation between last tokens of heads. + last_head_token_relations = [] + bioul_relations = [] + + for relation_frame in span_labels[1:]: + bioul_relation_frame = iob1_to_bioul(relation_frame) + + reformatted_frame = [] + for annotation in bioul_relation_frame: + if annotation[:2] in ["L-", "U-"]: + reformatted_frame.append(annotation[2:]) + else: + reformatted_frame.append("*") + + last_head_token_relations.append(reformatted_frame) + bioul_relations.append(bioul_relation_frame) + + return ACESentence(sentence, mention_tags, bioul_relations, last_head_token_relations, coref_span_tuples) + + + @staticmethod + def _process_mention_tags(annotations: List[str]): + """ + Read and pre-process the entity mention tags as a formatted in CoNll-NER-style. + """ + labels = [] + current_span_label = None + for annotation in annotations: + label = annotation.strip("()*") + if "(" in annotation: + bio_label = "B-" + label + current_span_label = label + elif current_span_label is not None: + bio_label = "I-" + current_span_label + else: + bio_label = "O" + if ")" in annotation: + current_span_label = None + labels.append(bio_label) + return labels + + @staticmethod + def _process_span_annotations_for_word(annotations: List[str], + span_labels: List[List[str]], + current_span_labels: List[Optional[str]]) -> None: + """ + Given a sequence of different label types for a single word and the current + span label we are inside, compute the BIO tag for each label and append to a list. + + Parameters + ---------- + annotations: ``List[str]`` + A list of labels to compute BIO tags for. + span_labels : ``List[List[str]]`` + A list of lists, one for each annotation, to incrementally collect + the BIO tags for a sequence. + current_span_labels : ``List[Optional[str]]`` + The currently open span per annotation type, or ``None`` if there is no open span. + """ + for annotation_index, annotation in enumerate(annotations): + # strip all bracketing information to + # get the actual propbank label. + label = annotation.strip("()*") + + if "(" in annotation: + # Entering into a span for a particular semantic role label. + # We append the label and set the current span for this annotation. + bio_label = "B-" + label + span_labels[annotation_index].append(bio_label) + current_span_labels[annotation_index] = label + elif current_span_labels[annotation_index] is not None: + # If there's no '(' token, but the current_span_label is not None, + # then we are inside a span. + bio_label = "I-" + current_span_labels[annotation_index] + span_labels[annotation_index].append(bio_label) + else: + # We're outside a span. 
+ span_labels[annotation_index].append("O") + # Exiting a span, so we reset the current span label for this annotation. + if ")" in annotation: + current_span_labels[annotation_index] = None + + + @staticmethod + def _process_coref_span_annotations_for_word(label: str, + word_index: int, + clusters: DefaultDict[int, List[Tuple[int, int]]], + coref_stacks: DefaultDict[int, List[int]]) -> None: + """ + For a given coref label, add it to a currently open span(s), complete a span(s) or + ignore it, if it is outside of all spans. This method mutates the clusters and coref_stacks + dictionaries. + + Parameters + ---------- + label : ``str`` + The coref label for this word. + word_index : ``int`` + The word index into the sentence. + clusters : ``DefaultDict[int, List[Tuple[int, int]]]`` + A dictionary mapping cluster ids to lists of inclusive spans into the + sentence. + coref_stacks: ``DefaultDict[int, List[int]]`` + Stacks for each cluster id to hold the start indices of active spans (spans + which we are inside of when processing a given word). Spans with the same id + can be nested, which is why we collect these opening spans on a stack, e.g: + + [Greg, the baker who referred to [himself]_ID1 as 'the bread man']_ID1 + """ + if label != "-": + for segment in label.split("|"): + # The conll representation of coref spans allows spans to + # overlap. If spans end or begin at the same word, they are + # separated by a "|". + if segment[0] == "(": + # The span begins at this word. + if segment[-1] == ")": + # The span begins and ends at this word (single word span). + cluster_id = int(segment[1:-1]) + clusters[cluster_id].append((word_index, word_index)) + else: + # The span is starting, so we record the index of the word. + cluster_id = int(segment[1:]) + coref_stacks[cluster_id].append(word_index) + else: + # The span for this id is ending, but didn't start at this word. + # Retrieve the start index from the document state and + # add the span to the clusters for this id. + cluster_id = int(segment[:-1]) + start = coref_stacks[cluster_id].pop() + clusters[cluster_id].append((start, word_index)) \ No newline at end of file diff --git a/hmtl/dataset_readers/mention_ace.py b/hmtl/dataset_readers/mention_ace.py new file mode 100644 index 0000000..048f060 --- /dev/null +++ b/hmtl/dataset_readers/mention_ace.py @@ -0,0 +1,75 @@ +# coding: utf-8 + +import logging +from typing import Dict, List, Iterable, Iterator + +from overrides import overrides +import codecs + +from allennlp.common import Params +from allennlp.common.checks import ConfigurationError +from allennlp.common.file_utils import cached_path +from allennlp.data.dataset_readers.dataset_reader import DatasetReader +from allennlp.data.dataset_readers.dataset_utils import iob1_to_bioul +from allennlp.data.fields import Field, TextField, SequenceLabelField +from allennlp.data.instance import Instance +from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer +from allennlp.data.tokenizers import Token +from allennlp.data.dataset_readers.dataset_utils import Ontonotes, OntonotesSentence + +from hmtl.dataset_readers.dataset_utils import ACE, ACESentence + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + + +@DatasetReader.register("mention_ace") +class MentionACEReader(DatasetReader): + ''' + A dataset reader to read the Entity Mention Tags from an ACE dataset + previously pre-procesed to fit the CoNll-NER format. 
+ ''' + def __init__(self, + token_indexers: Dict[str, TokenIndexer] = None, + label_namespace: str = "ace_mention_labels", + lazy: bool = False) -> None: + super().__init__(lazy) + self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()} + self._label_namespace = label_namespace + + + @staticmethod + def _sentence_iterate(ace_reader: ACE, + file_path: str) -> Iterable[ACESentence]: + for conll_file in ace_reader.dataset_path_iterator(file_path): + yield from ace_reader.sentence_iterator(conll_file) + + + @overrides + def _read(self, + file_path: str): + file_path = cached_path(file_path) # if `file_path` is a URL, redirect to the cache + ace_reader = ACE() + logger.info("Reading ACE Mention instances from dataset files at: %s", file_path) + + for sentence in self._sentence_iterate(ace_reader, file_path): + tokens = [Token(t) for t in sentence.words] + if not sentence.mention_tags: + tags = ["O" for _ in tokens] + else: + tags = sentence.mention_tags + + yield self.text_to_instance(tokens, tags) + + + def text_to_instance(self, + tokens: List[Token], + tags: List[str] = None) -> Instance: + # pylint: disable=arguments-differ + fields: Dict[str, Field] = {} + text_field = TextField(tokens, token_indexers=self._token_indexers) + fields['tokens'] = text_field + if tags: + fields['tags'] = SequenceLabelField(labels = tags, sequence_field = text_field, label_namespace = self._label_namespace) + return Instance(fields) + \ No newline at end of file diff --git a/hmtl/dataset_readers/ner_ontonotes.py b/hmtl/dataset_readers/ner_ontonotes.py new file mode 100644 index 0000000..f8d0c17 --- /dev/null +++ b/hmtl/dataset_readers/ner_ontonotes.py @@ -0,0 +1,107 @@ +# coding: utf-8 + +import logging +from typing import Dict, List, Iterable + +from overrides import overrides + +from allennlp.common import Params +from allennlp.common.checks import ConfigurationError +from allennlp.common.file_utils import cached_path +from allennlp.data.dataset_readers.dataset_reader import DatasetReader +from allennlp.data.dataset_readers.dataset_utils import iob1_to_bioul +from allennlp.data.fields import Field, TextField, SequenceLabelField +from allennlp.data.instance import Instance +from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer +from allennlp.data.tokenizers import Token +from allennlp.data.dataset_readers.dataset_utils import Ontonotes, OntonotesSentence + + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +@DatasetReader.register("ner_ontonotes") +class NerOntonotesReader(DatasetReader): + ''' + An ``allennlp.data.dataset_readers.dataset_reader.DatasetReader`` for reading + NER annotations in CoNll-formatted OntoNotes dataset. + + NB: This DatasetReader was implemented before the current implementation of + ``OntonotesNamedEntityRecognition`` in AllenNLP. It is thought doing pretty much the same thing. + + Parameters + ---------- + token_indexers : ``Dict[str, TokenIndexer]``, optional (default=``{"tokens": SingleIdTokenIndexer()}``) + We use this to define the input representation for the text. See :class:`TokenIndexer`. + Map a token to an id. + domain_identifier : ``str``, optional (default = None) + The subdomain to load. If None is specified, the whole dataset is loaded. + label_namespace : ``str``, optional (default = "ontonotes_ner_labels") + The tag/label namespace for the task/dataset considered. + lazy : ``bool``, optional (default = False) + Whether or not the dataset should be loaded in lazy way. 
+ Refer to https://github.com/allenai/allennlp/blob/master/tutorials/getting_started/laziness.md + for more details about lazyness. + coding_scheme: ``str``, optional (default=``IOB1``) + Specifies the coding scheme for ``ner_labels`` and ``chunk_labels``. + Valid options are ``IOB1`` and ``BIOUL``. The ``IOB1`` default maintains + the original IOB1 scheme in the CoNLL data. + In the IOB1 scheme, I is a token inside a span, O is a token outside + a span and B is the beginning of span immediately following another + span of the same type. + ''' + def __init__(self, + token_indexers: Dict[str, TokenIndexer] = None, + domain_identifier: str = None, + label_namespace: str = "ontonotes_ner_labels", + lazy: bool = False, + coding_scheme: str = "IOB1") -> None: + super().__init__(lazy) + self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()} + self._domain_identifier = domain_identifier + self._label_namespace = label_namespace + self._coding_scheme = coding_scheme + if coding_scheme not in ("IOB1", "BIOUL"): + raise ConfigurationError("unknown coding_scheme: {}".format(coding_scheme)) + + @overrides + def _read(self, + file_path: str): + file_path = cached_path(file_path) # if `file_path` is a URL, redirect to the cache + ontonotes_reader = Ontonotes() + logger.info("Reading NER instances from dataset files at: %s", file_path) + if self._domain_identifier is not None: + logger.info("Filtering to only include file paths containing the %s domain", self._domain_identifier) + + for sentence in self._ontonotes_subset(ontonotes_reader, file_path, self._domain_identifier): + tokens = [Token(t) for t in sentence.words] + if not sentence.named_entities: + tags = ["O" for _ in tokens] + else: + tags = sentence.named_entities + + if self._coding_scheme == "BIOUL": + tags = iob1_to_bioul(tags) + + yield self.text_to_instance(tokens, tags) + + + @staticmethod + def _ontonotes_subset(ontonotes_reader: Ontonotes, + file_path: str, + domain_identifier: str) -> Iterable[OntonotesSentence]: + for conll_file in ontonotes_reader.dataset_path_iterator(file_path): + yield from ontonotes_reader.sentence_iterator(conll_file) + + + def text_to_instance(self, + tokens: List[Token], + tags: List[str] = None) -> Instance: + # pylint: disable=arguments-differ + fields: Dict[str, Field] = {} + text_field = TextField(tokens, token_indexers=self._token_indexers) + fields['tokens'] = text_field + if tags: + fields['tags'] = SequenceLabelField(labels = tags, sequence_field = text_field, label_namespace = self._label_namespace) + return Instance(fields) + \ No newline at end of file diff --git a/hmtl/dataset_readers/relation_ace.py b/hmtl/dataset_readers/relation_ace.py new file mode 100644 index 0000000..55b96aa --- /dev/null +++ b/hmtl/dataset_readers/relation_ace.py @@ -0,0 +1,80 @@ +# coding: utf-8 + +import logging +from typing import Dict, List, Iterable, Iterator + +from overrides import overrides +import codecs + +from allennlp.common import Params +from allennlp.common.checks import ConfigurationError +from allennlp.common.file_utils import cached_path +from allennlp.data.dataset_readers.dataset_reader import DatasetReader +from allennlp.data.dataset_readers.dataset_utils import iob1_to_bioul +from allennlp.data.fields import Field, TextField, SequenceLabelField, ListField +from allennlp.data.instance import Instance +from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer +from allennlp.data.tokenizers import Token +from allennlp.data.dataset_readers.dataset_utils import 
Ontonotes, OntonotesSentence + +from hmtl.dataset_readers.dataset_utils import ACE, ACESentence +#from hmtl.fields import MultipleSequenceLabelField + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + + +@DatasetReader.register("relation_ace") +class RelationACEReader(DatasetReader): + """ + A dataset reader to read the relations links from an ACE dataset + previously pre-procesed to fit the CoNLL-SRL format. + """ + def __init__(self, + token_indexers: Dict[str, TokenIndexer] = None, + label_namespace: str = "relation_ace_labels", + lazy: bool = False) -> None: + super().__init__(lazy) + self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()} + self._label_namespace = label_namespace + + + @staticmethod + def _sentence_iterate(ace_reader: ACE, + file_path: str) -> Iterable[ACESentence]: + for conll_file in ace_reader.dataset_path_iterator(file_path): + yield from ace_reader.sentence_iterator(conll_file) + + + @overrides + def _read(self, + file_path: str): + file_path = cached_path(file_path) # if `file_path` is a URL, redirect to the cache + ace_reader = ACE() + logger.info("Reading Relation labels from dataset files at: %s", file_path) + + for sentence in self._sentence_iterate(ace_reader, file_path): + tokens = [Token(t) for t in sentence.words] + + if sentence.relations == []: + relations = None + continue + else: + relations = sentence.last_head_token_relations + yield self.text_to_instance(tokens, relations) + + + def text_to_instance(self, + tokens: List[Token], + relations = None) -> Instance: + # pylint: disable=arguments-differ + fields: Dict[str, Field] = {} + text_field = TextField(tokens, token_indexers=self._token_indexers) + fields['text'] = text_field + if relations is not None: + field_list = [] + for relation in relations: + field_list.append(SequenceLabelField(labels = relation, sequence_field = text_field, label_namespace = self._label_namespace)) + fields["relations"] = ListField(field_list = field_list) + return Instance(fields) + \ No newline at end of file diff --git a/hmtl/models/__init__.py b/hmtl/models/__init__.py new file mode 100644 index 0000000..b13b730 --- /dev/null +++ b/hmtl/models/__init__.py @@ -0,0 +1,21 @@ +# coding: utf-8 + +from hmtl.models.coref_custom import CoreferenceCustom +from hmtl.models.relation_extraction import RelationExtractor + +#Single Module +from hmtl.models.layerNer import LayerNer +from hmtl.models.layerRelation import LayerRelation +from hmtl.models.layerCoref import LayerCoref + +#Two modules +from hmtl.models.layerNerEmd import LayerNerEmd +from hmtl.models.layerEmdRelation import LayerEmdRelation +from hmtl.models.layerEmdCoref import LayerEmdCoref + +#Three modules +from hmtl.models.layerNerEmdCoref import LayerNerEmdCoref +from hmtl.models.layerNerEmdRelation import LayerNerEmdRelation + +#Four modules +from hmtl.models.hmtl import HMTL \ No newline at end of file diff --git a/hmtl/models/coref_custom.py b/hmtl/models/coref_custom.py new file mode 100644 index 0000000..45e07e3 --- /dev/null +++ b/hmtl/models/coref_custom.py @@ -0,0 +1,204 @@ +import logging +import math +from typing import Any, Dict, List, Optional, Tuple + +import torch +import torch.nn.functional as F +from overrides import overrides + +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules.token_embedders import Embedding +from allennlp.modules import FeedForward +from allennlp.modules import Seq2SeqEncoder, TimeDistributed, TextFieldEmbedder, SpanPruner +from 
allennlp.modules.span_extractors import SelfAttentiveSpanExtractor, EndpointSpanExtractor +from allennlp.nn import util, InitializerApplicator, RegularizerApplicator +from allennlp.training.metrics import MentionRecall, ConllCorefScores +from allennlp.models.coreference_resolution import CoreferenceResolver + +from hmtl.training.metrics import ConllCorefFullScores + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +class CoreferenceCustom(CoreferenceResolver): + """ + This class implements a marginally modified version of ``allennlp.models.coreference_resolution.CoreferenceResolver`` + which is an implementation of the model of Lee et al., 2017. + The two modifications are: + 1/ Replacing the scorer to be able to get the 3 detailled coreference metrics (B3, MUC, CEAFE), + and not only their average. + 2/ Give the possibility to evaluate with the gold mentions: the model first predict mentions that MIGHT + be part of a coreference cluster, and in second time predict the coreference clusters for theses mentions. + We leave the possibility of replacing predicting the possible mentions + with the gold mentions in evaluation. + """ + def __init__(self, + vocab: Vocabulary, + text_field_embedder: TextFieldEmbedder, + context_layer: Seq2SeqEncoder, + mention_feedforward: FeedForward, + antecedent_feedforward: FeedForward, + feature_size: int, + max_span_width: int, + spans_per_word: float, + max_antecedents: int, + lexical_dropout: float = 0.2, + initializer: InitializerApplicator = InitializerApplicator(), + regularizer: Optional[RegularizerApplicator] = None, + eval_on_gold_mentions: bool = False) -> None: + super(CoreferenceCustom, self).__init__(vocab = vocab, + text_field_embedder = text_field_embedder, + context_layer = context_layer, + mention_feedforward = mention_feedforward, + antecedent_feedforward = antecedent_feedforward, + feature_size = feature_size, + max_span_width = max_span_width, + spans_per_word = spans_per_word, + max_antecedents = max_antecedents, + lexical_dropout = lexical_dropout, + initializer = initializer, + regularizer = regularizer) + + self._conll_coref_scores = ConllCorefFullScores() + self._eval_on_gold_mentions = eval_on_gold_mentions + + if self._eval_on_gold_mentions: + self._use_gold_mentions = False + else: + self._use_gold_mentions = None + + + @overrides + def get_metrics(self, + reset: bool = False, + full:bool = False): + mention_recall = self._mention_recall.get_metric(reset = reset) + metrics = self._conll_coref_scores.get_metric(reset = reset, full = full) + metrics["mention_recall"] = mention_recall + + return metrics + + @overrides + def forward(self, # type: ignore + text: Dict[str, torch.LongTensor], + spans: torch.IntTensor, + span_labels: torch.IntTensor = None, + metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + + # Shape: (batch_size, document_length, embedding_size) + text_embeddings = self._lexical_dropout(self._text_field_embedder(text)) + + document_length = text_embeddings.size(1) + + # Shape: (batch_size, document_length) + text_mask = util.get_text_field_mask(text).float() + + # Shape: (batch_size, num_spans) + if self._use_gold_mentions: + if text_embeddings.is_cuda: device = torch.device('cuda') + else: device = torch.device('cpu') + + s = [torch.as_tensor(pair, dtype = torch.long, device = device) for cluster in metadata[0]["clusters"] for pair in cluster] + gm = torch.stack(s, dim = 0).unsqueeze(0).unsqueeze(1) + + span_mask = (spans.unsqueeze(2) - gm) + 
span_mask = (span_mask[:,:,:,0]==0) + (span_mask[:,:,:,1]==0) + span_mask, _ = (span_mask == 2).max(-1) + num_spans = span_mask.sum().item() + span_mask = span_mask.float() + else: + span_mask = (spans[:, :, 0] >= 0).squeeze(-1).float() + num_spans = spans.size(1) + # Shape: (batch_size, num_spans, 2) + spans = F.relu(spans.float()).long() + + # Shape: (batch_size, document_length, encoding_dim) + contextualized_embeddings = self._context_layer(text_embeddings, text_mask) + # Shape: (batch_size, num_spans, 2 * encoding_dim + feature_size) + endpoint_span_embeddings = self._endpoint_span_extractor(contextualized_embeddings, spans) + # Shape: (batch_size, num_spans, emebedding_size) + attended_span_embeddings = self._attentive_span_extractor(text_embeddings, spans) + + # Shape: (batch_size, num_spans, emebedding_size + 2 * encoding_dim + feature_size) + span_embeddings = torch.cat([endpoint_span_embeddings, attended_span_embeddings], -1) + + # Prune based on mention scores. + num_spans_to_keep = int(math.floor(self._spans_per_word * document_length)) + + (top_span_embeddings, top_span_mask, + top_span_indices, top_span_mention_scores) = self._mention_pruner(span_embeddings, + span_mask, + num_spans_to_keep) + top_span_mask = top_span_mask.unsqueeze(-1) + # Shape: (batch_size * num_spans_to_keep) + flat_top_span_indices = util.flatten_and_batch_shift_indices(top_span_indices, num_spans) + + # Compute final predictions for which spans to consider as mentions. + # Shape: (batch_size, num_spans_to_keep, 2) + top_spans = util.batched_index_select(spans, + top_span_indices, + flat_top_span_indices) + + # Compute indices for antecedent spans to consider. + max_antecedents = min(self._max_antecedents, num_spans_to_keep) + + # Shapes: + # (num_spans_to_keep, max_antecedents), + # (1, max_antecedents), + # (1, num_spans_to_keep, max_antecedents) + valid_antecedent_indices, valid_antecedent_offsets, valid_antecedent_log_mask = \ + self._generate_valid_antecedents(num_spans_to_keep, max_antecedents, util.get_device_of(text_mask)) + # Select tensors relating to the antecedent spans. + # Shape: (batch_size, num_spans_to_keep, max_antecedents, embedding_size) + candidate_antecedent_embeddings = util.flattened_index_select(top_span_embeddings, + valid_antecedent_indices) + + # Shape: (batch_size, num_spans_to_keep, max_antecedents) + candidate_antecedent_mention_scores = util.flattened_index_select(top_span_mention_scores, + valid_antecedent_indices).squeeze(-1) + # Compute antecedent scores. + # Shape: (batch_size, num_spans_to_keep, max_antecedents, embedding_size) + span_pair_embeddings = self._compute_span_pair_embeddings(top_span_embeddings, + candidate_antecedent_embeddings, + valid_antecedent_offsets) + # Shape: (batch_size, num_spans_to_keep, 1 + max_antecedents) + coreference_scores = self._compute_coreference_scores(span_pair_embeddings, + top_span_mention_scores, + candidate_antecedent_mention_scores, + valid_antecedent_log_mask) + + # Shape: (batch_size, num_spans_to_keep) + _, predicted_antecedents = coreference_scores.max(2) + predicted_antecedents -= 1 + + output_dict = {"top_spans": top_spans, + "antecedent_indices": valid_antecedent_indices, + "predicted_antecedents": predicted_antecedents} + if span_labels is not None: + # Find the gold labels for the spans which we kept. 
+ pruned_gold_labels = util.batched_index_select(span_labels.unsqueeze(-1), + top_span_indices, + flat_top_span_indices) + + antecedent_labels = util.flattened_index_select(pruned_gold_labels, + valid_antecedent_indices).squeeze(-1) + antecedent_labels += valid_antecedent_log_mask.long() + + # Compute labels. + # Shape: (batch_size, num_spans_to_keep, max_antecedents + 1) + gold_antecedent_labels = self._compute_antecedent_gold_labels(pruned_gold_labels, + antecedent_labels) + coreference_log_probs = util.last_dim_log_softmax(coreference_scores, top_span_mask) + correct_antecedent_log_probs = coreference_log_probs + gold_antecedent_labels.log() + negative_marginal_log_likelihood = -util.logsumexp(correct_antecedent_log_probs).sum() + + self._mention_recall(top_spans, metadata) + self._conll_coref_scores(top_spans, valid_antecedent_indices, predicted_antecedents, metadata) + + output_dict["loss"] = negative_marginal_log_likelihood + + if metadata is not None: + output_dict["document"] = [x["original_text"] for x in metadata] + return output_dict \ No newline at end of file diff --git a/hmtl/models/hmtl.py b/hmtl/models/hmtl.py new file mode 100644 index 0000000..493ae2c --- /dev/null +++ b/hmtl/models/hmtl.py @@ -0,0 +1,207 @@ +# coding: utf-8 + +import os +import sys +import logging +from typing import Dict +from overrides import overrides + +import torch + +from allennlp.common import Params +from allennlp.common.checks import ConfigurationError +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder +from allennlp.nn import RegularizerApplicator, InitializerApplicator +from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder +from allennlp.modules import FeedForward +from allennlp.models.crf_tagger import CrfTagger + +from hmtl.modules.text_field_embedders import ShortcutConnectTextFieldEmbedder +from hmtl.models.relation_extraction import RelationExtractor +from hmtl.models import CoreferenceCustom + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +@Model.register("hmtl") +class HMTL(Model): + """ + A class that implements the full HMTL model. + + Parameters + ---------- + vocab: ``allennlp.data.Vocabulary``, required. + The vocabulary fitted on the data. + params: ``allennlp.common.Params``, required + Configuration parameters for the multi-task model. + regularizer: ``allennlp.nn.RegularizerApplicator``, optional (default = None) + A regularizer to apply to the model's layers.
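The constructor below is driven entirely by the ``params`` object, whose sub-configurations are popped one task at a time ("ner", "emd", "relation", "coref"). As a rough, hedged illustration of the expected shape (the key names follow the code below; all sub-dictionary contents are deliberately omitted here and the real settings live in the configs/*.json files added by this patch):

# Illustrative sketch only (not part of the patch): the top-level structure that
# HMTL's constructor pops from `params`. Sub-dictionary contents are omitted;
# see e.g. configs/hmtl_coref_ace.json for an actual configuration.
hmtl_params_sketch = {
    "text_field_embedder": {},                    # word/character embedders shared by every task
    "ner":      {"encoder": {}, "tagger": {}},    # lowest level of the hierarchy
    "emd":      {"encoder": {}, "tagger": {}},    # shortcut-connected to the NER encoder
    "relation": {"encoder": {}, "tagger": {}},    # shortcut-connected to the NER and EMD encoders
    "coref":    {"encoder": {}, "tagger": {}},    # shortcut-connected to the NER and EMD encoders
}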
+ """ + def __init__(self, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator = None): + + super(HMTL, self).__init__(vocab = vocab, regularizer = regularizer) + + # Base text Field Embedder + text_field_embedder_params = params.pop("text_field_embedder") + text_field_embedder = BasicTextFieldEmbedder.from_params(vocab=vocab, + params=text_field_embedder_params) + self._text_field_embedder = text_field_embedder + + + ############ + # NER Stuffs + ############ + ner_params = params.pop("ner") + + # Encoder + encoder_ner_params = ner_params.pop("encoder") + encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params) + self._encoder_ner = encoder_ner + + # Tagger NER - CRF Tagger + tagger_ner_params = ner_params.pop("tagger") + tagger_ner = CrfTagger(vocab = vocab, + text_field_embedder = self._text_field_embedder, + encoder = self._encoder_ner, + label_namespace = tagger_ner_params.pop("label_namespace", "labels"), + constraint_type = tagger_ner_params.pop("constraint_type", None), + dropout = tagger_ner_params.pop("dropout", None), + regularizer = regularizer) + self._tagger_ner = tagger_ner + + + ############ + # EMD Stuffs + ############ + emd_params = params.pop("emd") + + # Encoder + encoder_emd_params = emd_params.pop("encoder") + encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params) + self._encoder_emd = encoder_emd + + shortcut_text_field_embedder = ShortcutConnectTextFieldEmbedder(base_text_field_embedder = self._text_field_embedder, + previous_encoders = [self._encoder_ner]) + self._shortcut_text_field_embedder = shortcut_text_field_embedder + + + # Tagger: EMD - CRF Tagger + tagger_emd_params = emd_params.pop("tagger") + tagger_emd = CrfTagger(vocab = vocab, + text_field_embedder = self._shortcut_text_field_embedder, + encoder = self._encoder_emd, + label_namespace = tagger_emd_params.pop("label_namespace", "labels"), + constraint_type = tagger_emd_params.pop("constraint_type", None), + dropout = tagger_emd_params.pop("dropout", None), + regularizer = regularizer) + self._tagger_emd = tagger_emd + + + ############################ + # Relation Extraction Stuffs + ############################ + relation_params = params.pop("relation") + + # Encoder + encoder_relation_params = relation_params.pop("encoder") + encoder_relation = Seq2SeqEncoder.from_params(encoder_relation_params) + self._encoder_relation = encoder_relation + + shortcut_text_field_embedder_relation = ShortcutConnectTextFieldEmbedder(base_text_field_embedder = self._text_field_embedder, + previous_encoders = [self._encoder_ner, self._encoder_emd]) + self._shortcut_text_field_embedder_relation = shortcut_text_field_embedder_relation + + # Tagger: Relation + tagger_relation_params = relation_params.pop("tagger") + tagger_relation = RelationExtractor(vocab = vocab, + text_field_embedder = self._shortcut_text_field_embedder_relation, + context_layer = self._encoder_relation, + d = tagger_relation_params.pop_int("d"), + l = tagger_relation_params.pop_int("l"), + n_classes = tagger_relation_params.pop("n_classes"), + activation = tagger_relation_params.pop("activation")) + self._tagger_relation = tagger_relation + + + ############## + # Coref Stuffs + ############## + coref_params = params.pop("coref") + + # Encoder + encoder_coref_params = coref_params.pop("encoder") + encoder_coref = Seq2SeqEncoder.from_params(encoder_coref_params) + self._encoder_coref = encoder_coref + + shortcut_text_field_embedder_coref = ShortcutConnectTextFieldEmbedder(base_text_field_embedder = 
self._text_field_embedder, + previous_encoders = [self._encoder_ner, self._encoder_emd]) + self._shortcut_text_field_embedder_coref = shortcut_text_field_embedder_coref + + # Tagger: Coreference + tagger_coref_params = coref_params.pop("tagger") + eval_on_gold_mentions = tagger_coref_params.pop_bool("eval_on_gold_mentions", False) + init_params = tagger_coref_params.pop("initializer", None) + initializer = (InitializerApplicator.from_params(init_params) + if init_params is not None + else InitializerApplicator()) + + tagger_coref = CoreferenceCustom(vocab = vocab, + text_field_embedder = self._shortcut_text_field_embedder_coref, + context_layer = self._encoder_coref, + mention_feedforward = FeedForward.from_params(tagger_coref_params.pop("mention_feedforward")), + antecedent_feedforward = FeedForward.from_params(tagger_coref_params.pop("antecedent_feedforward")), + feature_size = tagger_coref_params.pop_int("feature_size"), + max_span_width = tagger_coref_params.pop_int("max_span_width"), + spans_per_word = tagger_coref_params.pop_float("spans_per_word"), + max_antecedents = tagger_coref_params.pop_int("max_antecedents"), + lexical_dropout = tagger_coref_params.pop_float("lexical_dropout", 0.2), + initializer = initializer, + regularizer = regularizer, + eval_on_gold_mentions = eval_on_gold_mentions) + self._tagger_coref = tagger_coref + if eval_on_gold_mentions: + self._tagger_coref._eval_on_gold_mentions = True + + logger.info("Multi-Task Learning Model has been instantiated.") + + @overrides + def forward(self, + tensor_batch, + for_training: bool = False, + task_name: str = "ner") -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + + tagger = getattr(self, "_tagger_%s" % task_name) + + if task_name == "coref" and tagger._eval_on_gold_mentions: + if for_training: tagger._use_gold_mentions = False + else: tagger._use_gold_mentions = True + + return tagger.forward(**tensor_batch) + + @overrides + def get_metrics(self, + task_name: str, + reset: bool = False, + full: bool = False) -> Dict[str, float]: + + task_tagger = getattr(self, "_tagger_" + task_name) + if full and task_name == "coref": + return task_tagger.get_metrics(reset = reset, full = full) + else: + return task_tagger.get_metrics(reset) + + @classmethod + def from_params(cls, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator) -> "HMTL": + return cls(vocab = vocab, + params = params, + regularizer = regularizer) + \ No newline at end of file diff --git a/hmtl/models/layerCoref.py b/hmtl/models/layerCoref.py new file mode 100644 index 0000000..b0f7f24 --- /dev/null +++ b/hmtl/models/layerCoref.py @@ -0,0 +1,126 @@ +# coding: utf-8 + +import os +import sys +import logging +from typing import Dict +from overrides import overrides + +import torch + +from allennlp.common import Params +from allennlp.common.checks import ConfigurationError +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder +from allennlp.nn import RegularizerApplicator, InitializerApplicator +from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder +from allennlp.modules import FeedForward +from allennlp.models.crf_tagger import CrfTagger + +from hmtl.modules.text_field_embedders import ShortcutConnectTextFieldEmbedder +from hmtl.models.relation_extraction import RelationExtractor +from hmtl.models import CoreferenceCustom + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + 
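The hierarchy assembled above in hmtl.py relies on ShortcutConnectTextFieldEmbedder to feed each higher-level encoder both the base word embeddings and the outputs of the lower-level encoders. Below is a minimal, self-contained sketch of that concatenation idea with toy shapes; it is illustrative only, the class's actual forward pass is defined in hmtl/modules/text_field_embedders/shortcut_connect_text_field_embedder.py later in this patch.

import torch

def shortcut_embed(base_embeddings: torch.Tensor,
                   previous_encoder_outputs: list) -> torch.Tensor:
    # base_embeddings: (batch, seq_len, base_dim) word representations
    # previous_encoder_outputs: list of (batch, seq_len, enc_dim_k) contextual states
    # The higher-level encoder receives the base embeddings concatenated with
    # every lower-level encoder's output along the feature dimension.
    return torch.cat([base_embeddings] + previous_encoder_outputs, dim=-1)

# Toy shapes: 50-dim word embeddings, two lower bidirectional encoders of output size 128.
base = torch.randn(2, 7, 50)
ner_states = torch.randn(2, 7, 128)
emd_states = torch.randn(2, 7, 128)
coref_input = shortcut_embed(base, [ner_states, emd_states])
print(coref_input.shape)  # torch.Size([2, 7, 306])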
+@Model.register("coref_custom") +class LayerCoref(Model): + """ + A class that implement the one task of HMTL model: Coref (Lee et al). + + Parameters + ---------- + vocab: ``allennlp.data.Vocabulary``, required. + The vocabulary fitted on the data. + params: ``allennlp.common.Params``, required + Configuration parameters for the multi-task model. + regularizer: ``allennlp.nn.RegularizerApplicator``, optional (default = None) + A reguralizer to apply to the model's layers. + """ + def __init__(self, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator = None): + + super(LayerCoref, self).__init__(vocab = vocab, regularizer = regularizer) + + # Base text Field Embedder + text_field_embedder_params = params.pop("text_field_embedder") + text_field_embedder = BasicTextFieldEmbedder.from_params(vocab=vocab, + params=text_field_embedder_params) + self._text_field_embedder = text_field_embedder + + ############## + # Coref Stuffs + ############## + coref_params = params.pop("coref") + + # Encoder + encoder_coref_params = coref_params.pop("encoder") + encoder_coref = Seq2SeqEncoder.from_params(encoder_coref_params) + self._encoder_coref = encoder_coref + + # Tagger: Coreference + tagger_coref_params = coref_params.pop("tagger") + eval_on_gold_mentions = tagger_coref_params.pop_bool("eval_on_gold_mentions", False) + init_params = tagger_coref_params.pop("initializer", None) + initializer = (InitializerApplicator.from_params(init_params) + if init_params is not None + else InitializerApplicator()) + + tagger_coref = CoreferenceCustom(vocab = vocab, + text_field_embedder = self._text_field_embedder, + context_layer = self._encoder_coref, + mention_feedforward = FeedForward.from_params(tagger_coref_params.pop("mention_feedforward")), + antecedent_feedforward = FeedForward.from_params(tagger_coref_params.pop("antecedent_feedforward")), + feature_size = tagger_coref_params.pop_int("feature_size"), + max_span_width = tagger_coref_params.pop_int("max_span_width"), + spans_per_word = tagger_coref_params.pop_float("spans_per_word"), + max_antecedents = tagger_coref_params.pop_int("max_antecedents"), + lexical_dropout = tagger_coref_params.pop_float("lexical_dropout", 0.2), + initializer = initializer, + regularizer = regularizer, + eval_on_gold_mentions = eval_on_gold_mentions) + self._tagger_coref = tagger_coref + if eval_on_gold_mentions: + self._tagger_coref._eval_on_gold_mentions = True + + logger.info("Multi-Task Learning Model has been instantiated.") + + @overrides + def forward(self, + tensor_batch, + for_training: bool = False, + task_name: str = "coref") -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + + tagger = getattr(self, "_tagger_%s" % task_name) + + if task_name == "coref" and tagger._eval_on_gold_mentions: + if for_training: tagger._use_gold_mentions = False + else: tagger._use_gold_mentions = True + + return tagger.forward(**tensor_batch) + + @overrides + def get_metrics(self, + task_name: str, + reset: bool = False, + full: bool = False) -> Dict[str, float]: + + task_tagger = getattr(self, "_tagger_" + task_name) + if full and task_name == "coref": + return task_tagger.get_metrics(reset = reset, full = full) + else: + return task_tagger.get_metrics(reset) + + @classmethod + def from_params(cls, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator) -> "LayerCoref": + return cls(vocab = vocab, + params = params, + regularizer = regularizer) \ No newline at end of file diff --git a/hmtl/models/layerEmdCoref.py 
b/hmtl/models/layerEmdCoref.py new file mode 100644 index 0000000..657581d --- /dev/null +++ b/hmtl/models/layerEmdCoref.py @@ -0,0 +1,155 @@ +# coding: utf-8 + +import os +import sys +import logging +from typing import Dict +from overrides import overrides + +import torch + +from allennlp.common import Params +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder +from allennlp.nn import RegularizerApplicator, InitializerApplicator +from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder +from allennlp.modules import FeedForward +from allennlp.models.crf_tagger import CrfTagger + +from hmtl.modules.text_field_embedders import ShortcutConnectTextFieldEmbedder +from hmtl.models import CoreferenceCustom + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +@Model.register("emd_coref") +class LayerEmdCoref(Model): + """ + A class that implement two tasks of HMTL model: EMD (CRF Tagger) and Coref (Lee et al., 2017). + + Parameters + ---------- + vocab: ``allennlp.data.Vocabulary``, required. + The vocabulary fitted on the data. + params: ``allennlp.common.Params``, required + Configuration parameters for the multi-task model. + regularizer: ``allennlp.nn.RegularizerApplicator``, optional (default = None) + A reguralizer to apply to the model's layers. + """ + def __init__(self, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator = None): + + super(LayerEmdCoref, self).__init__(vocab = vocab, regularizer = regularizer) + + # Base text Field Embedder + text_field_embedder_params = params.pop("text_field_embedder") + text_field_embedder = BasicTextFieldEmbedder.from_params(vocab=vocab, + params=text_field_embedder_params) + self._text_field_embedder = text_field_embedder + + + ############ + # EMD Stuffs + ############ + emd_params = params.pop("emd") + + # Encoder + encoder_emd_params = emd_params.pop("encoder") + encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params) + self._encoder_emd = encoder_emd + + # Tagger EMD - CRF Tagger + tagger_emd_params = emd_params.pop("tagger") + tagger_emd = CrfTagger(vocab = vocab, + text_field_embedder = self._text_field_embedder, + encoder = self._encoder_emd, + label_namespace = tagger_emd_params.pop("label_namespace", "labels"), + constraint_type = tagger_emd_params.pop("constraint_type", None), + dropout = tagger_emd_params.pop("dropout", None), + regularizer = regularizer) + self._tagger_emd = tagger_emd + + + ############## + # Coref Stuffs + ############## + coref_params = params.pop("coref") + + # Encoder + encoder_coref_params = coref_params.pop("encoder") + encoder_coref = Seq2SeqEncoder.from_params(encoder_coref_params) + self._encoder_coref = encoder_coref + + shortcut_text_field_embedder_coref = ShortcutConnectTextFieldEmbedder(base_text_field_embedder = self._text_field_embedder, + previous_encoders = [self._encoder_emd]) + self._shortcut_text_field_embedder_coref = shortcut_text_field_embedder_coref + + # Tagger: Coreference + tagger_coref_params = coref_params.pop("tagger") + eval_on_gold_mentions = tagger_coref_params.pop_bool("eval_on_gold_mentions", False) + init_params = tagger_coref_params.pop("initializer", None) + initializer = (InitializerApplicator.from_params(init_params) + if init_params is not None + else InitializerApplicator()) + + tagger_coref = CoreferenceCustom(vocab = vocab, + text_field_embedder = self._shortcut_text_field_embedder_coref, + context_layer = 
self._encoder_coref, + mention_feedforward = FeedForward.from_params(tagger_coref_params.pop("mention_feedforward")), + antecedent_feedforward = FeedForward.from_params(tagger_coref_params.pop("antecedent_feedforward")), + feature_size = tagger_coref_params.pop_int("feature_size"), + max_span_width = tagger_coref_params.pop_int("max_span_width"), + spans_per_word = tagger_coref_params.pop_float("spans_per_word"), + max_antecedents = tagger_coref_params.pop_int("max_antecedents"), + lexical_dropout = tagger_coref_params.pop_float("lexical_dropout", 0.2), + initializer = initializer, + regularizer = regularizer, + eval_on_gold_mentions = eval_on_gold_mentions) + self._tagger_coref = tagger_coref + if eval_on_gold_mentions: + self._tagger_coref._eval_on_gold_mentions = True + + logger.info("Multi-Task Learning Model has been instantiated.") + + @overrides + def forward(self, + tensor_batch, + for_training: bool = False, + task_name: str = "emd") -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + """ + Special case for forward: for coreference, we can use gold mentions to predict the clusters + during evaluation (not during training). + """ + + tagger = getattr(self, "_tagger_%s" % task_name) + + if task_name == "coref" and tagger._eval_on_gold_mentions: + if for_training: tagger._use_gold_mentions = False + else: tagger._use_gold_mentions = True + + return tagger.forward(**tensor_batch) + + @overrides + def get_metrics(self, + task_name: str = "emd", + reset: bool = False, + full: bool = False) -> Dict[str, float]: + + task_tagger = getattr(self, "_tagger_" + task_name) + if full and task_name == "coref": + return task_tagger.get_metrics(reset = reset, full = full) + else: + return task_tagger.get_metrics(reset = reset) + + @classmethod + def from_params(cls, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator) -> "LayerEmdCoref": + return cls(vocab = vocab, + params = params, + regularizer = regularizer) \ No newline at end of file diff --git a/hmtl/models/layerEmdRelation.py b/hmtl/models/layerEmdRelation.py new file mode 100644 index 0000000..e39210d --- /dev/null +++ b/hmtl/models/layerEmdRelation.py @@ -0,0 +1,129 @@ +# coding: utf-8 + +import os +import sys +import logging +from typing import Dict +from overrides import overrides + +import torch + +from allennlp.common import Params +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder +from allennlp.nn import RegularizerApplicator +from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder +from allennlp.models.crf_tagger import CrfTagger + +from hmtl.modules.text_field_embedders import ShortcutConnectTextFieldEmbedder +from hmtl.models.relation_extraction import RelationExtractor + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +@Model.register("emd_relation") +class LayerEmdRelation(Model): + """ + A class that implements two tasks of the HMTL model: EMD (CRF Tagger) and Relation Extraction. + + Parameters + ---------- + vocab: ``allennlp.data.Vocabulary``, required. + The vocabulary fitted on the data. + params: ``allennlp.common.Params``, required + Configuration parameters for the multi-task model. + regularizer: ``allennlp.nn.RegularizerApplicator``, optional (default = None) + A regularizer to apply to the model's layers.
+ """ + def __init__(self, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator = None): + + super(LayerEmdRelation, self).__init__(vocab = vocab, regularizer = regularizer) + + + # Base text Field Embedder + text_field_embedder_params = params.pop("text_field_embedder") + text_field_embedder = BasicTextFieldEmbedder.from_params(vocab=vocab, + params=text_field_embedder_params) + self._text_field_embedder = text_field_embedder + + + ############ + # EMD Stuffs + ############ + emd_params = params.pop("emd") + + # Encoder + encoder_emd_params = emd_params.pop("encoder") + encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params) + self._encoder_emd = encoder_emd + + # Tagger EMD - CRF Tagger + tagger_emd_params = emd_params.pop("tagger") + tagger_emd = CrfTagger(vocab = vocab, + text_field_embedder = self._text_field_embedder, + encoder = self._encoder_emd, + label_namespace = tagger_emd_params.pop("label_namespace", "labels"), + constraint_type = tagger_emd_params.pop("constraint_type", None), + dropout = tagger_emd_params.pop("dropout", None), + regularizer = regularizer) + self._tagger_emd = tagger_emd + + + ############################ + # Relation Extraction Stuffs + ############################ + relation_params = params.pop("relation") + + # Encoder + encoder_relation_params = relation_params.pop("encoder") + encoder_relation = Seq2SeqEncoder.from_params(encoder_relation_params) + self._encoder_relation = encoder_relation + + shortcut_text_field_embedder_relation = ShortcutConnectTextFieldEmbedder(base_text_field_embedder = self._text_field_embedder, + previous_encoders = [self._encoder_emd]) + self._shortcut_text_field_embedder_relation = shortcut_text_field_embedder_relation + + # Tagger: Relation + tagger_relation_params = relation_params.pop("tagger") + tagger_relation = RelationExtractor(vocab = vocab, + text_field_embedder = self._shortcut_text_field_embedder_relation, + context_layer = self._encoder_relation, + d = tagger_relation_params.pop_int("d"), + l = tagger_relation_params.pop_int("l"), + n_classes = tagger_relation_params.pop("n_classes"), + activation = tagger_relation_params.pop("activation")) + self._tagger_relation = tagger_relation + + logger.info("Multi-Task Learning Model has been instantiated.") + + @overrides + def forward(self, + tensor_batch, + for_training: bool = False, + task_name: str = "ner") -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + + tagger = getattr(self, "_tagger_%s" % task_name) + return tagger.forward(**tensor_batch) + + @overrides + def get_metrics(self, + task_name: str, + reset: bool = False, + full: bool = False) -> Dict[str, float]: + + task_tagger = getattr(self, "_tagger_" + task_name) + return task_tagger.get_metrics(reset) + + @classmethod + def from_params(cls, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator) -> "LayerEmdRelation": + return cls(vocab = vocab, + params = params, + regularizer = regularizer) \ No newline at end of file diff --git a/hmtl/models/layerNer.py b/hmtl/models/layerNer.py new file mode 100644 index 0000000..945438a --- /dev/null +++ b/hmtl/models/layerNer.py @@ -0,0 +1,99 @@ +# coding: utf-8 + +import os +import sys +import logging +from typing import Dict +from overrides import overrides + +import torch + +from allennlp.common import Params +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder +from allennlp.nn import RegularizerApplicator +from 
allennlp.modules.text_field_embedders import BasicTextFieldEmbedder +from allennlp.models.crf_tagger import CrfTagger + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +@Model.register("ner") +class LayerNer(Model): + """ + A class that implement the first task of HMTL model: NER (CRF Tagger). + + Parameters + ---------- + vocab: ``allennlp.data.Vocabulary``, required. + The vocabulary fitted on the data. + params: ``allennlp.common.Params``, required + Configuration parameters for the multi-task model. + regularizer: ``allennlp.nn.RegularizerApplicator``, optional (default = None) + A reguralizer to apply to the model's layers. + """ + def __init__(self, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator = None): + + super(LayerNer, self).__init__(vocab = vocab, regularizer = regularizer) + + # Base Text Field Embedder + text_field_embedder_params = params.pop("text_field_embedder") + text_field_embedder = BasicTextFieldEmbedder.from_params(vocab=vocab, + params=text_field_embedder_params) + self._text_field_embedder = text_field_embedder + + ############ + # NER Stuffs + ############ + ner_params = params.pop("ner") + + # Encoder + encoder_ner_params = ner_params.pop("encoder") + encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params) + self._encoder_ner = encoder_ner + + # Tagger NER - CRF Tagger + tagger_ner_params = ner_params.pop("tagger") + tagger_ner = CrfTagger(vocab = vocab, + text_field_embedder = self._text_field_embedder, + encoder = self._encoder_ner, + label_namespace = tagger_ner_params.pop("label_namespace", "labels"), + constraint_type = tagger_ner_params.pop("constraint_type", None), + dropout = tagger_ner_params.pop("dropout", None), + regularizer = regularizer) + self._tagger_ner = tagger_ner + + logger.info("Multi-Task Learning Model has been instantiated.") + + @overrides + def forward(self, + tensor_batch, + for_training: bool = False, + task_name: str = "ner") -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + + tagger = getattr(self, "_tagger_%s" % task_name) + return tagger.forward(**tensor_batch) + + @overrides + def get_metrics(self, + task_name: str = "ner", + reset: bool = False, + full: bool = False) -> Dict[str, float]: + + task_tagger = getattr(self, "_tagger_" + task_name) + return task_tagger.get_metrics(reset = reset) + + @classmethod + def from_params(cls, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator) -> "LayerNer": + return cls(vocab = vocab, + params = params, + regularizer = regularizer) + \ No newline at end of file diff --git a/hmtl/models/layerNerEmd.py b/hmtl/models/layerNerEmd.py new file mode 100644 index 0000000..f97c532 --- /dev/null +++ b/hmtl/models/layerNerEmd.py @@ -0,0 +1,127 @@ +# coding: utf-8 + +import os +import sys +import logging +from typing import Dict +from overrides import overrides + +import torch + +from allennlp.common import Params +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder +from allennlp.nn import RegularizerApplicator +from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder +from allennlp.models.crf_tagger import CrfTagger + +from hmtl.modules.text_field_embedders import ShortcutConnectTextFieldEmbedder + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +@Model.register("ner_emd") +class LayerNerEmd(Model): + """ + A class that implement two tasks of HMTL model: NER (CRF Tagger) and 
EMD (CRF Tagger). + + Parameters + ---------- + vocab: ``allennlp.data.Vocabulary``, required. + The vocabulary fitted on the data. + params: ``allennlp.common.Params``, required + Configuration parameters for the multi-task model. + regularizer: ``allennlp.nn.RegularizerApplicator``, optional (default = None) + A reguralizer to apply to the model's layers. + """ + def __init__(self, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator = None): + + super(LayerNerEmd, self).__init__(vocab = vocab, regularizer = regularizer) + + # Base text Field Embedder + text_field_embedder_params = params.pop("text_field_embedder") + text_field_embedder = BasicTextFieldEmbedder.from_params(vocab=vocab, + params=text_field_embedder_params) + self._text_field_embedder = text_field_embedder + + ############ + # NER Stuffs + ############ + ner_params = params.pop("ner") + + # Encoder + encoder_ner_params = ner_params.pop("encoder") + encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params) + self._encoder_ner = encoder_ner + + # Tagger NER - CRF Tagger + tagger_ner_params = ner_params.pop("tagger") + tagger_ner = CrfTagger(vocab = vocab, + text_field_embedder = self._text_field_embedder, + encoder = self._encoder_ner, + label_namespace = tagger_ner_params.pop("label_namespace", "labels"), + constraint_type = tagger_ner_params.pop("constraint_type", None), + dropout = tagger_ner_params.pop("dropout", None), + regularizer = regularizer) + self._tagger_ner = tagger_ner + + + ############ + # EMD Stuffs + ############ + emd_params = params.pop("emd") + + # Encoder + encoder_emd_params = emd_params.pop("encoder") + encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params) + self._encoder_emd = encoder_emd + + shortcut_text_field_embedder = ShortcutConnectTextFieldEmbedder(base_text_field_embedder = self._text_field_embedder, + previous_encoders = [self._encoder_ner]) + self._shortcut_text_field_embedder = shortcut_text_field_embedder + + + # Tagger: EMD - CRF Tagger + tagger_emd_params = emd_params.pop("tagger") + tagger_emd = CrfTagger(vocab = vocab, + text_field_embedder = self._shortcut_text_field_embedder, + encoder = self._encoder_emd, + label_namespace = tagger_emd_params.pop("label_namespace", "labels"), + constraint_type = tagger_emd_params.pop("constraint_type", None), + dropout = tagger_ner_params.pop("dropout", None), + regularizer = regularizer) + self._tagger_emd = tagger_emd + + logger.info("Multi-Task Learning Model has been instantiated.") + + @overrides + def forward(self, + tensor_batch, + for_training: bool = False, + task_name: str = "ner") -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + + tagger = getattr(self, "_tagger_%s" % task_name) + return tagger.forward(**tensor_batch) + + @overrides + def get_metrics(self, + task_name: str, + reset: bool = False, + full: bool = False) -> Dict[str, float]: + + task_tagger = getattr(self, "_tagger_" + task_name) + return task_tagger.get_metrics(reset) + + @classmethod + def from_params(cls, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator) -> "LayerNerEmd": + return cls(vocab = vocab, + params = params, + regularizer = regularizer) diff --git a/hmtl/models/layerNerEmdCoref.py b/hmtl/models/layerNerEmdCoref.py new file mode 100644 index 0000000..9a8d146 --- /dev/null +++ b/hmtl/models/layerNerEmdCoref.py @@ -0,0 +1,183 @@ +# coding: utf-8 + +import os +import sys +import logging +from typing import Dict +from overrides import overrides + +import torch + +from allennlp.common 
import Params +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder +from allennlp.nn import RegularizerApplicator, InitializerApplicator +from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder +from allennlp.modules import FeedForward +from allennlp.models.crf_tagger import CrfTagger + +from hmtl.modules.text_field_embedders import ShortcutConnectTextFieldEmbedder +from hmtl.models import CoreferenceCustom + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +@Model.register("ner_emd_coref") +class LayerNerEmdCoref(Model): + """ + A class that implement three tasks of HMTL model: NER (CRF Tagger), EMD (CRF Tagger) and Coreference Resolution. + + Parameters + ---------- + vocab: ``allennlp.data.Vocabulary``, required. + The vocabulary fitted on the data. + params: ``allennlp.common.Params``, required + Configuration parameters for the multi-task model. + regularizer: ``allennlp.nn.RegularizerApplicator``, optional (default = None) + A reguralizer to apply to the model's layers. + """ + def __init__(self, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator = None): + + super(LayerNerEmdCoref, self).__init__(vocab = vocab, regularizer = regularizer) + + # Base text Field Embedder + text_field_embedder_params = params.pop("text_field_embedder") + text_field_embedder = BasicTextFieldEmbedder.from_params(vocab=vocab, + params=text_field_embedder_params) + self._text_field_embedder = text_field_embedder + + + ############ + # NER Stuffs + ############ + ner_params = params.pop("ner") + + # Encoder + encoder_ner_params = ner_params.pop("encoder") + encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params) + self._encoder_ner = encoder_ner + + # Tagger NER - CRF Tagger + tagger_ner_params = ner_params.pop("tagger") + tagger_ner = CrfTagger(vocab = vocab, + text_field_embedder = self._text_field_embedder, + encoder = self._encoder_ner, + label_namespace = tagger_ner_params.pop("label_namespace", "labels"), + constraint_type = tagger_ner_params.pop("constraint_type", None), + dropout = tagger_ner_params.pop("dropout", None), + regularizer = regularizer) + self._tagger_ner = tagger_ner + + + ############ + # EMD Stuffs + ############ + emd_params = params.pop("emd") + + # Encoder + encoder_emd_params = emd_params.pop("encoder") + encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params) + self._encoder_emd = encoder_emd + + shortcut_text_field_embedder = ShortcutConnectTextFieldEmbedder(base_text_field_embedder = self._text_field_embedder, + previous_encoders = [self._encoder_ner]) + self._shortcut_text_field_embedder = shortcut_text_field_embedder + + + # Tagger: EMD - CRF Tagger + tagger_emd_params = emd_params.pop("tagger") + tagger_emd = CrfTagger(vocab = vocab, + text_field_embedder = self._shortcut_text_field_embedder, + encoder = self._encoder_emd, + label_namespace = tagger_emd_params.pop("label_namespace", "labels"), + constraint_type = tagger_emd_params.pop("constraint_type", None), + dropout = tagger_ner_params.pop("dropout", None), + regularizer = regularizer) + self._tagger_emd = tagger_emd + + + ############## + # Coref Stuffs + ############## + coref_params = params.pop("coref") + + # Encoder + encoder_coref_params = coref_params.pop("encoder") + encoder_coref = Seq2SeqEncoder.from_params(encoder_coref_params) + self._encoder_coref = encoder_coref + + shortcut_text_field_embedder_coref = 
ShortcutConnectTextFieldEmbedder(base_text_field_embedder = self._text_field_embedder, + previous_encoders = [self._encoder_ner, self._encoder_emd]) + self._shortcut_text_field_embedder_coref = shortcut_text_field_embedder_coref + + # Tagger: Coreference + tagger_coref_params = coref_params.pop("tagger") + eval_on_gold_mentions = tagger_coref_params.pop_bool("eval_on_gold_mentions", False) + init_params = tagger_coref_params.pop("initializer", None) + initializer = (InitializerApplicator.from_params(init_params) + if init_params is not None + else InitializerApplicator()) + + tagger_coref = CoreferenceCustom(vocab = vocab, + text_field_embedder = self._shortcut_text_field_embedder_coref, + context_layer = self._encoder_coref, + mention_feedforward = FeedForward.from_params(tagger_coref_params.pop("mention_feedforward")), + antecedent_feedforward = FeedForward.from_params(tagger_coref_params.pop("antecedent_feedforward")), + feature_size = tagger_coref_params.pop_int("feature_size"), + max_span_width = tagger_coref_params.pop_int("max_span_width"), + spans_per_word = tagger_coref_params.pop_float("spans_per_word"), + max_antecedents = tagger_coref_params.pop_int("max_antecedents"), + lexical_dropout = tagger_coref_params.pop_float("lexical_dropout", 0.2), + initializer = initializer, + regularizer = regularizer, + eval_on_gold_mentions = eval_on_gold_mentions) + self._tagger_coref = tagger_coref + if eval_on_gold_mentions: + self._tagger_coref._eval_on_gold_mentions = True + + logger.info("Multi-Task Learning Model has been instantiated.") + + @overrides + def forward(self, + tensor_batch, + for_training: bool = False, + task_name: str = "ner") -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + """ + Special case for forward: for coreference, we can use gold mentions to predict the clusters + during evaluation (not during training). 
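Concretely, the evaluation-time switch described just above amounts to flipping one flag on the coreference tagger before dispatching the batch. A stripped-down sketch of that control flow (the attribute names mirror this file; the helper function itself is illustrative, not part of the patch):

def dispatch_coref_batch(model, tensor_batch, for_training: bool):
    # Mirrors the forward() logic below: when gold-mention evaluation is enabled,
    # gold mentions are used only at evaluation time, never during training.
    tagger = model._tagger_coref
    if tagger._eval_on_gold_mentions:
        tagger._use_gold_mentions = not for_training
    return tagger.forward(**tensor_batch)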
+ """ + + tagger = getattr(self, "_tagger_%s" % task_name) + + if task_name == "coref" and tagger._eval_on_gold_mentions: + if for_training: tagger._use_gold_mentions = False + else: tagger._use_gold_mentions = True + + return tagger.forward(**tensor_batch) + + @overrides + def get_metrics(self, + task_name: str, + reset: bool = False, + full: bool = False) -> Dict[str, float]: + + task_tagger = getattr(self, "_tagger_" + task_name) + if full and task_name == "coref": + return task_tagger.get_metrics(reset = reset, full = full) + else: + return task_tagger.get_metrics(reset) + + @classmethod + def from_params(cls, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator) -> "LayerNerEmdCoref": + return cls(vocab = vocab, + params = params, + regularizer = regularizer) + \ No newline at end of file diff --git a/hmtl/models/layerNerEmdRelation.py b/hmtl/models/layerNerEmdRelation.py new file mode 100644 index 0000000..ed96c27 --- /dev/null +++ b/hmtl/models/layerNerEmdRelation.py @@ -0,0 +1,155 @@ +# coding: utf-8 + +import os +import sys +import logging +from typing import Dict +from overrides import overrides + +import torch + +from allennlp.common import Params +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder +from allennlp.nn import RegularizerApplicator +from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder +from allennlp.models.crf_tagger import CrfTagger + +from hmtl.modules.text_field_embedders import ShortcutConnectTextFieldEmbedder +from hmtl.models.relation_extraction import RelationExtractor + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +@Model.register("ner_emd_relation") +class LayerNerEmdRelation(Model): + """ + A class that implement three tasks of HMTL model: NER (CRF Tagger), EMD (CRF Tagger) and Relation Extraction. + + Parameters + ---------- + vocab: ``allennlp.data.Vocabulary``, required. + The vocabulary fitted on the data. + params: ``allennlp.common.Params``, required + Configuration parameters for the multi-task model. + regularizer: ``allennlp.nn.RegularizerApplicator``, optional (default = None) + A reguralizer to apply to the model's layers. 
+ """ + def __init__(self, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator = None): + + super(LayerNerEmdRelation, self).__init__(vocab = vocab, regularizer = regularizer) + + # Base text Field Embedder + text_field_embedder_params = params.pop("text_field_embedder") + text_field_embedder = BasicTextFieldEmbedder.from_params(vocab=vocab, + params=text_field_embedder_params) + self._text_field_embedder = text_field_embedder + + + ############ + # NER Stuffs + ############ + ner_params = params.pop("ner") + + # Encoder + encoder_ner_params = ner_params.pop("encoder") + encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params) + self._encoder_ner = encoder_ner + + # Tagger NER - CRF Tagger + tagger_ner_params = ner_params.pop("tagger") + tagger_ner = CrfTagger(vocab = vocab, + text_field_embedder = self._text_field_embedder, + encoder = self._encoder_ner, + label_namespace = tagger_ner_params.pop("label_namespace", "labels"), + constraint_type = tagger_ner_params.pop("constraint_type", None), + dropout = tagger_ner_params.pop("dropout", None), + regularizer = regularizer) + self._tagger_ner = tagger_ner + + + ############ + # EMD Stuffs + ############ + emd_params = params.pop("emd") + + # Encoder + encoder_emd_params = emd_params.pop("encoder") + encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params) + self._encoder_emd = encoder_emd + + shortcut_text_field_embedder = ShortcutConnectTextFieldEmbedder(base_text_field_embedder = self._text_field_embedder, + previous_encoders = [self._encoder_ner]) + self._shortcut_text_field_embedder = shortcut_text_field_embedder + + + # Tagger: EMD - CRF Tagger + tagger_emd_params = emd_params.pop("tagger") + tagger_emd = CrfTagger(vocab = vocab, + text_field_embedder = self._shortcut_text_field_embedder, + encoder = self._encoder_emd, + label_namespace = tagger_emd_params.pop("label_namespace", "labels"), + constraint_type = tagger_emd_params.pop("constraint_type", None), + dropout = tagger_ner_params.pop("dropout", None), + regularizer = regularizer) + self._tagger_emd = tagger_emd + + + ############################ + # Relation Extraction Stuffs + ############################ + relation_params = params.pop("relation") + + # Encoder + encoder_relation_params = relation_params.pop("encoder") + encoder_relation = Seq2SeqEncoder.from_params(encoder_relation_params) + self._encoder_relation = encoder_relation + + shortcut_text_field_embedder_relation = ShortcutConnectTextFieldEmbedder(base_text_field_embedder = self._text_field_embedder, + previous_encoders = [self._encoder_ner, self._encoder_emd]) + self._shortcut_text_field_embedder_relation = shortcut_text_field_embedder_relation + + # Tagger: Relation + tagger_relation_params = relation_params.pop("tagger") + tagger_relation = RelationExtractor(vocab = vocab, + text_field_embedder = self._shortcut_text_field_embedder_relation, + context_layer = self._encoder_relation, + d = tagger_relation_params.pop_int("d"), + l = tagger_relation_params.pop_int("l"), + n_classes = tagger_relation_params.pop("n_classes"), + activation = tagger_relation_params.pop("activation")) + self._tagger_relation = tagger_relation + + logger.info("Multi-Task Learning Model has been instantiated.") + + @overrides + def forward(self, + tensor_batch, + for_training: bool = False, + task_name: str = "ner") -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + + tagger = getattr(self, "_tagger_%s" % task_name) + return tagger.forward(**tensor_batch) + + @overrides + def get_metrics(self, + 
task_name: str, + reset: bool = False, + full: bool = False) -> Dict[str, float]: + + task_tagger = getattr(self, "_tagger_" + task_name) + return task_tagger.get_metrics(reset) + + @classmethod + def from_params(cls, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator) -> "LayerNerEmdRelation": + return cls(vocab = vocab, + params = params, + regularizer = regularizer) \ No newline at end of file diff --git a/hmtl/models/layerRelation.py b/hmtl/models/layerRelation.py new file mode 100644 index 0000000..45b7892 --- /dev/null +++ b/hmtl/models/layerRelation.py @@ -0,0 +1,100 @@ +# coding: utf-8 + +import os +import sys +import logging +from typing import Dict +from overrides import overrides + +import torch + +from allennlp.common import Params +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder +from allennlp.nn import RegularizerApplicator +from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder + +from hmtl.models.relation_extraction import RelationExtractor + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +@Model.register("relation") +class LayerRelation(Model): + """ + A class that implement one task of HMTL model: Relation Extraction. + + Parameters + ---------- + vocab: ``allennlp.data.Vocabulary``, required. + The vocabulary fitted on the data. + params: ``allennlp.common.Params``, required + Configuration parameters for the multi-task model. + regularizer: ``allennlp.nn.RegularizerApplicator``, optional (default = None) + A reguralizer to apply to the model's layers. + """ + def __init__(self, + vocab: Vocabulary, + params: Params, + regularizer: RegularizerApplicator = None): + + super(LayerRelation, self).__init__(vocab = vocab, regularizer = regularizer) + + # Base text Field Embedder + text_field_embedder_params = params.pop("text_field_embedder") + text_field_embedder = BasicTextFieldEmbedder.from_params(vocab=vocab, + params=text_field_embedder_params) + self._text_field_embedder = text_field_embedder + + ############################ + # Relation Extraction Stuffs + ############################ + relation_params = params.pop("relation") + + # Encoder + encoder_relation_params = relation_params.pop("encoder") + encoder_relation = Seq2SeqEncoder.from_params(encoder_relation_params) + self._encoder_relation = encoder_relation + + # Tagger: Relation + tagger_relation_params = relation_params.pop("tagger") + tagger_relation = RelationExtractor(vocab = vocab, + text_field_embedder = self._text_field_embedder, + context_layer = self._encoder_relation, + d = tagger_relation_params.pop_int("d"), + l = tagger_relation_params.pop_int("l"), + n_classes = tagger_relation_params.pop("n_classes"), + activation = tagger_relation_params.pop("activation")) + self._tagger_relation = tagger_relation + + logger.info("Multi-Task Learning Model has been instantiated.") + + @overrides + def forward(self, + tensor_batch, + for_training: bool = False, + task_name: str = "relation") -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + + tagger = getattr(self, "_tagger_%s" % task_name) + return tagger.forward(**tensor_batch) + + @overrides + def get_metrics(self, + task_name: str = "relation", + reset: bool = False, + full: bool = False) -> Dict[str, float]: + + task_tagger = getattr(self, "_tagger_" + task_name) + return task_tagger.get_metrics(reset) + + @classmethod + def from_params(cls, + vocab: Vocabulary, + params: Params, + 
regularizer: RegularizerApplicator) -> "LayerRelation": + return cls(vocab = vocab, + params = params, + regularizer = regularizer) + \ No newline at end of file diff --git a/hmtl/models/relation_extraction.py b/hmtl/models/relation_extraction.py new file mode 100644 index 0000000..33533b1 --- /dev/null +++ b/hmtl/models/relation_extraction.py @@ -0,0 +1,274 @@ +# coding: utf-8 + +import logging +import math +from typing import Any, Dict, List, Optional, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable #from torch.nn.parameter import Parameter, Variable + +from overrides import overrides + +from allennlp.common import Params +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder +from allennlp.modules.span_extractors import SelfAttentiveSpanExtractor, EndpointSpanExtractor +from allennlp.nn import util + +from hmtl.training.metrics import RelationF1Measure + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +# Mapping specific to the dataset used in our setting (ACE2005) +# Please adapt it if necessary +rel_type_2_idx = {"ORG-AFF": 0, + "PHYS": 1, + "ART": 2, + "PER-SOC": 3, + "PART-WHOLE": 4, + "GEN-AFF": 5} +idx_2_rel_type = {value: key for key, value in rel_type_2_idx.items()} + + +@Model.register("relation_extractor") +class RelationExtractor(Model): + """ + A class containing the scoring model for relation extraction. + It is derived from the model proposed by Bekoulis G. in + "Joint entity recognition and relation extraction as a multi-head selection problem" + https://bekou.github.io/papers/eswa2018b/bekoulis_eswa_2018b.pdf + + Parameters + ---------- + vocab: ``allennlp.data.Vocabulary``, required. + The vocabulary fitted on the data. + text_field_embedder : ``TextFieldEmbedder``, required + Used to embed the ``text`` ``TextField`` we get as input to the model. + context_layer : ``Seq2SeqEncoder``, required + This layer incorporates contextual information for each word in the document. + d: ``int``, required + Half the dimension of the token representations produced by the encoder ``context_layer`` (the encoder output size is 2*d). + l: ``int``, required + The dimension of the relation extractor scorer embedding. + n_classes: ``int``, required + The number of different possible relation classes. + activation: ``str``, optional (default = "relu") + Non-linear activation function for the scorer. Can be either "relu" or "tanh". + label_namespace: ``str``, optional (default = "relation_ace_labels") + The namespace for the labels of the task of relation extraction. + """ + def __init__(self, + vocab: Vocabulary, + text_field_embedder: TextFieldEmbedder, + context_layer: Seq2SeqEncoder, + d: int, + l: int, + n_classes: int, + activation: str = "relu", + label_namespace: str = "relation_ace_labels") -> None: + super(RelationExtractor, self).__init__(vocab) + + + self._U = nn.Parameter(torch.Tensor(2*d, l)) + self._W = nn.Parameter(torch.Tensor(2*d, l)) + self._V = nn.Parameter(torch.Tensor(l, n_classes)) + self._b = nn.Parameter(torch.Tensor(l)) + + self.init_weights() + + self._n_classes = n_classes + self._activation = activation + + self._text_field_embedder = text_field_embedder + self._context_layer = context_layer + + self._label_namespace = label_namespace + + self._relation_metric = RelationF1Measure() + + self._loss_fn = nn.BCEWithLogitsLoss() + + + def init_weights(self) -> None: + """ + Initialization for the weights of the model.
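To make the roles of ``_U``, ``_W``, ``_V`` and ``_b`` concrete: for every ordered pair of tokens (i, j), the scorer computes ``score(i, j) = f(h_i U + h_j W + b) V``, where ``h_i`` is the 2*d-dimensional contextual state of token i and ``f`` is ReLU or tanh, followed by an independent sigmoid per relation class. The following toy, self-contained sketch reproduces that pairwise scoring with made-up dimensions; it is illustrative only and uses a simpler broadcasting than the permute-based implementation further down.

import torch
import torch.nn.functional as F

# Toy dimensions: encoder output 2*d = 8, scorer size l = 4, n_classes = 6.
d, l, n_classes = 4, 4, 6
h = torch.randn(2, 5, 2 * d)                  # (batch, seq_len, 2*d) contextual states
U, W = torch.randn(2 * d, l), torch.randn(2 * d, l)
V, b = torch.randn(l, n_classes), torch.randn(l)

left = h @ U                                   # (batch, seq_len, l), contribution of token i
right = h @ W                                  # (batch, seq_len, l), contribution of token j
pairwise = left.unsqueeze(2) + right.unsqueeze(1) + b   # (batch, seq_len, seq_len, l)
scores = F.relu(pairwise) @ V                  # (batch, seq_len, seq_len, n_classes)
probs = torch.sigmoid(scores)                  # independent sigmoid per (i, j, relation class)
print(probs.shape)                             # torch.Size([2, 5, 5, 6])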
+ """ + nn.init.kaiming_normal_(self._U) + nn.init.kaiming_normal_(self._W) + nn.init.kaiming_normal_(self._V) + + nn.init.normal_(self._b) + + + def multi_class_cross_entropy_loss(self, + scores, + labels, + mask): + """ + Compute the loss from + """ + #Compute the mask before computing the loss + #Transform the mask that is at the sentence level (#Size: n_batches x padded_document_length) + #to a suitable format for the relation labels level + padded_document_length = mask.size(1) + mask = mask.float() #Size: n_batches x padded_document_length + squared_mask = torch.stack([e.view(padded_document_length, 1)*e for e in mask], dim = 0) + squared_mask = squared_mask.unsqueeze(-1).repeat(1,1,1,self._n_classes) #Size: n_batches x padded_document_length x padded_document_length x n_classes + + + #The scores (and gold labels) are flattened before using + #the binary cross entropy loss. + # We thus transform + flat_size = scores.size() + scores = scores*squared_mask #Size: n_batches x padded_document_length x padded_document_length x n_classes + scores_flat = scores.view(flat_size[0], flat_size[1], flat_size[2]*self._n_classes) #Size: n_batches x padded_document_length x (padded_document_length x n_classes) + labels = labels*squared_mask #Size: n_batches x padded_document_length x padded_document_length x n_classes + labels_flat = labels.view(flat_size[0], flat_size[1], flat_size[2]*self._n_classes) #Size: n_batches x padded_document_length x (padded_document_length x n_classes) + + loss = self._loss_fn(scores_flat, labels_flat) + + #Amplify the loss to actually see something... + return 100*loss + + + @overrides + def forward(self, + text: Dict[str, torch.LongTensor], + relations: torch.IntTensor = None) -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + """ + Forward pass of the model. + Compute the predictions and the loss (if labels are available). + + Parameters: + ---------- + text: Dict[str, torch.LongTensor] + The input sentences which have transformed into indexes (integers) according to a mapping token:str -> token:int + relations: torch.IntTensor + The gold relations to predict. + """ + + #Text field embedder map the token:int to their word embedding representation token:embedding (whatever these embeddings are). + text_embeddings = self._text_field_embedder(text) + #Compute the mask from the text: 1 if there is actually a word in the corresponding sentence, 0 if it has been padded. + mask = util.get_text_field_mask(text) #Size: batch_size x padded_document_length + + + #Compute the contextualized representation from the word embeddings. 
+ #Usually, _context_layer is a Seq2seq model such as LSTM + encoded_text = self._context_layer(text_embeddings, mask) #Size: batch_size x padded_document_length x lstm_output_size + + + ###### Relation Scorer ############## + #Compute the relation scores + left = torch.matmul(encoded_text, self._U) #Size: batch_size x padded_document_length x l + right = torch.matmul(encoded_text, self._W) #Size: batch_size x padded_document_length x l + + left = left.permute(1,0,2) + left = left.unsqueeze(3) + right = right.permute(0,2,1) + right = right.unsqueeze(0) + + B = left + right + B = B.permute(1,0,3,2) #Size: batch_size x padded_document_length x padded_document_length x l + + outer_sum_bias = B + self._b #Size: batch_size x padded_document_length x padded_document_length x l + if self._activation == "relu": + activated_outer_sum_bias = F.relu(outer_sum_bias) + elif self._activation == "tanh": + activated_outer_sum_bias = F.tanh(outer_sum_bias) + + relation_scores = torch.matmul(activated_outer_sum_bias, self._V) #Size: batch_size x padded_document_length x padded_document_length x n_classes + ################################################################# + + + batch_size, padded_document_length = mask.size() + + relation_sigmoid_scores = torch.sigmoid(relation_scores) # F.sigmoid(relation_scores) #Size: batch_size x padded_document_length x padded_document_length x n_classes + + #predicted_relations[l, i, j, k] == 1 iif we predict a relation k with ARG1==i, ARG2==j in the l-th sentence of the batch + predicted_relations = torch.round(relation_sigmoid_scores) #Size: batch_size x padded_document_length x padded_document_length x n_classes + + output_dict = { + "relation_sigmoid_scores": relation_sigmoid_scores, + "predicted_relations": predicted_relations, + "mask": mask + } + + + if relations is not None: + #Reformat the gold relations before computing the loss + #Size: batch_size x padded_document_length x padded_document_length x n_classes + #gold_relations[l, i, j, k] == 1 iif we predict a relation k with ARG1==i, ARG2==j in the l-th sentence of the batch + gold_relations = torch.zeros(batch_size, padded_document_length, padded_document_length, self._n_classes) + + + for exple_idx, exple_tags in enumerate(relations): #going through the batch + #rel is a list of list containing the current sentence in the batch + #each sublist in rel is of size padded_document_length + #and encodes a relation in the sentence where the two non zeros elements + #indicate the two words arguments AND the relation type between these two words. + for rel in exple_tags: + #relations have been padded, so for each sentence in the batch there are + #max_nb_of_relations_in_batch_for_one_sentence relations ie (number of sublist such as rel) + #The padded relations are simply list of size padded_document_length filled with 0. + if rel.sum().item()==0: continue + + for idx in rel.nonzero(): + label_srt = self.vocab.get_token_from_index(rel[idx].item(), self._label_namespace) + arg, rel_type = label_srt.split("_") + if arg == "ARG1": x = idx.data[0] + else: y = idx.data[0] + + gold_relations[exple_idx, x, y, rel_type_2_idx[rel_type]] = 1 + + #GPU support + if text_embeddings.is_cuda: gold_relations = gold_relations.cuda() + + + #Compute the loss + output_dict["loss"] = self.multi_class_cross_entropy_loss(scores = relation_scores, labels = gold_relations, mask = mask) + + #Compute the metrics with the predictions. 
+ self._relation_metric(predictions = predicted_relations, gold_labels = gold_relations, mask = mask) + + return output_dict + + + @overrides + def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, Any]: + """ + Decode the predictions + """ + decoded_predictions = [] + + for instance_tags in output_dict["predicted_relations"]: + sentence_length = instance_tags.size(0) + decoded_relations = [] + + for arg1, arg2, rel_type_idx in instance_tags.nonzero().data: + relation = ["*"]*sentence_length + rel_type = idx_2_rel_type[rel_type_idx] + relation[arg1] = "ARG1_" + rel_type + relation[arg2] = "ARG2_" + rel_type + decoded_relations.append(relation) + + decoded_predictions.append(decoded_relations) + + output_dict["decoded_predictions"] = decoded_predictions + + return output_dict + + + @overrides + def get_metrics(self, reset: bool = False) -> Dict[str, float]: + """ + Compute the metrics for relation: precision, recall and f1. + A relation is considered correct if we can correctly predict the last word of ARG1, the last word of ARG2 and the relation type. + """ + metric_dict = self._relation_metric.get_metric(reset = reset) + return {x: y for x, y in metric_dict.items() if "overall" in x} diff --git a/hmtl/modules/__init__.py b/hmtl/modules/__init__.py new file mode 100644 index 0000000..a4d6118 --- /dev/null +++ b/hmtl/modules/__init__.py @@ -0,0 +1,4 @@ +# coding: utf-8 + +from hmtl.modules import seq2seq_encoders +from hmtl.modules import text_field_embedders \ No newline at end of file diff --git a/hmtl/modules/seq2seq_encoders/__init__.py b/hmtl/modules/seq2seq_encoders/__init__.py new file mode 100644 index 0000000..7aef322 --- /dev/null +++ b/hmtl/modules/seq2seq_encoders/__init__.py @@ -0,0 +1,3 @@ +# coding: utf-8 + +from hmtl.modules.seq2seq_encoders.stacked_gru import StackedGRU \ No newline at end of file diff --git a/hmtl/modules/seq2seq_encoders/stacked_gru.py b/hmtl/modules/seq2seq_encoders/stacked_gru.py new file mode 100644 index 0000000..b029e33 --- /dev/null +++ b/hmtl/modules/seq2seq_encoders/stacked_gru.py @@ -0,0 +1,129 @@ +# coding: utf-8 + +from typing import List + +from overrides import overrides +import torch +from torch.nn import Dropout, Linear +from torch.nn import GRU + +from allennlp.nn.util import last_dim_softmax, weighted_sum +from allennlp.modules.seq2seq_encoders.seq2seq_encoder import Seq2SeqEncoder +from allennlp.common.params import Params + + +@Seq2SeqEncoder.register("stacked_gru") +class StackedGRU(Seq2SeqEncoder): + # pylint: disable=line-too-long + """ + This class implements a multiple layer GRU (RNN). + The specificity of this implementation compared to the default one in allennlp + (``allennlp.modules.seq2seq_encoders.Seq2SeqEncoder``) is the ability to + specify differents hidden state size for each layer of the in the + multiple-stacked-layers-GRU. + Optionally, different dropouts can be individually specified for each layer of the encoder. + + Parameters + ---------- + input_dim : ``int``, required. + The size of the last dimension of the input tensor. + hidden_sizes : ``List[int]``, required. + The hidden state sizes of each layer of the stacked-GRU. + num_layers : ``int``, required. + The number of layers to stack in the encoder. + bidirectional : ``bool``, required + Wheter or not the layers should be bidirectional. + dropouts : ``List[float]``, optional (default = None). + The dropout probabilities applied to each layer. The length of this list should + be equal to the number of layers ``num_layers``. 
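As a usage illustration of the per-layer sizing described above, here is a hedged sketch of building a StackedGRU through its from_params method (defined further down in this file); the sizes are made up and do not come from the repository's configs.

from allennlp.common.params import Params
from hmtl.modules.seq2seq_encoders.stacked_gru import StackedGRU

# A 3-layer bidirectional GRU whose hidden sizes shrink layer by layer.
encoder = StackedGRU.from_params(Params({
    "input_dim": 100,
    "hidden_sizes": [128, 64, 32],
    "num_layers": 3,
    "bidirectional": True,
}))
assert encoder.get_input_dim() == 100
assert encoder.get_output_dim() == 64  # last hidden size (32) doubled by bidirectionality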
+ """ + + def __init__(self, + input_dim: int, + hidden_sizes: List[int], + num_layers: int, + bidirectional: bool, + dropouts: List[float] = None) -> None: + super(StackedGRU, self).__init__() + + self._input_dim = input_dim + self._hidden_sizes = hidden_sizes + self._num_layers = num_layers + self._bidirectional = bidirectional + self._dropouts = [0.]*num_layers if dropouts is None else dropouts + + if len(self._hidden_sizes) != self._num_layers: + raise ValueError(f"Number of layers ({self._num_layers}) must be equal to the length of hidden state size list ({len(self._hidden_sizes)})") + if len(self._dropouts) != self._num_layers: + raise ValueError(f"Number of layers ({self._num_layers}) must be equal to the legnth of drouput rates list ({len(self._dropouts)})") + + self._output_dim = hidden_sizes[-1] + if self._bidirectional: + self._output_dim *= 2 + + self._gru_layers: List[GRU] = [] + for k in range(self._num_layers): + input_size = self._input_dim if k==0 else self._hidden_sizes[k-1] + if self._bidirectional and (k!=0): + input_size *= 2 + + gru_layer = GRU(input_size = input_size, + hidden_size = self._hidden_sizes[k], + dropout = self._dropouts[k], + num_layers = 1, + bidirectional = self._bidirectional) + self.add_module(f"gru_{k}", gru_layer) + self._gru_layers.append(gru_layer) + + + def get_input_dim(self): + return self._input_dim + + def get_output_dim(self): + return self._output_dim + + @overrides + def is_bidirectional(self): + return self._bidirectional + + @overrides + def forward(self, # pylint: disable=arguments-differ + inputs: torch.Tensor, + mask: torch.LongTensor = None) -> torch.FloatTensor: + """ + Parameters + ---------- + inputs : ``torch.FloatTensor``, required. + A tensor of shape (batch_size, timesteps, input_dim) + mask : ``torch.FloatTensor``, optional (default = None). + A tensor of shape (batch_size, timesteps). + + Returns + ------- + A tensor of shape (batch_size, timesteps, output_projection_dim), + where output_projection_dim = input_dim by default. 
+ """ + gru = self._gru_layers[0] + outputs, _ = gru(inputs) + + for k in range(1, self._num_layers): + gru = self._gru_layers[k] + next_outputs, _ = gru(outputs) + outputs = next_outputs + + return outputs + + @classmethod + def from_params(cls, params: Params) -> 'StackedGRU': + input_dim = params.pop_int('input_dim') + hidden_sizes = params.pop('hidden_sizes') + dropouts = params.pop('dropouts', None) + num_layers = params.pop_int('num_layers') + bidirectional = params.pop_bool('bidirectional') + params.assert_empty(cls.__name__) + + return cls(input_dim = input_dim, + hidden_sizes = hidden_sizes, + num_layers = num_layers, + bidirectional = bidirectional, + dropouts = dropouts) \ No newline at end of file diff --git a/hmtl/modules/text_field_embedders/__init__.py b/hmtl/modules/text_field_embedders/__init__.py new file mode 100644 index 0000000..f12783b --- /dev/null +++ b/hmtl/modules/text_field_embedders/__init__.py @@ -0,0 +1,3 @@ +# coding: utf-8 + +from hmtl.modules.text_field_embedders.shortcut_connect_text_field_embedder import ShortcutConnectTextFieldEmbedder \ No newline at end of file diff --git a/hmtl/modules/text_field_embedders/shortcut_connect_text_field_embedder.py b/hmtl/modules/text_field_embedders/shortcut_connect_text_field_embedder.py new file mode 100644 index 0000000..1be64f5 --- /dev/null +++ b/hmtl/modules/text_field_embedders/shortcut_connect_text_field_embedder.py @@ -0,0 +1,63 @@ +# coding: utf-8 + +from typing import Dict, List + +import torch +from overrides import overrides + +from allennlp.modules.text_field_embedders.text_field_embedder import TextFieldEmbedder +from allennlp.modules.seq2seq_encoders.seq2seq_encoder import Seq2SeqEncoder +import allennlp.nn.util as util + + +@TextFieldEmbedder.register("shortcut_connect_text_field_embedder") +class ShortcutConnectTextFieldEmbedder(TextFieldEmbedder): + """ + This class implement a specific text field embedder that benefits from the output of + a ``allennlp.modules.seq2seq_encoders.seq2seq_encoder.Seq2SeqEncoder``. + It simply concatenate two embeddings vectors: the one from the previous_encoder + (an ``allennlp.modules.seq2seq_encoders.seq2seq_encoder.Seq2SeqEncoder``) and + the one from the base_text_field_embedder + (an ``allennlp.modules.text_field_embedders.text_field_embedder.TextFieldEmbedder``). + The latter actually computes the word representation and explains the name of this class + "ShortcutConnectTextFieldEmbedder": it will feed the input of a ``Seq2SeqEncoder`` + with the output of the previous_encoder and the output of the base_text_field_embedder, + the connection with base_text_field_embedder actually circumventing the previous_encoder. + + Parameters + ---------- + base_text_field_embedder : ``TextFieldEmbedder``, required + The text field embedder that computes the word representation at the base of the model. + previous_encoder : ``Seq2SeqEncoder``, required + The previous seq2seqencoder. 
+ """ + def __init__(self, + base_text_field_embedder: TextFieldEmbedder, + previous_encoders: List[Seq2SeqEncoder]) -> None: + super(ShortcutConnectTextFieldEmbedder, self).__init__() + self._base_text_field_embedder = base_text_field_embedder + self._previous_encoders = previous_encoders + + @overrides + def get_output_dim(self) -> int: + output_dim = 0 + output_dim += self._base_text_field_embedder.get_output_dim() + output_dim += self._previous_encoders[-1].get_output_dim() + + return output_dim + + @overrides + def forward(self, + text_field_input: Dict[str, torch.Tensor], + num_wrapping_dims: int = 0) -> torch.Tensor: + text_field_embeddings = self._base_text_field_embedder.forward(text_field_input, num_wrapping_dims) + base_representation = text_field_embeddings + mask = util.get_text_field_mask(text_field_input) + + + for encoder in self._previous_encoders: + text_field_embeddings = encoder(text_field_embeddings, mask) + text_field_embeddings = torch.cat([base_representation, text_field_embeddings], dim = -1) + + + return torch.cat([text_field_embeddings], dim=-1) \ No newline at end of file diff --git a/hmtl/tasks/__init__.py b/hmtl/tasks/__init__.py new file mode 100644 index 0000000..ad064fd --- /dev/null +++ b/hmtl/tasks/__init__.py @@ -0,0 +1,3 @@ +# coding: utf-8 + +from hmtl.tasks.task import Task \ No newline at end of file diff --git a/hmtl/tasks/task.py b/hmtl/tasks/task.py new file mode 100644 index 0000000..c04f206 --- /dev/null +++ b/hmtl/tasks/task.py @@ -0,0 +1,96 @@ +# coding: utf-8 + +from typing import List +from allennlp.common import Params +from allennlp.commands.train import datasets_from_params +from allennlp.data.iterators import DataIterator +from allennlp.common.checks import ConfigurationError + +class Task(): + """ + A class to encapsulate the necessary informations (and datasets) + about each task. + + Parameters + ---------- + name : ``str``, required + The name of the task. + validation_metric_name : ``str``, required + The name of the validation metric to use to monitor training + and select the best epoch. + validation_metric_decreases : ``bool``, required + Whether or not the validation metric should decrease for improvement. + evaluate_on_test : ``bool`, optional (default = False) + Whether or not the task should be evaluated on the test set at the end of the training. 
+ """ + def __init__(self, + name: str, + validation_metric_name: str, + validation_metric_decreases: bool, + evaluate_on_test: bool = False) -> None: + self._name = name + + self._train_data = None + self._validation_data = None + self._test_data = None + self._evaluate_on_test = evaluate_on_test + + self._val_metric = validation_metric_name + self._val_metric_decreases = validation_metric_decreases + + self._data_iterator = None + + + def set_data_iterator(self, + data_iterator: DataIterator): + if data_iterator is not None: + self._data_iterator = data_iterator + else: + ConfigurationError(f"data_iterator cannot be None in set_iterator - Task name: {self._name}") + + + def load_data_from_params(self, + params: Params): + all_datasets = datasets_from_params(params) + datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets)) + + for dataset in datasets_for_vocab_creation: + if dataset not in all_datasets: + raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}") + + instances_for_vocab_creation = (instance for key, dataset in all_datasets.items() + for instance in dataset + if key in datasets_for_vocab_creation) + + self._instances_for_vocab_creation = instances_for_vocab_creation + self._datasets_for_vocab_creation = datasets_for_vocab_creation + + if 'train' in all_datasets.keys(): + self._train_data = all_datasets["train"] + self._tr_instances = sum(1 for e in self._train_data) # This is horrible if lazy iterator (Iterable) + if 'validation' in all_datasets.keys(): + self._validation_data = all_datasets["validation"] + self._val_instances = sum(1 for e in self._validation_data) # This is horrible if lazy iterator (Iterable) + if 'test' in all_datasets.keys(): + self._test_data = all_datasets["test"] + self._test_instances = sum(1 for e in self._test_data) # This is horrible if lazy iterator (Iterable) + + # If trying to evaluate on test set, make sure the dataset is loaded + if self._evaluate_on_test: + assert self._test_data is not None + + #return instances_for_vocab_creation, datasets_for_vocab_creation, all_datasets + return instances_for_vocab_creation, datasets_for_vocab_creation + + @classmethod + def from_params(cls, params: Params) -> "Task": + task_name = params.pop("task_name", "ner") + validation_metric_name = params.pop("validation_metric_name", "f1-measure-overall") + validation_metric_decreases = params.pop_bool("validation_metric_decreases", False) + evaluate_on_test = params.pop_bool("evaluate_on_test", False) + + params.assert_empty(cls.__name__) + return cls(name = task_name, + validation_metric_name = validation_metric_name, + validation_metric_decreases = validation_metric_decreases, + evaluate_on_test = evaluate_on_test) \ No newline at end of file diff --git a/hmtl/training/__init__.py b/hmtl/training/__init__.py new file mode 100644 index 0000000..f7eecc3 --- /dev/null +++ b/hmtl/training/__init__.py @@ -0,0 +1,3 @@ +# coding: utf-8 + +from hmtl.training.sampler_multi_task_trainer import SamplerMultiTaskTrainer \ No newline at end of file diff --git a/hmtl/training/metrics/__init__.py b/hmtl/training/metrics/__init__.py new file mode 100644 index 0000000..d0094ad --- /dev/null +++ b/hmtl/training/metrics/__init__.py @@ -0,0 +1,4 @@ +# coding: utf-8 + +from hmtl.training.metrics.relation_f1_measure import RelationF1Measure +from hmtl.training.metrics.conll_coref_full_scores import ConllCorefFullScores \ No newline at end of file diff --git a/hmtl/training/metrics/conll_coref_full_scores.py 
b/hmtl/training/metrics/conll_coref_full_scores.py new file mode 100644 index 0000000..76bba7a --- /dev/null +++ b/hmtl/training/metrics/conll_coref_full_scores.py @@ -0,0 +1,35 @@ +from overrides import overrides + +from allennlp.training.metrics import ConllCorefScores + +class ConllCorefFullScores(ConllCorefScores): + """ + This is marginal modification of the class ``allennlp.training.metrics.metric.ConllCorefScores``. + It leaves the possibility to get the 3 detailled coreference metrics (B3, MUC, CEAFE), + and not only their average. + """ + def __init__(self) -> None: + super(ConllCorefFullScores, self).__init__() + + @overrides + def get_metric(self, reset: bool = False, full: bool = False): + full_metrics = {} + if full: + for e in self.scorers: + metric_name = e.metric.__name__ + full_metrics[metric_name] = {"precision": e.get_precision(), + "recall": e.get_recall(), + "f1_score": e.get_f1()} + + metrics = (lambda e: e.get_precision(), lambda e: e.get_recall(), lambda e: e.get_f1()) + precision, recall, f1_score = tuple(sum(metric(e) for e in self.scorers) / len(self.scorers) + for metric in metrics) + + full_metrics["coref_precision"] = precision + full_metrics["coref_recall"] = recall + full_metrics["coref_f1"] = f1_score + + if reset: + self.reset() + + return full_metrics \ No newline at end of file diff --git a/hmtl/training/metrics/relation_f1_measure.py b/hmtl/training/metrics/relation_f1_measure.py new file mode 100644 index 0000000..e3fd299 --- /dev/null +++ b/hmtl/training/metrics/relation_f1_measure.py @@ -0,0 +1,109 @@ +from typing import Dict, List, Optional, Set +from collections import defaultdict + +import torch + +from allennlp.common.checks import ConfigurationError +from allennlp.nn.util import get_lengths_from_binary_sequence_mask #, ones_like +from allennlp.data.vocabulary import Vocabulary +from allennlp.training.metrics.metric import Metric + +@Metric.register("relation_f1") +class RelationF1Measure(Metric): + """ + """ + def __init__(self) -> None: + """ + A class for computing the metrics specific to relation extraction. + We consider a relation correct if we correctly predict the last of the head of the two arguments and the relation type. + """ + self._true_positives: int = 0 + self._false_positives: int = 0 + self._false_negatives: int = 0 + + def __call__(self, + predictions: torch.Tensor, + gold_labels: torch.Tensor, + mask: Optional[torch.Tensor] = None): + """ + Update the TP, FP and FN counters. + + Parameters + ---------- + predictions : ``torch.Tensor``, required. + A tensor of predictions of shape (batch_size, sequence_length, num_classes). + gold_labels : ``torch.Tensor``, required. + A tensor of integer class label of shape (batch_size, sequence_length). It must be the same + shape as the ``predictions`` tensor without the ``num_classes`` dimension. + mask: ``torch.Tensor``, optional (default = None). + A masking tensor the same size as ``gold_labels``. + """ + if mask is None: + mask = torch.ones_like(gold_labels) #ones_like(gold_labels) + # Get the data from the Variables. 
+ predictions, gold_labels, mask = self.unwrap_to_tensors(predictions, + gold_labels, + mask) + + if (gold_labels.size() != predictions.size()): + raise ConfigurationError("Predictions and gold labels don't have the same size.") + + #Apply mask + #Compute the mask before computing the loss + #Transform the mask that is at the sentence level (#Size: n_batches x padded_document_length) + #to a suitable format for the relation labels level + _, padded_document_length, _, n_classes = predictions.size() + mask = mask.float() + squared_mask = torch.stack([e.view(padded_document_length, 1)*e for e in mask], dim = 0) + squared_mask = squared_mask.unsqueeze(-1).repeat(1, 1, 1, n_classes) #Size: n_batches x padded_document_length x padded_document_length x n_classes + + gold_labels = gold_labels.cpu() + + predictions = predictions*squared_mask #Size: n_batches x padded_document_length x padded_document_length x n_classes + gold_labels = gold_labels*squared_mask #Size: n_batches x padded_document_length x padded_document_length x n_classes + + + # Iterate over timesteps in batch. + batch_size = gold_labels.size(0) + for i in range(batch_size): + flattened_predictions = predictions[i].view(-1).nonzero().cpu().numpy() + flattened_gold_labels = gold_labels[i].view(-1).nonzero().cpu().numpy() + + for prediction in flattened_predictions: + if prediction in flattened_gold_labels: + self._true_positives += 1 + else: + self._false_positives += 1 + for gold in flattened_gold_labels: + if gold not in flattened_predictions: + self._false_negatives += 1 + + + def get_metric(self, reset: bool = False): + """ + Get the metrics and reset the counters if necessary. + """ + all_metrics = {} + + # Compute the precision, recall and f1 for all spans jointly. + precision, recall, f1_measure = self._compute_metrics(self._true_positives, + self._false_positives, + self._false_negatives) + all_metrics["precision-overall"] = precision + all_metrics["recall-overall"] = recall + all_metrics["f1-measure-overall"] = f1_measure + if reset: + self.reset() + return all_metrics + + @staticmethod + def _compute_metrics(true_positives: int, false_positives: int, false_negatives: int): + precision = float(true_positives) / float(true_positives + false_positives + 1e-13) + recall = float(true_positives) / float(true_positives + false_negatives + 1e-13) + f1_measure = 2. 
* ((precision * recall) / (precision + recall + 1e-13)) + return precision, recall, f1_measure + + def reset(self): + self._true_positives = 0 + self._false_positives = 0 + self._false_negatives = 0 diff --git a/hmtl/training/multi_task_trainer.py b/hmtl/training/multi_task_trainer.py new file mode 100644 index 0000000..f9b345e --- /dev/null +++ b/hmtl/training/multi_task_trainer.py @@ -0,0 +1,380 @@ +# coding: utf-8 + +import os +import math +import time +from copy import deepcopy +import random +import logging +import itertools +import shutil +from tensorboardX import SummaryWriter + +from typing import List, Optional, Dict, Any, Tuple + +import torch +import torch.optim.lr_scheduler +import tqdm + +from allennlp.common import Params +from allennlp.common.checks import ConfigurationError, check_for_gpu +from allennlp.common.util import peak_memory_mb, gpu_memory_mb +from allennlp.nn.util import device_mapping, move_to_device +from allennlp.training.learning_rate_schedulers import LearningRateScheduler +from allennlp.training.optimizers import Optimizer +from allennlp.training.trainer import sparse_clip_norm, TensorboardWriter +from allennlp.models.model import Model +from allennlp.common.registrable import Registrable + + +from hmtl.tasks import Task + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + +class MultiTaskTrainer(Registrable): + def __init__(self, + model: Model, + task_list: List[Task], + optimizer_params: Params, + lr_scheduler_params: Params, + patience: Optional[int] = None, + num_epochs: int = 20, + serialization_dir: str = None, + cuda_device: int = -1, + grad_norm: Optional[float] = None, + grad_clipping: Optional[float] = None, + min_lr: float = 0.00001, + no_tqdm: bool = False, + summary_interval: int = 50, + log_parameter_statistics: bool = False, + log_gradient_statistics: bool = False): + """ + Parameters + ---------- + model: ``Model``, required. + An AllenNLP model to be optimized. Pytorch Modules can also be optimized if + their ``forward`` method returns a dictionary with a "loss" key, containing a + scalar tensor representing the loss function to be optimized. + iterator: ``DataIterator``, required. + A method for iterating over a ``Dataset``, yielding padded indexed batches. + patience: Optional[int] > 0, optional (default=None) + Number of epochs to be patient before early stopping: the training is stopped + after ``patience`` epochs with no improvement. If given, it must be ``> 0``. + If None, early stopping is disabled. + num_epochs: int, optional (default = 20) + Number of training epochs. + serialization_dir: str, optional (default=None) + Path to directory for saving and loading model files. Models will not be saved if + this parameter is not passed. + cuda_device: int, optional (default = -1) + An integer specifying the CUDA device to use. If -1, the CPU is used. + Multi-gpu training is not currently supported, but will be once the + Pytorch DataParallel API stabilises. + grad_norm: float, optional, (default = None). + If provided, gradient norms will be rescaled to have a maximum of this value. + grad_clipping : float, optional (default = None). + If provided, gradients will be clipped `during the backward pass` to have an (absolute) + maximum of this value. If you are getting ``NaNs`` in your gradients during training + that are not solved by using ``grad_norm``, you may need this. 
+ no_tqdm : bool, optional (default=False) + We use ``tqdm`` for logging, which will print a nice progress bar that updates in place + after every batch. This is nice if you're running training on a local shell, but can + cause problems with log files from, e.g., a docker image running on kubernetes. If + ``no_tqdm`` is ``True``, we will not use tqdm, and instead log batch statistics using + ``logger.info``. + """ + self._model = model + parameters_to_train = [(n, p) for n, p in self._model.named_parameters() if p.requires_grad] + + self._task_list = task_list + self._n_tasks = len(self._task_list) + + self._optimizer_params = optimizer_params + self._optimizers = {} + self._lr_scheduler_params = lr_scheduler_params + self._schedulers = {} + for task in self._task_list: + task_name = task._name + self._optimizers[task_name] = Optimizer.from_params(model_parameters = parameters_to_train, + params = deepcopy(optimizer_params)) + self._schedulers[task_name] = LearningRateScheduler.from_params(optimizer = self._optimizers[task_name], + params = deepcopy(lr_scheduler_params)) + + self._serialization_dir = serialization_dir + + self._patience = patience + self._num_epochs = num_epochs + self._cuda_device = cuda_device + if self._cuda_device >= 0: + check_for_gpu(self._cuda_device) + self._model = self._model.cuda(self._cuda_device) + self._grad_norm = grad_norm + self._grad_clipping = grad_clipping + self._min_lr = min_lr + + self._task_infos = None + self._metric_infos = None + + self._tr_generators = None + self._no_tqdm = no_tqdm + + self._summary_interval = summary_interval # num batches between logging to tensorboard + self._log_parameter_statistics = log_parameter_statistics + self._log_gradient_statistics = log_gradient_statistics + self._global_step = 0 + train_log = SummaryWriter(os.path.join(self._serialization_dir, "log", "train")) + validation_log = SummaryWriter(os.path.join(self._serialization_dir, "log", "validation")) + self._tensorboard = TensorboardWriter(train_log = train_log, validation_log = validation_log) + + + def train(self, + #tasks: List[Task], + #params: Params, + recover: bool = False): + + raise NotImplementedError + + + def _check_history(self, + metric_history: List[float], + cur_score: float, + should_decrease: bool = False): + ''' + Given a task, the history of the performance on that task, + and the current score, check if current score is + best so far and if out of patience. + + Parameters + ---------- + metric_history: List[float], required + cur_score: float, required + should_decrease: bool, default = False + Wheter or not the validation metric should increase while training. + For instance, the bigger the f1 score is, the better it is -> should_decrease = False + + Returns + ------- + best_so_far: bool + Whether or not the current epoch is the best so far in terms of the speicified validation metric. + out_of_patience: bool + Whether or not the training for this specific task should stop (patience parameter). 
+ ''' + patience = self._patience + 1 + best_fn = min if should_decrease else max + best_score = best_fn(metric_history) + if best_score == cur_score: + best_so_far = metric_history.index(best_score) == len(metric_history) - 1 + else: + best_so_far = False + + out_of_patience = False + if len(metric_history) > patience: + if should_decrease: + out_of_patience = max(metric_history[-patience:]) <= cur_score + else: + out_of_patience = min(metric_history[-patience:]) >= cur_score + + if best_so_far and out_of_patience: # then something is up + print("Something is up") + + return best_so_far, out_of_patience + + + def _forward(self, + tensor_batch: torch.Tensor, + for_training: bool = False, + task:Task = None): + if task is not None: + tensor_batch = move_to_device(tensor_batch, self._cuda_device) + output_dict = self._model.forward(task_name = task._name, tensor_batch = tensor_batch, for_training = for_training) + if for_training: + try: + loss = output_dict["loss"] + loss += self._model.get_regularization_penalty() + except KeyError: + raise RuntimeError("The model you are trying to optimize does not contain a" + " `loss` key in the output of model.forward(inputs).") + return output_dict + else: + raise ConfigurationError("Cannot call forward through task `None`") + + + def _get_metrics(self, + task: Task, + reset: bool = False): + task_tagger = getattr(self._model, "_tagger_" + task._name) + return task_tagger.get_metrics(reset) + + + def _description_from_metrics(self, + metrics: Dict[str, float]): + # pylint: disable=no-self-use + return ', '.join(["%s: %.4f" % (name, value) for name, value in metrics.items()]) + " ||" + + + def _rescale_gradients(self) -> Optional[float]: + """ + Performs gradient rescaling. Is a no-op if gradient rescaling is not enabled. + """ + if self._grad_norm: + parameters_to_clip = [p for p in self._model.parameters() + if p.grad is not None] + return sparse_clip_norm(parameters_to_clip, self._grad_norm) + return None + + + def _enable_gradient_clipping(self) -> None: + if self._grad_clipping is not None: + # Pylint is unable to tell that we're in the case that _grad_clipping is not None... + # pylint: disable=invalid-unary-operand-type + clip_function = lambda grad: grad.clamp(-self._grad_clipping, self._grad_clipping) + for parameter in self._model.parameters(): + if parameter.requires_grad: + parameter.register_hook(clip_function) + + + def _save_checkpoint(self, + epoch: int, + should_stop: bool) -> None: + """ + Save the current states (model, training, optimizers, metrics and tasks). + + Parameters + ---------- + epoch: int, required. + The epoch of training. + should_stop: bool, required + Wheter or not the training is finished. + should_save_model: bool, optional (default = True) + Whether or not the model state should be saved. 
+ """ + ### Saving training state ### + training_state = {"epoch": epoch, + "should_stop": should_stop, + "metric_infos": self._metric_infos, + "task_infos": self._task_infos, + "schedulers": {}, + "optimizers": {}} + + if self._optimizers is not None: + for task_name, optimizer in self._optimizers.items(): + training_state["optimizers"][task_name] = optimizer.state_dict() + if self._schedulers is not None: + for task_name, scheduler in self._schedulers.items(): + training_state["schedulers"][task_name] = scheduler.lr_scheduler.state_dict() + + training_path = os.path.join(self._serialization_dir, "training_state.th") + torch.save(training_state, training_path) + logger.info("Checkpoint - Saved training state to %s", training_path) + + + ### Saving model state ### + model_path = os.path.join(self._serialization_dir, "model_state.th") + model_state = self._model.state_dict() + torch.save(model_state, model_path) + logger.info("Checkpoint - Saved model state to %s", model_path) + + + ### Saving best models for each task ### + for task_name, infos in self._metric_infos.items(): + best_epoch, _ = infos["best"] + if best_epoch == epoch: + logger.info("Checkpoint - Best validation performance so far for %s task", task_name) + logger.info("Checkpoint - Copying weights to '%s/best_%s.th'.", self._serialization_dir, task_name) + shutil.copyfile(model_path, os.path.join(self._serialization_dir, "best_{}.th".format(task_name))) + + + def find_latest_checkpoint(self) -> Tuple[str, str]: + """ + Return the location of the latest model and training state files. + If there isn't a valid checkpoint then return None. + """ + have_checkpoint = (self._serialization_dir is not None and + any("model_state" in x for x in os.listdir(self._serialization_dir)) and + any("training_state" in x for x in os.listdir(self._serialization_dir))) + + if not have_checkpoint: + return None + + model_path = os.path.join(self._serialization_dir, + "model_state.th") + training_state_path = os.path.join(self._serialization_dir, + "training_state.th") + + return (model_path, training_state_path) + + + def _restore_checkpoint(self): + """ + Restores a model from a serialization_dir to the last saved checkpoint. + This includes an epoch count, optimizer state, a model state, a task state and + a metric state. All are of which are serialized separately. + This function should only be used to continue training - + if you wish to load a model for inference/load parts of a model into a new + computation graph, you should use the native Pytorch functions: + `` model.load_state_dict(torch.load("/path/to/model/weights.th"))`` + + Returns + ------- + epoch: int, + The epoch at which to resume training. + should_stop: bool + Whether or not the training should already by stopped. + """ + + latest_checkpoint = self.find_latest_checkpoint() + + if not self._serialization_dir: + raise ConfigurationError("`serialization_dir` not specified - cannot " + "restore a model without a directory path.") + if latest_checkpoint is None: + raise ConfigurationError("Cannot restore model because one of" + "`model_state.th` or `training_state.th` is not in directory path.") + + model_path, training_state_path = latest_checkpoint + + # Load the parameters onto CPU, then transfer to GPU. + # This avoids potential OOM on GPU for large models that + # load parameters onto GPU then make a new GPU copy into the parameter + # buffer. The GPU transfer happens implicitly in load_state_dict. 
+ model_state = torch.load(model_path, map_location = device_mapping(-1)) + training_state = torch.load(training_state_path, map_location = device_mapping(-1)) + + # Load model + self._model.load_state_dict(model_state) + logger.info("Checkpoint - Model loaded from %s", model_path) + + # Load optimizers + for task_name, optimizers_state in training_state["optimizers"].items(): + self._optimizers[task_name].load_state_dict(optimizers_state) + logger.info("Checkpoint - Optimizers loaded from %s", training_state_path) + + # Load schedulers + for task_name, scheduler_state in training_state["schedulers"].items(): + self._schedulers[task_name].lr_scheduler.load_state_dict(scheduler_state) + logger.info("Checkpoint - Learning rate schedulers loaded from %s", training_state_path) + + self._metric_infos = training_state["metric_infos"] + self._task_infos = training_state["task_infos"] + logger.info("Checkpoint - Task infos loaded from %s", training_state_path) + logger.info("Checkpoint - Metric infos loaded from %s", training_state_path) + + n_epoch, should_stop = training_state["epoch"], training_state["should_stop"] + + return n_epoch + 1, should_stop + + + @classmethod + def from_params(cls, + model: Model, + task_list: List[Task], + serialization_dir: str, + params: Params) -> 'MultiTaskTrainer': + """ + Static method that constructs the multi task trainer described by ``params``. + """ + choice = params.pop_choice('type', cls.list_available()) + return cls.by_name(choice).from_params(model = model, + task_list = task_list, + serialization_dir = serialization_dir, + params = params) \ No newline at end of file diff --git a/hmtl/training/sampler_multi_task_trainer.py b/hmtl/training/sampler_multi_task_trainer.py new file mode 100644 index 0000000..bfc12de --- /dev/null +++ b/hmtl/training/sampler_multi_task_trainer.py @@ -0,0 +1,501 @@ +# coding: utf-8 + +import os +import math +import time +from copy import deepcopy +import random +import logging +import itertools +import shutil +from tensorboardX import SummaryWriter +import numpy as np + +from typing import List, Optional, Dict, Any +from overrides import overrides + +import torch +import torch.optim.lr_scheduler +import tqdm + +from allennlp.common import Params +from allennlp.common.checks import ConfigurationError, check_for_gpu +from allennlp.common.util import peak_memory_mb, gpu_memory_mb +from allennlp.nn.util import device_mapping +from allennlp.data.iterators import DataIterator +from allennlp.training.learning_rate_schedulers import LearningRateScheduler +from allennlp.training.optimizers import Optimizer +from allennlp.training.trainer import sparse_clip_norm, TensorboardWriter +from allennlp.models.model import Model + +from hmtl.tasks import Task +from hmtl.training.multi_task_trainer import MultiTaskTrainer + + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +@MultiTaskTrainer.register("sampler_multi_task_trainer") +class SamplerMultiTaskTrainer(MultiTaskTrainer): + def __init__(self, + model: Model, + task_list: List[Task], + optimizer_params: Params, + lr_scheduler_params: Params, + patience: Optional[int] = None, + num_epochs: int = 20, + serialization_dir: str = None, + cuda_device: int = -1, + grad_norm: Optional[float] = None, + grad_clipping: Optional[float] = None, + min_lr: float = 0.00001, + no_tqdm: bool = False, + summary_interval: int = 50, + log_parameter_statistics: bool = False, + log_gradient_statistics: bool = False, + sampling_method: str = "proportional"): + + if sampling_method not 
in ["uniform", "proportional"]: + raise ConfigurationError(f"Sampling method ({sampling_method}) must be `uniform` or `proportional`.") + + self._sampling_method = sampling_method + super(SamplerMultiTaskTrainer, self).__init__(model = model, + task_list = task_list, + optimizer_params = optimizer_params, + lr_scheduler_params = lr_scheduler_params, + patience = patience, + num_epochs = num_epochs, + serialization_dir=serialization_dir, + cuda_device = cuda_device, + grad_norm = grad_norm, + grad_clipping = grad_clipping, + min_lr = min_lr, + no_tqdm = no_tqdm, + summary_interval = summary_interval, + log_parameter_statistics = log_parameter_statistics, + log_gradient_statistics = log_gradient_statistics) + + + + @overrides + def train(self, + recover: bool = False): + ''' + Train the different task_list, save the different checkpoints and metrics, + and save the model at the end of training while logging the training details. + + The metrics through the training are stored in dictionaries with the following structure: + + all_metrics - Dict[str, str] + task_name: val_metric + + metric_infos (Dict[]) + task_name (Dict[str, diverse] + val_metric (str): name (str) + hist (str): history_of_the_val_metric (List[float]) + stopped (str): training_is_stopped (bool) + best (str): best_epoch_for_val_metric (Tuple(int, Dict)) + + all_tr_metrics (Dict[str, Dict[str, float]]) + task_name (Dict[str, float]) + metric_name (str): value (float) + loss: value (float) + + all_val_metrics (Dict[str, Dict[str, float]]) + task_name (Dict[str, float]) + metric_name (str): value (float) + loss (str): value (float) + + Parameters + ---------- + task_list: List[Task], required + A list containing the tasks to train. + params: Params, required + Training parameters + recover: bool, required + Whether or not training should be recovered from a previous training. + + Returns + ------- + return_dict: Dict + A dictionary summarizing the training and the metrics for the best epochs for each task. + ''' + training_start_time = time.time() + + if recover: + try: + n_epoch, should_stop = self._restore_checkpoint() + logger.info("Loaded model from checkpoint. Starting at epoch %d", n_epoch) + except RuntimeError: + raise ConfigurationError("Could not recover training from the checkpoint. 
Did you mean to output to " + "a different serialization directory or delete the existing serialization " + "directory?") + else: + n_epoch, should_stop = 0, False + + ### Store all the necessary informations and attributes about the tasks ### + task_infos = {task._name: {} for task in self._task_list} + for task_idx, task in enumerate(self._task_list): + task_info = task_infos[task._name] + + # Store statistiscs on training and validation batches + data_iterator = task._data_iterator + n_tr_batches = data_iterator.get_num_batches(task._train_data) + n_val_batches = data_iterator.get_num_batches(task._validation_data) + task_info['n_tr_batches'] = n_tr_batches + task_info['n_val_batches'] = n_val_batches + + # Create counter for number of batches trained during the whole + # training for this specific task + task_info['total_n_batches_trained'] = 0 + + task_info['last_log'] = time.time() # Time of last logging + self._task_infos = task_infos + + ### Bookkeeping the validation metrics ### + metric_infos = {task._name: {'val_metric': task._val_metric, + 'hist': [], + 'is_out_of_patience': False, + 'min_lr_hit': False, + 'best': (-1, {})} + for task in self._task_list} + self._metric_infos = metric_infos + + + ### Write log ### + total_n_tr_batches = 0 # The total number of training batches across all the datasets. + for task_name, info in self._task_infos.items(): + total_n_tr_batches += info["n_tr_batches"] + logger.info("Task %s:", task_name) + logger.info("\t%d training batches", info["n_tr_batches"]) + logger.info("\t%d validation batches", info["n_val_batches"]) + + + ### Create the training generators/iterators tqdm ### + self._tr_generators = {} + for task in self._task_list: + data_iterator = task._data_iterator + tr_generator = data_iterator(task._train_data, + num_epochs = None) + self._tr_generators[task._name] = tr_generator + + + ### Create sampling probability distribution ### + if self._sampling_method == "uniform": + sampling_prob = [float(1/self._n_tasks)]*self._n_tasks + elif self._sampling_method == "proportional": + sampling_prob = [float(info['n_tr_batches']/total_n_tr_batches) for info in self._task_infos.values()] + + + ### Enable gradient clipping ### + # Only if self._grad_clipping is specified + self._enable_gradient_clipping() + + + ### Setup is ready. Training of the model can begin ### + logger.info("Set up ready. Beginning training/validation.") + + + ### Begin Training of the model ### + while not should_stop: + # Train one epoch (training pass + validation pass) + + + self._model.train() # Set the model to "train" mode. 
+ + + ### Log Infos: current epoch count and CPU/GPU usage ### + logger.info("") + logger.info("Epoch %d/%d - Begin", n_epoch, self._num_epochs - 1) + logger.info(f"Peak CPU memory usage MB: {peak_memory_mb()}") + for gpu, memory in gpu_memory_mb().items(): + logger.info(f"GPU {gpu} memory usage MB: {memory}") + + logger.info("Training - Begin") + + + ### Reset training and trained batches counter before new training epoch ### + for _, task_info in self._task_infos.items(): + task_info["tr_loss_cum"] = 0.0 + task_info["n_batches_trained_this_epoch"] = 0 + all_tr_metrics = {} # BUG TO COMPLETE COMMENT TO MAKE IT MORE CLEAR + + + ### Start training epoch ### + epoch_tqdm = tqdm.tqdm(range(total_n_tr_batches), total = total_n_tr_batches) + for _ in epoch_tqdm: + task_idx = np.argmax(np.random.multinomial(1, sampling_prob)) + task = self._task_list[task_idx] + task_info = self._task_infos[task._name] + + + ### One forward + backward pass ### + + # Call next batch to train + batch = next(self._tr_generators[task._name]) + task_info["n_batches_trained_this_epoch"] += 1 + + # Load optimizer + optimizer = self._optimizers[task._name] + optimizer.zero_grad() + + # Get the loss for this batch + output_dict = self._forward(tensor_batch = batch, task = task, for_training = True) + assert "loss" in output_dict, "Model must return a dict containing a 'loss' key" + loss = output_dict["loss"] + loss.backward() + task_info["tr_loss_cum"] += loss.item() + + # Gradient rescaling if self._grad_norm is specified + self._rescale_gradients() + + # Take an optimization step + optimizer.step() + + + ### Get metrics for all progress so far, update tqdm, display description ### + task_metrics = self._get_metrics(task = task) + task_metrics["loss"] = float(task_info["tr_loss_cum"] / (task_info["n_batches_trained_this_epoch"]+0.000001)) + description = self._description_from_metrics(task_metrics) + epoch_tqdm.set_description(task._name + ", " + description) + + + ### Tensorboard logging: Training detailled metrics, parameters and gradients ### + if self._global_step % self._summary_interval == 0: + # Metrics + for metric_name, value in task_metrics.items(): + self._tensorboard.add_train_scalar(name = "training_details/" + task._name + "/" + metric_name, + value = value, + global_step = self._global_step) + # Parameters and Gradients + for param_name, param in self._model.named_parameters(): + if self._log_parameter_statistics: + self._tensorboard.add_train_scalar(name = "parameter_mean/" + param_name, + value = param.data.mean(), + global_step = self._global_step) + self._tensorboard.add_train_scalar(name = "parameter_std/" + param_name, + value = param.data.std(), + global_step = self._global_step) + if param.grad is None: + continue + if self._log_gradient_statistics: + self._tensorboard.add_train_scalar(name = "grad_mean/" + param_name, + value = param.grad.data.mean(), + global_step = self._global_step) + self._tensorboard.add_train_scalar(name = "grad_std/" + param_name, + value = param.grad.data.std(), + global_step = self._global_step) + self._global_step += 1 + + + + ### Bookkeeping all the training metrics for all the tasks on the training epoch that just finished ### + for task in self._task_list: + task_info = self._task_infos[task._name] + + task_info['total_n_batches_trained'] += task_info["n_batches_trained_this_epoch"] + task_info['last_log'] = time.time() + + task_metrics = self._get_metrics(task = task, reset = True) + if task._name not in all_tr_metrics: + all_tr_metrics[task._name ] = {} + for name, 
value in task_metrics.items(): + all_tr_metrics[task._name][name] = value + all_tr_metrics[task._name]["loss"] = \ + float(task_info["tr_loss_cum"] / (task_info["n_batches_trained_this_epoch"]+0.00000001)) + + # Tensorboard - Training metrics for this epoch + self._tensorboard.add_train_scalar(name = "training_proportions/" + task._name, + value = task_info['n_batches_trained_this_epoch'], + global_step = n_epoch) + for metric_name, value in all_tr_metrics[task._name].items(): + self._tensorboard.add_train_scalar(name = "task_" + task._name + "/" + metric_name, + value = value, + global_step = n_epoch) + + + logger.info("Train - End") + + + + + ### Begin validation of the model ### + logger.info("Validation - Begin") + all_val_metrics = {} + + + self._model.eval() #Set the model into evaluation mode + + + for task_idx, task in enumerate(self._task_list): + logger.info("Validation - Task %d/%d: %s", task_idx + 1, self._n_tasks, task._name) + + val_loss = 0.0 + n_batches_val_this_epoch_this_task = 0 + n_val_batches = self._task_infos[task._name]['n_val_batches'] + scheduler = self._schedulers[task._name] + + # Create tqdm generator for current task's validation + data_iterator = task._data_iterator + val_generator = data_iterator(task._validation_data, + num_epochs = 1, + shuffle = False) + val_generator_tqdm = tqdm.tqdm(val_generator, + total = n_val_batches) + + # Iterate over each validation batch for this task + for batch in val_generator_tqdm: + n_batches_val_this_epoch_this_task += 1 + + # Get the loss + val_output_dict = self._forward(batch, task = task, for_training = False) + loss = val_output_dict["loss"] + val_loss += loss.item() + + # Get metrics for all progress so far, update tqdm, display description + task_metrics = self._get_metrics(task = task) + task_metrics["loss"] = float(val_loss / n_batches_val_this_epoch_this_task) + description = self._description_from_metrics(task_metrics) + val_generator_tqdm.set_description(description) + + # Get task validation metrics and store them in all_val_metrics + task_metrics = self._get_metrics(task = task, reset = True) + if task._name not in all_val_metrics: + all_val_metrics[task._name] = {} + for name, value in task_metrics.items(): + all_val_metrics[task._name][name] = value + all_val_metrics[task._name]["loss"] = float(val_loss / n_batches_val_this_epoch_this_task) + + # Tensorboard - Validation metrics for this epoch + for metric_name, value in all_val_metrics[task._name].items(): + self._tensorboard.add_validation_scalar(name = "task_" + task._name + "/" + metric_name, + value = value, + global_step = n_epoch) + + + ### Perform a patience check and update the history of validation metric for this task ### + this_epoch_val_metric = all_val_metrics[task._name][task._val_metric] + metric_history = self._metric_infos[task._name]['hist'] + + metric_history.append(this_epoch_val_metric) + is_best_so_far, out_of_patience = self._check_history(metric_history = metric_history, + cur_score = this_epoch_val_metric, + should_decrease = task._val_metric_decreases) + + if is_best_so_far: + logger.info("Best model found for %s.", task._name) + self._metric_infos[task._name]['best'] = (n_epoch, all_val_metrics) + if out_of_patience and not self._metric_infos[task._name]['is_out_of_patience']: + self._metric_infos[task._name]['is_out_of_patience'] = True + logger.info("Task %s is out of patience and vote to stop the training.", task._name) + + # The LRScheduler API is agnostic to whether your schedule requires a validation metric - + # if it 
doesn't, the validation metric passed here is ignored. + scheduler.step(this_epoch_val_metric, n_epoch) + + + logger.info("Validation - End") + + + ### Print all training and validation metrics for this epoch ### + logger.info("***** Epoch %d/%d Statistics *****", n_epoch, self._num_epochs - 1) + for task in self._task_list: + logger.info("Statistic: %s", task._name) + logger.info("\tTraining - %s: %3d", "Nb batches trained", self._task_infos[task._name]["n_batches_trained_this_epoch"]) + for metric_name, value in all_tr_metrics[task._name].items(): + logger.info("\tTraining - %s: %3f", metric_name, value) + for metric_name, value in all_val_metrics[task._name].items(): + logger.info("\tValidation - %s: %3f", metric_name, value) + logger.info("**********") + + + ### Check to see if should stop ### + stop_tr, stop_val = True, True + + for task in self._task_list: + #task_info = self._task_infos[task._name] + if self._optimizers[task._name].param_groups[0]['lr'] < self._min_lr: + logger.info("Minimum lr hit on %s.", task._name) + logger.info("Task %s vote to stop training.", task._name) + metric_infos[task._name]['min_lr_hit'] = True + stop_tr = stop_tr and self._metric_infos[task._name]['min_lr_hit'] + stop_val = stop_val and self._metric_infos[task._name]['is_out_of_patience'] + + if stop_tr: + should_stop = True + logging.info("All tasks hit minimum lr. Stopping training.") + if stop_val: + should_stop = True + logging.info("All metrics ran out of patience. Stopping training.") + if n_epoch >= self._num_epochs - 1: + should_stop = True + logging.info("Maximum number of epoch hit. Stopping training.") + + self._save_checkpoint(n_epoch, should_stop) + + + ### Update n_epoch ### + # One epoch = doing N (forward + backward) pass where N is the total number of training batches. + n_epoch += 1 + + + ### Summarize training at the end ### + logging.info('***** Training is finished *****') + logging.info('Stopped training after %d epochs', n_epoch) + return_metrics = {} + for task_name, task_info in self._task_infos.items(): + nb_epoch_trained = int(task_info['total_n_batches_trained'] / task_info['n_tr_batches']) + logging.info('Trained %s for %d batches ~= %d epochs', + task_name, + task_info['total_n_batches_trained'], + nb_epoch_trained) + return_metrics[task._name] = {"best_epoch": self._metric_infos[task_name]['best'][0], + "nb_epoch_trained": nb_epoch_trained, + "best_epoch_val_metrics": self._metric_infos[task_name]['best'][1]} + + training_elapsed_time = time.time() - training_start_time + return_metrics["training_duration"] = time.strftime("%d:%H:%M:%S", time.gmtime(training_elapsed_time)) + return_metrics["nb_epoch_trained"] = n_epoch + + + return return_metrics + + @classmethod + def from_params(cls, + model: Model, + task_list: List[Task], + serialization_dir: str, + params: Params) -> 'SamplerMultiTaskTrainer': + ''' Generator multi-task trainer from parameters. 
'''
+
+        optimizer_params = params.pop("optimizer")
+        lr_scheduler_params = params.pop("scheduler")
+        patience = params.pop_int("patience", 2)
+        num_epochs = params.pop_int("num_epochs", 20)
+        cuda_device = params.pop_int("cuda_device", -1)
+        grad_norm = params.pop_float("grad_norm", None)
+        grad_clipping = params.pop_float("grad_clipping", None)
+        min_lr = params.pop_float("min_lr", 0.00001)
+        no_tqdm = params.pop_bool("no_tqdm", False)
+        summary_interval = params.pop("summary_interval", 50)
+        log_parameter_statistics = params.pop("log_parameter_statistics", False)
+        log_gradient_statistics = params.pop("log_gradient_statistics", False)
+        sampling_method = params.pop("sampling_method", "proportional")
+
+        params.assert_empty(cls.__name__)
+        return SamplerMultiTaskTrainer(model = model,
+                        task_list = task_list,
+                        optimizer_params = optimizer_params,
+                        lr_scheduler_params = lr_scheduler_params,
+                        patience = patience,
+                        num_epochs = num_epochs,
+                        serialization_dir = serialization_dir,
+                        cuda_device = cuda_device,
+                        grad_norm = grad_norm,
+                        grad_clipping = grad_clipping,
+                        min_lr = min_lr,
+                        no_tqdm = no_tqdm,
+                        summary_interval = summary_interval,
+                        log_parameter_statistics = log_parameter_statistics,
+                        log_gradient_statistics = log_gradient_statistics,
+                        sampling_method = sampling_method)
\ No newline at end of file
diff --git a/html_senteval.py b/html_senteval.py
new file mode 100644
index 0000000..3d52f2f
--- /dev/null
+++ b/html_senteval.py
@@ -0,0 +1,166 @@
+# coding: utf-8
+
+"""
+A quick and simple script for evaluating the embeddings through the HMTL model/hierarchy
+using SentEval.
+"""
+
+
+from __future__ import absolute_import, division, unicode_literals
+
+import sys
+import io
+import numpy as np
+import logging
+import re
+
+# Set PATHs
+PATH_TO_SENTEVAL = './SentEval/'
+PATH_TO_DATA = './SentEval/data'
+sys.path.insert(0, PATH_TO_SENTEVAL)
+import senteval
+
+import os
+import torch
+import argparse
+
+from allennlp.common.params import Params
+from allennlp.data.token_indexers import TokenIndexer
+from allennlp.data import Token, Instance, Vocabulary
+from allennlp.data.dataset import Batch
+from allennlp.data.fields import TextField
+from allennlp.nn import util
+from allennlp.models.model import Model
+
+import hmtl
+
+
+def text_to_instance(sent, token_indexers):
+    text = TextField([Token(word) for word in sent], token_indexers = token_indexers)
+    instance = Instance({"text": text})
+    return instance
+
+def sentences_to_indexed_batch(sentences, token_indexers):
+    instances = [text_to_instance(sent, token_indexers) for sent in sentences]
+    batch = Batch(instances)
+    batch.index_instances(vocab)
+    return batch
+
+def compute_embds_from_layer(model, model_layer_name, batch):
+    batch_tensor = batch.as_tensor_dict(batch.get_padding_lengths())
+    text = batch_tensor["text"]
+    text_mask = util.get_text_field_mask(text)
+
+    if model_layer_name == "text_field_embedder":
+        embds_text_field_embedder = model._text_field_embedder(text)
+        embds = embds_text_field_embedder
+
+    if model_layer_name == "encoder_ner":
+        embds_text_field_embedder = model._text_field_embedder(text)
+        embds_encoder_ner = model._encoder_ner(embds_text_field_embedder, text_mask)
+        embds = embds_encoder_ner
+
+    if model_layer_name == "encoder_emd":
+        embds_text_field_embedder = model._shortcut_text_field_embedder(text)
+        embds_encoder_emd = model._encoder_emd(embds_text_field_embedder, text_mask)
+        embds = embds_encoder_emd
+
+    if model_layer_name == "encoder_relation":
+        embds_text_field_embedder =
model._shortcut_text_field_embedder_relation(text) + embds_encoder_relation = model._encoder_relation(embds_text_field_embedder, text_mask) + embds = embds_encoder_relation + + if model_layer_name == "encoder_coref": + embds_text_field_embedder = model._shortcut_text_field_embedder_coref(text) + embds_encoder_coref = model._encoder_coref(embds_text_field_embedder, text_mask) + embds = embds_encoder_coref + + emds_size = embds.size(2) + expanded_text_mask = torch.cat([text_mask.unsqueeze(-1)]*emds_size, dim = -1) + + embds_sum = (embds*expanded_text_mask.float()).sum(dim = 1) + normalization = torch.cat([(1/text_mask.float().sum(-1)).unsqueeze(-1)]*emds_size, dim = -1) + computed_embds = (embds_sum*normalization) + + return computed_embds.detach().numpy() + + +# SentEval prepare and batcher +def prepare(params, samples): + return + +def batcher(params, batch): + batch = sentences_to_indexed_batch(batch, token_index) + embds = compute_embds_from_layer(model, args.layer_name, batch) + return embds + + +# Set params for SentEval +params_senteval = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5} +params_senteval['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128, + 'tenacity': 3, 'epoch_size': 2} + + +# Set up logger +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.INFO) +logger = logging.getLogger(__name__) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-s", + "--serialization_dir", + required = True, + help = "Directory from which to load the pretrained model.", + type = str) + parser.add_argument("-t", + "--task", + required = False, + default = "ner", + help = "Name of the task to load.", + type = str) + parser.add_argument("-l", + "--layer_name", + required = False, + default = "text_field_embedder", + help = "Name of encoder/embedding layer of the model", + type = str) + args = parser.parse_args() + + + serialization_dir = args.serialization_dir + + params = Params.from_file(params_file = os.path.join(args.serialization_dir, "config.json")) + logging.info("Parameters loaded from %s", os.path.join(serialization_dir, "config.json")) + + ### Load Vocabulary from files ### + logging.info("Loading Vocavulary from %s", os.path.join(serialization_dir, "vocabulary")) + vocab = Vocabulary.from_files(os.path.join(args.serialization_dir, "vocabulary")) + logger.info("Vocabulary loaded") + + ### Create model ### + model_params = params.pop("model") + model = Model.from_params(vocab = vocab, params = model_params, regularizer = None) + best_model_state_path = os.path.join(serialization_dir, "best_{}.th".format(args.task)) + best_model_state = torch.load(best_model_state_path) + model.load_state_dict(state_dict = best_model_state) + + ### Create token indexer ### + token_index = {} + task_keys = [key for key in params.keys() if re.search("^task_", key)] + token_indexer_params = params.pop(task_keys[-1]).pop("data_params").pop("dataset_reader").pop("token_indexers") + for name, indexer_params in token_indexer_params.items(): + token_index[name] = TokenIndexer.from_params(indexer_params) + + params_senteval['encoder'] = model + + se = senteval.engine.SE(params_senteval, batcher, prepare) + transfer_tasks = ['Length', 'WordContent', 'Depth', 'TopConstituents', + 'BigramShift', 'Tense', 'SubjNumber', 'ObjNumber', + 'OddManOut', 'CoordinationInversion'] + results = se.eval(transfer_tasks) + + print(results) + logging.info("SentEval(uation) Finished") 
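Note on the pooling in ``compute_embds_from_layer`` above: the sentence embedding is obtained by masked mean pooling of the selected layer's token-level outputs, i.e. padding positions are zeroed out by the mask and excluded from the average. The following minimal sketch reproduces that pooling in isolation; the function and tensor names are illustrative only and are not part of this patch.

    import torch

    def masked_mean_pooling(embds: torch.Tensor, text_mask: torch.Tensor) -> torch.Tensor:
        # embds: (batch_size, seq_len, dim); text_mask: (batch_size, seq_len), 1 for tokens, 0 for padding
        mask = text_mask.unsqueeze(-1).float()      # (batch_size, seq_len, 1)
        summed = (embds * mask).sum(dim=1)          # sum embeddings over non-padded positions
        counts = mask.sum(dim=1).clamp(min=1.0)     # number of real tokens per sentence
        return summed / counts                      # (batch_size, dim)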
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9d5e33b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,102 @@ +alabaster==0.7.12 +allennlp==0.7.0 +asn1crypto==0.24.0 +atomicwrites==1.2.1 +attrs==18.2.0 +aws-xray-sdk==0.95 +awscli==1.16.38 +Babel==2.6.0 +biscuits==0.1.1 +boto==2.49.0 +boto3==1.9.28 +botocore==1.12.28 +certifi==2018.10.15 +cffi==1.11.2 +chardet==3.0.4 +Click==7.0 +colorama==0.3.9 +conllu==0.11 +cookies==2.2.1 +cryptography==2.3.1 +cymem==2.0.2 +cytoolz==0.9.0.1 +dill==0.2.8.2 +docker==3.5.1 +docker-pycreds==0.3.0 +docutils==0.14 +ecdsa==0.13 +editdistance==0.5.2 +https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz +flaky==3.4.0 +Flask==0.12.4 +Flask-Cors==3.0.3 +ftfy==5.5.0 +future==0.16.0 +gevent==1.3.6 +greenlet==0.4.15 +h5py==2.8.0 +idna==2.7 +imagesize==1.1.0 +ItsDangerous==1.1.0 +Jinja2==2.10 +jmespath==0.9.3 +jsondiff==1.1.1 +jsonnet==0.10.0 +jsonpickle==1.0 +MarkupSafe==1.0 +mock==2.0.0 +more-itertools==4.3.0 +moto==1.3.4 +msgpack==0.5.6 +msgpack-numpy==0.4.3.2 +murmurhash==1.0.1 +nltk==3.3 +numpy==1.15.2 +numpydoc==0.8.0 +overrides==1.9 +packaging==18.0 +parsimonious==0.8.0 +pbr==5.0.0 +plac==0.9.6 +pluggy==0.8.0 +preshed==2.0.1 +protobuf==3.6.1 +py==1.7.0 +pyaml==17.12.1 +pyasn1==0.4.4 +pycparser==2.19 +pycryptodome==3.6.6 +Pygments==2.2.0 +pyparsing==2.2.2 +pytest==3.9.1 +pytest-pythonpath==0.7.3 +python-dateutil==2.7.3 +python-jose==2.0.2 +pytz==2017.3 +PyYAML==3.13 +regex==2018.1.10 +requests==2.20.0 +responses==0.10.1 +rsa==3.4.2 +s3transfer==0.1.13 +scikit-learn==0.20.0 +scipy==1.1.0 +six==1.11.0 +snowballstemmer==1.2.1 +spacy==2.0.16 +Sphinx==1.8.1 +sphinxcontrib-websupport==1.1.0 +sqlparse==0.2.4 +tensorboardX==1.2 +thinc==6.12.0 +toolz==0.9.0 +torch==0.4.1 +tqdm==4.28.1 +ujson==1.35 +Unidecode==1.0.22 +urllib3==1.24 +wcwidth==0.1.7 +websocket-client==0.53.0 +Werkzeug==0.14.1 +wrapt==1.10.11 +xmltodict==0.11.0 diff --git a/scripts/data_setup.sh b/scripts/data_setup.sh new file mode 100755 index 0000000..382c4f0 --- /dev/null +++ b/scripts/data_setup.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +#Download Data +cd data + +#ELMO +mkdir elmo +cd elmo + +##Original size +wget https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json +mv elmo_2x4096_512_2048cnn_2xhighway_options.json 2x4096_512_2048cnn_2xhighway_options.json +wget https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5 +mv elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5 2x4096_512_2048cnn_2xhighway_weights.hdf5 + +##Medium size +wget https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5 +mv elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5 2x2048_256_2048cnn_1xhighway_weights.hdf5 +wget https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_options.json +mv elmo_2x2048_256_2048cnn_1xhighway_options.json 2x2048_256_2048cnn_1xhighway_options.json + +##Small size +wget https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5 +mv elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5 2x1024_128_2048cnn_1xhighway_weights.hdf5 +wget https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json +mv 
elmo_2x1024_128_2048cnn_1xhighway_options.json 2x1024_128_2048cnn_1xhighway_options.json + +#Glove +cd .. +mkdir glove +cd glove +wget https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.100d.txt.gz \ No newline at end of file diff --git a/scripts/machine_setup.sh b/scripts/machine_setup.sh new file mode 100755 index 0000000..d3342e2 --- /dev/null +++ b/scripts/machine_setup.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +### Install git-lfs ### +curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash +sudo apt-get install git-lfs +git lfs install + + +### Install Python3.6 ### +sudo add-apt-repository ppa:deadsnakes/ppa +sudo apt-get update +sudo apt-get install python3.6 python3.6-dev +wget https://bootstrap.pypa.io/get-pip.py +sudo python3.6 get-pip.py +sudo ln -s /usr/bin/python3.6 /usr/local/bin/python3 +sudo ln -s /usr/local/bin/pip /usr/local/bin/pip3 + + +### Create a clean Python3.6 environment ### +sudo pip3 install virtualenv +virtualenv -p /usr/bin/python3.6 .env +source ./.env/bin/activate + + +### Install dependencies ### +pip install -r requirements.txt + + +### Install submodules (SentEval) ### +git submodule init +git submodule update + +sudo apt-get install unzip +cd SentEval/data/downstream/ +./get_transfer_data.bash diff --git a/train.py b/train.py new file mode 100644 index 0000000..f0a54f4 --- /dev/null +++ b/train.py @@ -0,0 +1,237 @@ +# coding: utf-8 + +""" +The ``train.py`` file can be used to train a model. +It requires a configuration file and a directory in +which to write the results. + +.. code-block:: bash + + $ python train.py --help + usage: train.py [-h] -s SERIALIZATION_DIR -c CONFIG_FILE_PATH [-r] + + optional arguments: + -h, --help show this help message and exit + -s SERIALIZATION_DIR, --serialization_dir SERIALIZATION_DIR + Directory in which to save the model and its logs. + -c CONFIG_FILE_PATH, --config_file_path CONFIG_FILE_PATH + Path to parameter file describing the multi-tasked + model to be trained. + -r, --recover Recover a previous training from the state in + serialization_dir. +""" + +import argparse +import itertools +import os +import json +import re +from copy import deepcopy +import torch +import logging +from typing import List, Dict, Any, Tuple +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.INFO) + +from hmtl.tasks import Task +from hmtl.training.multi_task_trainer import MultiTaskTrainer +from hmtl.common import create_and_set_iterators +from evaluate import evaluate + +from allennlp.models.model import Model +from allennlp.data import Vocabulary +from allennlp.data.iterators import DataIterator +from allennlp.commands.train import create_serialization_dir +from allennlp.common.params import Params +from allennlp.nn import RegularizerApplicator + +logger = logging.getLogger(__name__) + + +def tasks_and_vocab_from_params(params: Params, + serialization_dir: str) -> Tuple[List[Task], Vocabulary]: + ''' + Load each of the tasks in the model from the ``params`` file + and load the datasets associated with each of these tasks. + Create the vocabulary from ``params`` using the concatenation of the ``datasets_for_vocab_creation`` + from each of the task-specific datasets. + + Parameters + ---------- + params: ``Params`` + A parameter object specifying an experiment. + serialization_dir: ``str`` + Directory in which to save the model and its logs.
+ Returns + ------- + task_list: ``List[Task]`` + A list containing the tasks of the model to train. + vocab: ``Vocabulary`` + The vocabulary fitted on the datasets_for_vocab_creation. + ''' + ### Instantiate the different tasks ### + task_list = [] + instances_for_vocab_creation = itertools.chain() + datasets_for_vocab_creation = {} + task_keys = [key for key in params.keys() if re.search("^task_", key)] + + for key in task_keys: + logger.info("Creating %s", key) + task_params = params.pop(key) + task_description = task_params.pop("task_description") + task_data_params = task_params.pop("data_params") + + task = Task.from_params(params = task_description) + task_list.append(task) + + task_instances_for_vocab, task_datasets_for_vocab = task.load_data_from_params(params = task_data_params) + instances_for_vocab_creation = itertools.chain(instances_for_vocab_creation, task_instances_for_vocab) + datasets_for_vocab_creation[task._name] = task_datasets_for_vocab + + + ### Create and save the vocabulary ### + for task_name, task_dataset_list in datasets_for_vocab_creation.items(): + logger.info("Creating a vocabulary using %s data from %s.", ", ".join(task_dataset_list), task_name) + + logger.info("Fitting vocabulary from dataset") + vocab = Vocabulary.from_params(params.pop("vocabulary", {}), instances_for_vocab_creation) + + vocab.save_to_files(os.path.join(serialization_dir, "vocabulary")) + logger.info("Vocabulary saved to %s", os.path.join(serialization_dir, "vocabulary")) + + return task_list, vocab + +def train_model(multi_task_trainer: MultiTaskTrainer, + recover: bool = False) -> Dict[str, Any]: + ''' + Launch the training of the multi-task model. + + Parameters + ---------- + multi_task_trainer: ``MultiTaskTrainer`` + A trainer (similar to allennlp.training.trainer.Trainer) that can handle multi-task training. + recover : ``bool``, optional (default=False) + If ``True``, we will try to recover a training run from an existing serialization + directory. This is only intended for use when something actually crashed during the middle + of a run. For continuing training a model on new data, see the ``fine-tune`` command. + + Returns + ------- + metrics: ``Dict[str, Any]`` + The different metrics summarizing the training of the model. + It includes the validation and test (if necessary) metrics. + ''' + ### Train the multi-task model ### + metrics = multi_task_trainer.train(recover = recover) + + task_list = multi_task_trainer._task_list + serialization_dir = multi_task_trainer._serialization_dir + model = multi_task_trainer._model + + ### Evaluate the model on test data if necessary ### + # In a multi-task learning framework, the best validation metrics for the different tasks are not necessarily + # obtained at the same epoch, one epoch being equal to N forward+backward passes, + # where N is the total number of batches across all the training sets. + # We therefore evaluate the best model for each task (selected on its validation metric) on every task that has a test set.
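+ # For example (task names purely illustrative): if tasks "ner" and "emd" both have a test set, the loop below + # scores the best NER weights on both test sets and the best EMD weights on both test sets, + # filling metrics["ner"]["test"][...] and metrics["emd"]["test"][...] respectively.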
+ for task in task_list: + if not task._evaluate_on_test: continue + + logger.info("Task %s will be evaluated using the best epoch weights.", task._name) + assert task._test_data is not None, "Task {} is set to be evaluated on the test dataset but no test data was loaded.".format(task._name) + + logger.info("Loading the best epoch weights for task %s", task._name) + best_model_state_path = os.path.join(serialization_dir, "best_{}.th".format(task._name)) + best_model_state = torch.load(best_model_state_path) + best_model = model + best_model.load_state_dict(state_dict = best_model_state) + + test_metric_dict = {} + + for pair_task in task_list: + if not pair_task._evaluate_on_test: continue + + logger.info("Pair task %s is evaluated with the best model for %s", pair_task._name, task._name) + test_metric_dict[pair_task._name] = {} + test_metrics = evaluate(model = best_model, + task_name = pair_task._name, + instances = pair_task._test_data, + data_iterator = pair_task._data_iterator, + cuda_device = multi_task_trainer._cuda_device) + + for metric_name, value in test_metrics.items(): + test_metric_dict[pair_task._name][metric_name] = value + + metrics[task._name]["test"] = deepcopy(test_metric_dict) + logger.info("Finished evaluation of task %s.", task._name) + + + ### Dump validation and possibly test metrics ### + metrics_json = json.dumps(metrics, indent = 2) + with open(os.path.join(serialization_dir, "metrics.json"), "w") as metrics_file: + metrics_file.write(metrics_json) + logger.info("Metrics: %s", metrics_json) + + return metrics + + +if __name__ == "__main__": + # Parse arguments + parser = argparse.ArgumentParser() + parser.add_argument("-s", + "--serialization_dir", + required = True, + help = "Directory in which to save the model and its logs.", + type = str) + parser.add_argument("-c", + "--config_file_path", + required = True, + help = "Path to parameter file describing the multi-tasked model to be trained.", + type = str) + parser.add_argument("-r", + "--recover", + action = "store_true", + default = False, + help = "Recover a previous training from the state in serialization_dir.") + args = parser.parse_args() + + + params = Params.from_file(params_file = args.config_file_path) + serialization_dir = args.serialization_dir + create_serialization_dir(params, serialization_dir, args.recover) + + serialization_params = deepcopy(params).as_dict(quiet=True) + with open(os.path.join(serialization_dir, "config.json"), "w") as param_file: + json.dump(serialization_params, param_file, indent = 4) + + + ### Instantiate the different tasks from the param file, load datasets and create vocabulary ### + tasks, vocab = tasks_and_vocab_from_params(params = params, serialization_dir = serialization_dir) + + + ### Load the data iterators for each task ### + tasks = create_and_set_iterators(params = params, task_list = tasks, vocab = vocab) + + + ### Load Regularizer ### + regularizer = RegularizerApplicator.from_params(params.pop('regularizer', [])) + + + ### Create model ### + model_params = params.pop("model") + model = Model.from_params(vocab = vocab, params = model_params, regularizer = regularizer) + + + ### Create multi-task trainer ### + multi_task_trainer_params = params.pop("multi_task_trainer") + trainer = MultiTaskTrainer.from_params(model = model, + task_list = tasks, + serialization_dir = serialization_dir, + params = multi_task_trainer_params) + + + ### Launch training ### + metrics = train_model(multi_task_trainer = trainer, + recover = args.recover) + if metrics is not
None: + logging.info("Training is finished! Let's have a drink. It's on the house!") \ No newline at end of file
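To make the output of train_model concrete, the sketch below shows the rough nesting written to serialization_dir/metrics.json. Only the "test" block is taken directly from the code above; the task names, metric keys, and values are hypothetical, and the remaining entries come from whatever MultiTaskTrainer.train returns:

# Illustrative shape only -- task names ("ner", "emd") and metric keys are hypothetical.
metrics = {
    "ner": {
        # ... validation/training statistics returned by MultiTaskTrainer.train ...
        "test": {                                   # added by train_model for each task with a test set
            "ner": {"f1-measure-overall": 0.87},    # best NER weights scored on the NER test set
            "emd": {"f1-measure-overall": 0.84},    # best NER weights scored on the EMD test set
        },
    },
    "emd": {
        # ... validation/training statistics returned by MultiTaskTrainer.train ...
        "test": {
            "ner": {"f1-measure-overall": 0.86},
            "emd": {"f1-measure-overall": 0.85},
        },
    },
}

A typical run would then be launched along the lines of "python train.py -s serialization_dirs/my_run -c <path to a config file>" (paths illustrative) and, once training has finished, the resulting encoders can be probed with the SentEval script above.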