From 5c0d892998f48e498c2c37263512adce26dd7e93 Mon Sep 17 00:00:00 2001 From: Hanif Yuli Abdillah P Date: Thu, 14 Sep 2023 14:19:57 +0700 Subject: [PATCH] bug fix: entity indices when ngram > 1 --- dist/lexifuzz_ner-0.0.2-py3-none-any.whl | Bin 5484 -> 0 bytes dist/lexifuzz_ner-0.0.2.tar.gz | Bin 5109 -> 0 bytes dist/lexifuzz_ner-0.0.3-py3-none-any.whl | Bin 0 -> 5557 bytes dist/lexifuzz_ner-0.0.3.tar.gz | Bin 0 -> 5185 bytes pyproject.toml | 2 +- src/lexifuzz_ner/ner.py | 13 ++++++++----- 6 files changed, 9 insertions(+), 6 deletions(-) delete mode 100644 dist/lexifuzz_ner-0.0.2-py3-none-any.whl delete mode 100644 dist/lexifuzz_ner-0.0.2.tar.gz create mode 100644 dist/lexifuzz_ner-0.0.3-py3-none-any.whl create mode 100644 dist/lexifuzz_ner-0.0.3.tar.gz diff --git a/dist/lexifuzz_ner-0.0.2-py3-none-any.whl b/dist/lexifuzz_ner-0.0.2-py3-none-any.whl deleted file mode 100644 index 706116fe258be2110dd358402a9591c98774144b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5484 zcmai&bx>Swvc?A)oZy2y1PB`39RdRk5ZqlRK(N6=uwY>bgA?4{3GN=wA9ehNdW)=)uxl|0nd=ecsRr{YB%a z4#5>OlNB6R8Mv&w#olXLkd)P@VaffC(^z(Ha+4;KbD!z0BS^}JHRDBv?+Bi|%B0j< zQ+s`V_|H%a_9A1aM^1-Qovj^~N7OOOXGej&_^IW~9P<=b)5eA%_HZ(ff)Mh0vL$eG zduQkJL3mx|rtp$lqfLaa7+<2EaqWvSQBU!wWovbDvUt%x9gbmX@yi6LejF)!b`y>M zlxv|CyO(I#HW#ypMxPxUz$y63%DDA4@J#n<2=0<2sp?z;#q^kyz0@9_S(JDO8YHd7 z%HPS!(=)&fZYsQmr`y(K3i_Nh=EN32rJjXjSCrhRUX*?Uou@EKZtzWQs?NFN3lh8E zdw+91F~zGiw0`#cP3PC>{S+6@319pXU8j+%9-layw{NML8O~-^?6?fla@jRg>%_Ba zXZ&9aaBaX{(0Lwn)JIZv6vO*>X=3%2MoBl%OiWLy_>P{V(E+=1m$Sx|5Fv^NO*@o| zG4K0uq$D`Gi&xPnC5&8?$t3b}$Tv46yoNOGW-MT9RRA6K`XlFxHFaa{bQTs#u{-+E zOE~Il{Q7e5D1z4Q@*ik-X|?|Al1}Jf;>6=@_3D?}Jt>@4Jkei-NMsn2R4IZxmEwd+ z2=q_K?~1HNRexa@bznSKZ%Gr7z{gf-yO$GlsclJbBhQWGukg$6eM@p0IQbip9`e`AQ8&0XJxZ=NNMjLx}*u=CVo_SNyKOAWo97 zv#kyigyeo#D&$P_;^nxosSKx-4TC!{F-pzI$tyuznf_XD!~=8Lq6oSDOVQmPY^y^SS<7+m`!Y$mvJWG{^8ieQ|Ag_p+1A&P1RV|IgmAMc-MqvTBzG@{oynoMt$6hrzKk6t!I& zWCeHBqplaOvquET1>ZyS6U~PSQekGbrt3`dkA!GptMLWaj0)<#KXEl%Y1$PjYI?+4 zo|*&2-EIiUxR^C^am>a<^qA-~cFVPhCwq*eC5(hHP6T{MJ^{UZBFL5G#k-Vg^JTK! z#r%~p2J8gen)&y#5`KvCK?!Et5Sen&p08o=cGoL^5_=(t6a}UnkzBiK8OM^E!*nHv zR3!QRJj(pK8uTb}ccSf#f@Yu5K6RrI?{j3X6-T~&#oDk9)&nIOn(WG|vgItM9pPE_ zdxkRLh7+gq=|tlx+K&rTr%L9&;HX(?h$OD<>!mfE6l6jkD`3YlBd9?EEV&-eR z9t)b+zAy1wrgJJl>WGTB>6CV%22dvEe9>iCb77$2e%2{f30nM$(2tFct^1xgnUkF@ ze!7RWnqzahWmtW(ZYD>YG2f95d0jgLM61D9l@#BTU@Ec!V00$p3TItdRU$OOMRjxd?|X$XOs+QAXJT1SuV>%!rt!Ng{D!ny36A^ zUd>GG&)Yw~R~p1FpPEfcWu<&lK$}|WbuYn^s4puq9U2E;F*8KC`hAM;K=p^5?3V|X zHvaq}imb+1cxJFtxwz60a7N};=!Cfy$B;`?OohU_9A$N@Rz2XbrkUiWP3vZTV5+c^ zq_=PRr2d-NbC@70lCI0MDHL0yF>Vt>>xLhogXQsVcs7KTz429zRV5IXU*S&Uy|KCW znwV!3lE-Io#COh||GU>3?EqT@OO9yd;y~)REyU~8-N$eO<=ZklmBxk=*$KS31T)5k&MsfyRXIu%1l4`K=(KSy+$UaZh3&;HQ~zBN+nL2b^X6%BNXUo^3g@CBzDIe zs`#G}1aK_x%O+-_pKq3I+yqsNt>y8y*anSpo;SBy>UgCU*E+$aAAfu!C5`Vw6Z1qc z$M_@z2`J_%%!oafzNsxnS{csWFdBWQ4|5*9lFbmOC>EMq_@d}@`MXV%e2>MfD6hV4 zO7n0vah#2I#^98Xa`rmi88ybuSkp27a3^l$iAJRMo!vE2p8&vJ3PDtG`>gu)!MgMK z$NC`7GUEJb-!J;e^l~5fb5DBbUoGBNkIvhS7R)cm4?L5wCxD|4vV!=Lozf&bs4voC z(5U{W?C5l(TzI!Jd0%Tb>Qbg!8P|ui^W@gQTCMN*9NkP$ljMAWF16iXpnR{7#JUM6 zN9`Xh{syJLuK7(p@*1{5I1ykP0nflA)aC{D1v+A|2N1|EZm1hWb}I6iUwXa`OkKW2 z9mage=uLs)po6y$pmqgO(bK*L|C+P-(NUZe5^^UnSW=3RF_O$&oy25CxhNoT9U^rg zEsTE4+kDFM(`{&s*8P6bS2>BgA8~Pq_E-5p&fg#x@t`|Y(E$LKKjZ^99}gc7KaVZc z6UGgFY46IbuBxB}(NQvcZsD52M;H`4;3K+KmaHY>$u2fcx=SPGk&DpEjArVoHU%Ot^@*+_DrqJS=lEg>7CZ$6nExRL(J#+ys$^zNB z5g0V{8Dq-s=-F?^lsQjnfz%MErx#)EQZ!ZA#d47+%$aR$}#9C>(s$=!7s;)?KhKd zmy(B_TUW0r)=H5k3Ut z5kj2DosX<|%M}lw^k5I3`~Z01a>?`%#1AnZ<%EB?!+s9rj3+BwGbHs(I<$`O?!gFY z9pE*w&6l4CqjIdaqVq;^Z#)4XjR-T5eP+bdH__f2XiLVSAXV36S}anRtJ4YR5~F$> zFG7B)pNBW)Yc7??H6Psl9c?tMhyx1bZ0ZNVx#U@*Ly37+fl=xK#%9P=FLE5ag5LKo$1DqU2486~o-n?KsRGs|7Ea&8L zKIQ{bMh`1F%Sl(k2G`#^T%z(wvk@5z#%(U84+%YLqZfN6GJoykcSS%OYzzQE=MezF z`geV-p`kOW?1?U#YeS>irS`%I&6YBJdC#lc z+aIA1+y}bOUt%xQ9)*aWkMDPgAT@4`Ud7|^=F1d$jqXbST3q=^u01Thb)t-4zO+rG-!Rc5?Sovw#E;Wvbm6uv5eXO{pFinT@0e9cSmdlJWG)&Rqt>i*<7XYf z1(R97Qzjx7XO(u8hmbuX*S}QVsFk-`Dv6b^K)Kc1f9EvJo+AG#Mfl>=EyIp9u`ox5^h#NI^nHm%4ke|M|4#5ml=HJaQ zbOlLE=SZ2WUK)crQCnO|cI>owX6qn!9LJj1$wxX)5&EP+woELk%GAQUOkq=(Zoxa0 zh#lMgPEnitLCTfMF`c$(#!j8d3)P)K#h;iIbLQ>s$46yTEMsA4xpG+U1;>!Kf%1f8 zQd-J`-XNGXN1plo5=c-SJkj`4(0pcl8yo#L1oyRH&^#N+4~#U*ClODKqI@yvwR@4K z@~sMdrVzL5u%3ulDl9@BW5I*KU28L)>fkjCa&jtttv|@sU6uB@ImM-cZ0E7Effix~ z)o8YT&~q8`&h`DPmXN^K_5KB2I!ta6$Y`(B{o;+dyzsWSdXbJojEbvDo#^M>kEkA% zDw2Cz4u_vYgn)XBekQD4A2NrVw(w_)NyH+~BPF%vf+;w;`_k-dH@8dAfA@VFOyWxd z^H&Ax&o(*&<0=^xA1^eX;<`mfH5lR`0289}LRflsT?7v0SnJci`hM0LAWm+4s zQereIa=a6X~EyWo>EVg&|W)pOWRd zQd%PtgGr~oWrfdE60OQLrryspeta|#{RVd1BGkzN+Bc=8F_Q`m!jxkLnNOBYCCX`* zV#~-ORum(Gst^MuN<{+Y5)#97<^)sVdlb(%6FKU%!#zqy+vWP`Z6>}pIKDmpyb<MbM=+xy+zk8wi3sP{fS2N8Z|LnPJ2dFu3z1nNr)Fy?7=0?>vzGV-NsyW! zGa~Y5vo{30Jj>s^AAg@SEJ__DGcSC86DC-uNv^@mkAg2`GS^|1{G^L2U2<#6aIw42 zRXN}9t8o&%E_61o6U1g+rQ}pM&D?jLx^sY+1#cW9479i)^*%wV;%EaI<0`&WIhtZ) z%#p`T&!nj5g)Xib4`*Kgu%z4*CwJzSbD5D|6+zw7hjvRp{LTa>5E6;iqdSZ2vu=%w zGh7wewx{hCPjSU{6oq_zlxh+?q6$P;h-N06l4e#{54}l|-Ft9bpKBMU^Yp5ruR?g$ z0Mu-pU?EnJ1ugMzAQP4=i#}8S(w^ioDQtvVveQXEc;0i_JUVuJ(m(6^dj6Ep`~1A! zRi}=61lCM_us`75KtFct*eud>D~Ftb?5JYlzWWt?Pq0mbdP^{V-RU5A_zM(wacIB~ z`j*l-F^aR&AzMCyd1){#c+-_6ZMKJ6#DmN($ntM1jqT|0pQJdX_nJ3kz~}jm`PW?n zEz?WTd)Og{55e?zyTm|6Nl6`lR1PPYfe<*|S#pF*q7>bOtJwawv~jQmJuOO`d{z1j zGK-gRFSwXte}DRyJ%&O6$m^WMwh9)VKQre>GSa|e9L*Fl_O%jdf1x?%#R9_ctKR-= zQ7Vp|Y>VtD0KnWsF#laq&y^H3pDTusYrdD41ptoxbATh=W&Lz~HiTrwrDKwIlE$j4 zqb1)u&&>Uy1l>PttYi6+5h3&Y;>8V>W7I|Rr<;pR$+`z>D1^WX_ZQUKqdMO+t0^C>CqZ0Ea@94QMU=MuHW{H5Q%*kfbG-Q*fCu*5`8n8I69Em2V3B8s!B!VOtq z;kn(Uhi5XXDEqQvX2?`)?Au3P`Q7LBH>DE$hVfpzMhY=YT7FSP>5b)!PmJuQF=5-z zb?8%>mC*bBx|q0&asiHR`uBtls_m5HeI1)je(=N9Z0fS+gLOL0S^|}Wawd_iK!k!V zkHWBf0lk(Q3Mwhuf4AZuZu_6lLC8Oj|7OhnN%_;5`xgTMI0zB_lkzW{?oYs<()>RF zp9jD1|CaIJg!-SvKSk2NiG?9DxPL?Zw`BSg`RASpWb4 diff --git a/dist/lexifuzz_ner-0.0.2.tar.gz b/dist/lexifuzz_ner-0.0.2.tar.gz deleted file mode 100644 index 853e380f2c388845a17e229bb746b46fc683e946..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5109 zcmVM=X=ZhLdS7m3axE|}FfKAKbYXG;?Hp@w+c>hH4dg%Y zT;O82w&G_J8=GR?#A%F8?7&X9R|J8VmS~$BS<>l|#4YmQZ-%5^j@=}?ZEo*YMA697 za5yuZ8O@L*YCGD_&*${+gi(XJuYO3Y$hPTkrBbiQ*Lc5Lsa315$la?KXo2Tb7lwcJ z_iVL2vhewW9aMK)jYg}wS8r+6My;`5&%Mgp|9>;t9iIkYzKkvAmMdCCtL>cj4!iwP z_xsxaUbDH${=wF(ud0p4-rnABt%|;&TB}tWuSg}c{~zAYdKcuB>&)`lR&FbIXgf=n z&*nZU=tWYi)avAfT6{`A2PP+-iNQ^i&dE8#o-=pBJ&)TK;U1YYmra&r=2FXNMu|*a z#)v&7`kcBmRwBMlsI?>xb3N!_PkhQPZp{cKI*hrM15kescszUR-%^(Wv_Yun**d3y zkQlZeESTj}9|xRrlX;}z&l$;&RF8a7jAAfqZsoW|@L~9j+;V?z2R?C`=et}-BqhQv z-3$z*B7AJ}1y@60cOeRID+g!;512qwOJrdie2RaW;LHgoCimtgVsOMg34FNe;Y}gd z5^}O*yToHA5C$kXkS^Gb=@URmy@Nvam6$xdb33;eX)8NFDs~&lCYMNxil+<8k zkTFfNy8k*Kc1I&J7;fcyXXmHAF5K+(4^KZH_4*&k zd+697fM)kVzX9vTfZ&Ju=kK4Lu{3`vKacZL_e z!;hz(AvymzJRgj@K)?~;?f3e}Ll~xe*6m+tFf807-CyAc8J%=aPjRfRT<0S&Fhml` z;o$u9u=nBQf}9LakGgR4eHUo!yg%*Av4F3`(@yWKM2qr;1+@2B3VTOyrdZ-g>A9uCfcN0cda83-)UvEP+w zP~IdpEdYYQKaRQ)Msn2coC2~Db{1TQ&|3B?{hw^wUn2f*JdFQqS^WPZ@&A4z{%=-V zje1Sn+uz^Yuhy~z;O`m#54)YCv#z!G3Ws4cZY4#rwfSfx)8+qOEP4-J+nmYNZwNq9>B$sjGw8y6!9WvIS; z-@L*2?jM)E1)Z_0!rb>Auf4M~g8)02Xu7@FnPWmhC&o^wdu7nQor!5rb{5q0;Tpnz z=6msTxUJj1=2){LB7coUcP0?8<6ueZF%hAgIVb=~kvLSp2HGG|@c7J<5i~a-wSb^r zFvRYBmqHwE2|W0~$M|;YLLeSk5aS2BAGp{NAc;YJs*zK6!(2KGGn&POTQZG+=+}tk zRh|eL;viEYwS|%zpwj0S|98N=5=n7nPp6Oz5YSSemF;O6Lg_MfeL{6T04kRlg}ai+ zFNkP`Xn1-+WC;t%xd8uyULz!=85B}C>xP;EotO;dV4ILNxNr~tUr73uFhsrt zEWwy2F{w2RP=tX6K?6qom_(@@24zAmYAzvb@U&>YhV0(~cNwN|9!^`CoG&5gvPfOq z!_?sf(h%n2{r>62FZdVI6qG`Q;TN_f#|%kep476c2Df8vEK3bd%=nu(?X)xlpPT#( z^W-EsE}KJ&#vv1m=LlykEyNOBAvCC^oZnke7XpdyOk}u`ti3>=L&_TFxQGP$+U^V# z{)><%{4{Mnyadfb0F>IohQtUs2K`M?0>2=2^SKqk{FzG6Mabyo{;h6RkL}T1gOJ@&Ml$tZ!1v6fFDT`TAIIsgj zhYoWgbHAly&s^%vxeltsT-SC{2O+t1ARz|CLPkPMrC_i(AXSO$4Pu77N;;BnkGS>( zJh#O>Z`A-IN-(EKv`cAUcYXlly9}h|h3g1XKkF!)LqKOQ)I2@V8;Ys4hl>EN;GR*B z!-VLn6H|V}k@+(VXCX0{(8&wHV{)k#KwoSoisdQ#!3pTCkCSnY zMhzBbP{&tR8)_tVT`JTU$dvLA&F`ZJ6O31{VfHPQLl68IFeL}Q;goz34qDjMp=nzT z%*e4l?%VEiD@UOH_S21)iM|NJL$xC65+dJruE zj{Kw;`6vwZoI8S#wS#zbd6~qS9Pa^oaKB*wOkh-UR1l7mjCe6511SyZ<_lZ_AyaM{ zW2p`}83>=yiUaah-ov&Hp!|j#Fh66*wGDX9Y?C&*-1h05f@*D-$o3toFUPByZ5sG@ z!rb4%6jqc>=@L{XTRT_dYINOD|2jHLv_CDN)C)daZQ@(W?VVnXi1B8SWi z%vfs{hP+IQE9|=sOvblYpkm<~0Jq(2(pp8|os=g=gO(e0#>)Hq&3bv#+O1N(S>My? z{&s0icgYZ<09mYs01IZ|Atj8E{yHd#WLyRy)kYE_x85+F3ieL;~ZH%{!^{6fZ*VWgjXwp5olw)3feadv)KOikK z(8xHl^`9*N`(f)pt!909Pixkyd%LyWZ2jl&x&GrU!TaDwk_NG@`Qz7rsx`P4>p!)< z2K)j0Z&dcO{ofa9my>{-M%i0>K3hOMsPYet<1X`#&3s6!@xA@-pbE-S!H`k*m_XJD zFJ}xFC76a_+JZvr3qPpC^BBop;-Z2kA)qfNi`XR@aRb~9q*WNd!xN!{aEk{195O{g z$J#Au@>7^=)apOaAeS;R0eOcM8qCvO?)VC&T4)IKTwLKWm=6k`N*8g*7~|hbxXeY9 zfqJ}Tx&yUhUT7&B3TgIu2Ot4Nwff6Px)F_`qQ<6`YR59Ls)?*BH!! z`2+S5^lEjkwD7SGvVo@$Ie|161tRCHhPnaMYx>uCqpt01)%+TqH{}()nFr4H|FZb+ zFB1PXTD4ZQrnRcAR<)V!|NTwlKhM>7lDBupV{UPOJl34$kBI-owW|MXH0rqjx4T=b zX7OL<|Fijj@%+Ep()L^X%~o|k^Z%Lu7yci2H2$FLe}eyq%pdGO%KsZ^|IOXTZsz}A zqN!afmu=<5HddI`g}YZ`1Ag5T$U1fAd6?s=JDB__SiID5uRVmeFunDXPksBES-5km zAZ~5f7>73%U=VO4e2&)*9L-=;GGo5j%N|9!ctLbM=-U=c?l-9$kXVl~3%14YVq4|L zd9`^Zw^;*A^upa!Pi*3<0t906iL177ac@gJ!;N?5`SI?!S{7ImvCV5SJxN5wNfu5kJHuYPsXBc)71_2QG5O7Xi+U%e=X5%EA(t`GJc= zN@y+-$RF>BuRyhM&u;{nbk>gcau>|t!9wii`jne?nerVmdO2jZ94?uZxmBiRxd$$9 zF3X8pmwFWpdEcl_b}K9)aC!Nf9L|}3Ef!-aE@3#h{Vx`0U>r{ZVI2@mJ@>>$t)(+L z%O&FZZm~r27x>E~I<>Ga9c~tiSZ)APv&?0F6OQC=Bwiz?L`jpRfPj<&kSO5A^%bt_ z-BFZ@RUw!)x?n!|$Bm;!s)wH&h_QN>&t}Yhd>}QyWzqfNSZebb1bEGW6{%U+HzrWy zK3jN&P~oNEyX|&>RLb+1N9=4k9gJC0OKO5Yq++ zUTuRPO4%i>D>e}QUBxxt{PE%fI=*r*f-2IsiU-&W7(L&?qpq5=;1De z?^1kAaZ2ARzM0)9~a@Qv1yGymzHV-7*Hyhbl@JLhO+KntiK2=@jJTfC=0 zq)b#|sE1>93khR>AsXt?W$4W*vDj_&?t4VfWDS+9Ym3zj@d1*g2rfn43c4IsW(C4q z5maQzqL={sNXSc7W^i?>stm3|IJuSa01C?wu3{GyzO~d#RaHuOk*GqG5DYplyWz5M zFO-sk3()}@YMKQ}<2$4wlBZzKAU{b4qT9%H>P{)ui(9J`Q%t$Eq#!jg4d^Q?aF!`Q zE8Y^K)=;pyfqKC~dNhq1MnUo;2rEVkkwBN)aY3s|djN!A)oz%YB_3k;HX;;+r>DRBhn2i)c@7BrOSuLzFR! z5DEM~z)*ON%Gc8(S&0kyr-7`#MWZnBqde&e0LT?k(TEZ0s3fMW6%5BlVc_VGIl0)6 zes4#oipatNE@e%QFf=`IUARA%-uQr29@=r-EuMX)!jYg8hN>)3nX#1fSQ-dO8@Hk^ z^QmWOE%M1UfA#h2*T~|*&hPK<6Y=SB80t%ERUjS{p+q6y*z%sDQw$&SC?*5bhCUluQ)wCN+%T4&K=vEToEisuD4_AzNQdXFr2dqpL9!N%d$XgiBSvZ%r zF5&U>&?!u`^a&CL#2qY@Z+dQH;@*<#YShmk2otPL*u_Svu88L!De)v;IVo?TunTvb zFv3J^svyUaW);#QAWq+9J2CPV@bf^=-ljXne7#-OySHo5N1|$9#ZQazrLw3MS`%Il z0YWdgT7CJPlz4;tiweUxR)nVt!jt};k{QQ>@W>^J05nmBA9;YmJJc%~#i@Eo z9;Gp=uwQDcsH9f)<~Ur(kl$m&+aD4N`MozfT@<>DF?JaKBjXdS=3m%1;&qUSX=7sW zOL_TT3Nt&PWX1u#l+&vIKlXBvRl844vR^XAeyIdoS{Ifek5#RRsDAi*zwW_Gs3PN2 zU&>9r0X)a#B0k1^_--uM!_cVdUEvb{LzgYJK7=g_H!SZV#mXjC*b77RKrRL3~|I1ma*mYlfD=Vd^~- z#Rc-EDU+5cI0k}1Lb{J%>v%HuOGqrnvnfbe;b&c-@My5GF<^K+Xh;oTB<1+k{lf!@ zCkGL7WoEMlhKbFih4OiB$-@C z>)_Xe3t0g88fGLqoCd*DG_j=(pdcXtj0F4;YIDVX@d-XqEmM38YIVut?qPOb+~9^+ z7T#7NHV`Ni5oKcSE z&ws1cYFYj7i?lQ5Q(Pn||0+vx+oYydbNB^yJM~#yE_yDMBsU5c3wW_cC92=OxHi`j z1;Fj~=TE7pWrlUXZL+QsI5!Nda3zUtA}UvNhfxJ?n>?+SHr3_vdfF8D6DoV33%|m{ zQ+n(_yGQHUJ*t)Gcd5pcbOd2%TWk%J!oh(kATF*x4Y!~HK$aXAb4PKhclpqUz~A!A z7m%H_g}&|J+L>YBTDja0IOrhvpK*{|cf!Gj0}j;r2J!I*xpjvd z3a?zm%PdL+H-H`_2_q2cFK!XUy%5erM)Q=rz5CLyK z%3$>*gZLnWZ=PeYf?1tn5FKI=pJDK*~(`oy0*vEb1I3!Zdd!MBeq_#>wkJdMYvomKFZqY9pXQo%P5 zDtP`m1?feT#3==T_K=^~Qm{VZ|Sy6S?vX-^1Wi4x2%Uag5mbI*9Eo)iJTGq0b XwX9_=Ygx1f-<9yM&>U zZb>QmIOnYMo$vKqXT8s}_H+NWe`~LM?fvX)T?hp{d|ChiKztM6n*acC|M>*~7;etj zW^YjD&hG98wq}kH0|S&T3TQ^y9J5)kaf9Gw0sv_K%)80GNvYS;j+_>y z@}i8oZmOY7D5<15l`0Je((>UhXAnivx(Sf^Cs=;edv^opmMi+$ zBBw2EMeu8xnr_;q)ctX!la|xo#_^#AnOAIg0LvV64vu>K<8~$vvRA-f+?*1rUEzJ> zN@I_F$12yjpe%ue_!;vT`4oLo54g?C;6UUKqlGf}UAiZrTfs&@6X?SQfH4>PY>@V{ zg3sSvARKS)>7IJC8*M9NISQG=k=8qq=&Sd`K2aQ1hvZsEC~VYF3ho@-`O z*!im8EuI?2Vs#tesD|i@uHe3OH~Pj~Adv`3K5`Rq=8uaW8>eknoosj`1WR z2Vsnbql_QxTE$ugizk&AjAH~n3Yx78#FhAeT54n~LX{gkMZVzRK|~f*7l1vpJ-W*b zFtnLRDs=44{m(<8p(Dnd{R?%m$8y>ZROcwDWP_7y9V9e6^eX5q$Cdo6#+85C^0cgNPP zwmT)D&@4V{Kbd#$4g02{3|J6Io7Zt`E!>%TZ;Q(;?iEIw5PcVcx!r4!CnG5`tdYT)>{|he@%zj zG#@?zdn6m96oV;h8M+Z7p7_ttja;NHYYK=ydE;?TI*B(cQO+L0E&JmOc<+@~*yC}f zytTS$2KtZ>lKb(}!VOy#`2?vxJRIFl-|{KQsdGtTfj5y7xf)8Sd*D}73g0PKtx1Ki z%b$fybBA7@DlIOB%BnU!Q-A?vu&T1}RGk%Lq~i8j%(8?K=jv?Vm(~*rA6Z;Hc$eJol?M?W@D*hr$bKJFt)_DE|=QTA1nf;=OT*gp)c2Y31lCh@OV7$E zO*|VV`a%@+{CJSW;~@$3FNrL!nJceFU99t1rCSrNM6spc`NV|7)z@0`TjIw)!7my! zy_ekET8}yMw#q{A&}JuE#uR6P2VTKiEHZVr-Fq`bmM zSs2u5F6Kfpz-`3fN{VUl3#w$LZWjYRu1>Z0r=pF%oE&Jf@GRJhtnIg=__g|K*pdlS z5T!ZRZWiUS9wy$=bUm{oWNxj;`0lf`&Lz(bZ~&X>?r-7QY#P3wK$sZN%&sg}tN5d1 zY)9qeTiSzi0{ z%)5(!EZbO`hZh8_v(HtG*XC^M>0OQy={zmgd+%Rw3#$C_yTCI-aM98CfRS3M-)JxO zohOc7l5lz3Dg!Mpr5PyccL!qc`%kUjHMLlQ5{u8OY$B3aezs=Jr^Stqj9jUn)t%$3 zXQI@=6Q-|pLPU2nR+=22mt;1P&MgLzmaXGKk+K2DJB$zQ&4w0g)?1z$ z3_stkt@277ra{bMCyY9uwpQ-Sp46kZOe}8&mV15QAM$RWmsyRq4jS0ykVx-*9T0qu z%eI|6MV!+(80Dfxw{_+yY5iE1v7V4RO{V zRcUg%?p`r@**XY0T{B>k4e6$O zr%jdXgMwD6QyV3{{j*t=h=SY%nj(rA18ai(%z;l+!ks!I4DTGdk93NOD;=*p-1^O%Eg#@u!P8XK_p3xvWSPR<1%kwz#d%0WPon zFz9|%!QDJShHduW6Zzcx*!ZeCue^rEFP_miY42$@)BN;QYj#SSqBJM?eJSH=f^JUs5CbN50fy3NxWc4mzA! zM>=E1^^q>7|J>4hi)(OaChOSpbk%Rb{_M$a{mQvJ!$RJkjvQL5tQcZr3=vv5C<4TN^~jG*fBAttOt7fmTt29h;*FEmg3fPx`j&#P*afwcVph zA(_u|oO{+KkFm2I^r~HwD+hAq1=G_oh3WUt_R1gc>~vHW+zUuK+CVP}W#!X$!^pVE z!*X7_e|VD<#95)KFaH7_n<$oQyl4s!rjL8j#t_cViH5%&3P^tNlK=BTY=;r1rBa0z zgTy;8?4e`s$nSBA9^N=YaRM_AB+c${ER|PXO-JpO)2Is%EDyr9H!^G5C9m8|X1%7x zdoWH`>6}(+c`{bq`SZD7<|8&rc#9abpl`3=zLoT-i#e!>L*m=d-yP`99LOu<6v}$)=Km7okFM=OQN3B~d#**cUiro%3;Ew<<)aVY@ zLG-+>t@GX@+j$#{KHRNJuqmfoyDO)Oh?vFI9;t12no$n3OLVXH_4>BhI{A`?h)+zp zr}^nM_VSUG=&Flij=FR>%nnv6KAzrz>-ZV=WCLNbJL)S!pn-MM;c6&L?y6iNAO1um z5qJ{xL>218z{A(_!MtR7HSgqC%V>L?Kpb4K$O}1AVM!3pNLHfP<+f;MN=UtWhh$5h zQy#<5T_^cE{AU92sU?$2=Op7N$2g`*gDfww+?{-c42L1_0le)SKIL6X1vu&wviUpL zbfFA`ZQ^t6u}CU1vww5ZMM0d=Wfey$R?aj&iEqO;KUSr@Rg1cFpmw$=n)09Pdd7>;$FJc$LfiZz z3B92p|5q!;7@i38`M^?rX)aS=J;(iIV@)nbk{4QD6j9a0j^WHw%T>NIZ~K)5YXc0$ zeYzin#oMG~UMD_kuEKZGsA-uzAkpB;#ZSq!DS6KLi@ga(d%e3KWme=;%Z4*{`aUmc zU|XoxiKZ*FuOGiH+a!bL43@~w%5=GGiqR?U#*2quW`yF$+ABt+#nmnKC~^&jmQQGb zQXPxq+#*_hGuZl*Km&5BlD8&1bs1fAXU6iFwP`d5tT$$)#|IyOll!9UqV{pKLsNVn zZofjMof7|U`F?G!JX;Ie0pUghHhbbWldQ|8A`ca8m%Ob7+r!y z94Ej2G)3mFCRy#(0wjKo&KncbH%%UpK=Y@O#%+>;`D&`?`KlIHv=bq0dga<*4Ib~- zw%R-VB=!HU*!=JWL^aMG5ug67MbsR^ydtMCnJcX9X2MP0G9StMtc5#4lncD@&AKk! z$BpEk{n>K-hL|k2Ezr@zP%MzcZ z9v-NS8=jlnb)nsB+z2ssy|7_FNh<1`5+N%F;HHxLh;V@nm~X9nu@l>IBH1&iH=g{Y zMR(tRQg1EgGvPF&9~!wj;E{51F>xs1a&l5@r(Sxy8(npKXS>azoVEALvP!J!3cA;? zye3Z!3_JF@rd*}Ly`t>9sJDRb9)qGzcQpk;^9dFGJ*2aB&kOpA&NRD(FB{?{^)_&G z`1ew?Qaoz%BJ1ir1{ii(ohP)|$oYQ}{dJk3@-<>KZkCAFO|bvnGSP&|$|{ogKuLYr zs0fDYbN6v+WJ8;PGPS{Z745nBAKt0b&q;pYOMy^r_Z9SH-}kBkDwPRvf_pQk|afEalngjn91ZfB{TTr4`Ra~fGijXKxC8qSl~ zWc8i+6DSd8rP=8}5$&A!R5+~5!UzV;6Jdzg94Nm4FWoqd4t97V?yH#)InH*9K}DKA zn>+d-Z#>vxYrTLMFZeKOE6zQ}o9|s@tnH%M#M4-7dbVtNZ(f`bL|nVH{xz;!amUh5V0}es~2bUJ_zk7N&*Zt3D$M+w{ ze{=Z$r2Ogd{fhwr?D&fRN%@!G_b1>_asD4b!;NwHf6Mr9QvFZjpEBv+#BAR{;NKAc zEt>vB{y8lFL5@=Y4f3C(^9S$$_4)tua4c{A4ewuFk5C}E5rO~!;mzBBGb|(ja908U E15)(cCjbBd literal 0 HcmV?d00001 diff --git a/dist/lexifuzz_ner-0.0.3.tar.gz b/dist/lexifuzz_ner-0.0.3.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f91da0631c96acf27eb2f337ffd90b664977631 GIT binary patch literal 5185 zcmV-H6u#>piwFn+00002|7>M=X=ZhLdS7m3axE|}FfKDLbYXG;?L2F5+qkm*Y#{%E z#|2L8)>bS(@-Q}w<0ejHY+?say1hjZcxj2YxsfFuJ(9RZ{`;LFsh49n$!?q5yA@G1 zGC3U142Po`azt%Q+xpKLy*p;qWZtXqQfjiS`&+9u8p$)(>$SSBzan?9UZ8}2Ks^}# z)z4Y#J7ga4IosE__nOT;eW$Uf>CJj`w^4YNmw$e-*d3pQpFfXm=2dH2O>1nO^bWfH z^X|7@|2wVLy7dohU4NxFn>#x@+jSjnfnKlITCYegU;p2|oc1or2{)MSvyH+=;lObh z9-qzvQZmYltqX3}64Dk5QB)Fv>189>_-**g7 z0U+oDKqM8y zZNmypq#|mz_?)XDu)7e2zfl0Rp$|+TsTDGJOg_QCOmOCgV~hK<3Nbn29)|%``dBH% zT0u^>9FO?S0>S_V2hs(*34H_6L z-o^3Y!vz6|VW)rbi42ZNr~irk*6SZuNcVrvhTZdXG8k?YdZ%Y6y)IPt`UfW;4txFg zmXU|@(O zl7qq7r(y5?@dY^^oE&zc@?95b>%2SZ%CUg2gOg70v_cL$r=9m*(Q5!`fIWnh1d@-( zU97_qJMiz|qBrOxCkKQ6#Sng0fYISa-1lSeyjvliVecGeax@&A0*@$D=rRykpku!) z(V)CZW?BFQe}6dd#u&+Aw{rr>&at!LGJ@9fSLy%DqWuv5fAgXLU(fyj7xDjhQ~rOe zw%2UbwVmDFon5`22LL~(|3B<@4o|z<+o{1NdJIJ9dJakeQYJ1nu7NfP6ns9lr3cMT2ra;= z7Ywnxz@y+t+X4?Z1nA#RJaEKA8+`oG2tp4#0wghMKs9p0ZkR`>QA9Hzb6bWH;QgAh zysBd%LmXrxq&8Pl160P$=Kl?uUm+QeoXG@Y0RmbYu&Ohuf-7C6UO=c}gh1s2y>L(R z_!$u`5e=<3)ZZHgRjN&HDvz{xyLYs^Ksg$-#4czu{j9Q&0*qhTqtN95Ez;`BKYt4XP7utV#_{&G?%)?W{DzfLr`C z^W`MD9-Bdk#vu|)<_KpjE5r&sAvCC^oZnke7XpFqR7AM3to_iKLC6}#xQGP$I^Gl% z{I?fxrkj2K`M@0>2=13%DJ^{8%V&#agwg@bh< z=rCX|WbC(aovBCN88<+6nCCej>L3J{E(FAYSjb3dsT2(M2Ba!*y+O=StE40O_KD|= z!E)Qo_m>wyObN#Hh;|{@*If|8_#Ojk`O!Xt)XxUW<^a$+b2U#7^oC+6?c*kZC%9+S z=P)6L>co`YaAo|=qFG4IC3NybSaQI%!6)2M%Mn?pu4{3r6+mCIOccu#w1Z>N+W;ry z8W%OJFq680TD6fzQqQA8eSu6V|7iJr@L_`S={3y0jdJLL9RsH1pf{Y7_rgGnHFaP) zHiKp4IzA5^Z?RDzQ2zMi5BUJ5oEQS+sm5IcfD{tY#q{t?=na#k`;veEo4DMCSr$Er zmI6n9GK_o>3-pY;f{&Gh_%r#K#+e@P0eUdMu>6_8sN|?1TqPOtVn{nu8q&?@xC25a z+%`v29dI%bKBX1=SF^ z_;<{_Kfx4T)6J_&3fCCylPS6v#|AA{NO4T$Us)%2PS5?e6J2CT&%j+0v`c=6GD)8?(J?t3HAHNqxInqmOVZXZ%rSSE%jRINz&3 zjq!RtVSM*K+a>igqxGn7f~03?@a8gt5p;FX-!fODx&&NTU!I~#_vBKJdG+Nf({cZR zw0MEWi{l@;|C68p{t)NC+dHk@y_VM8Y3Q|`eEa7o-T!eHV14i)Ndw>3`u_VrdL5p{ z{!go+Z*T9w`fuvBM!x^^66JCna?7mx3qN3U@CQ}=fqvX&(REk=VKu(Df8EzXIcn%K z${u5g8sX)P?xF2z}uPb$Jmzxl7zs&?E%(#b6P;q&;qcyMeF@{dZ^)ItW!X z3}z513OZIEL6cviSfk$f&lF-Q3j>f}kwTOChR5ANp;Q|UQJjlA93~4u!873^o*1M5 zJC3%wXxdRvwrqSp=N1I3!J>%bqNe01-Zd>ILae%SWN7|%zoG3wlVRa5!h|!)ntZVr z&pZr~=OX4ki8fc-?P!Yxo*G5vr4-d^Jys6Rd!*ySYP|u9;dkmVKL#JT%%g(Sb1}!V zAH+2#b7B5~eFVK)o+~YCHb6Ge`j8U{b5S62&g!TeFuhiAjTH@TSJR6taNbmx@a7gc zKmW_m|9+73zvfk&r5 zj`&?}tJt`x4zJ`fYiNsJczWuKLtK@BKrBA?)DbS8ZHYEKcxQf)oQ|t)fdvuA9C!*# ziJ(cONdpc5hDuDf5a+Tu?nj2WY>|q9iQS0!As&&-ogBr>b8RQ|kSjh9ab`T`&m2o8 zmUt`(JseU(bBRFy_>}|-REy61&H>8kFUwWj2YKrGlt?8hKtAlVsi$@@g)%E0m0OB zUmVoh29vW~A%5VME2MaVzkFg)8}rhkvQ)-&1CW|+EsE=Kq)#L988M|&nj{4Tq!fZg z0Wa>aaFy?lqD<`y!K~3a3&1|E9W7Qp{MZkVN}bgByqnbB_Zf?IWj>r7|3EH-{J;{w1in; zv2j66Ym7@E5;!tji`W&@hPds9fuLVWOJsWZsdH{J26phX3OR@KWUmr_aZi~H)B@%k zOeAFyFsq<4l9XN`?TyOa5$QUaY?-uW6UapFlOK9&MDg(nu^r;vFwdf{(o~KqVahK)91+mNTrg^hRHdb~q-(}pg3Iz6t-$S!<0=DMKm|d# zH@L9HI|@X~L>7i>9H}ZKj0L55r~{9oC8xw@xAA-L5j~UnQ!=M5Q7^;?NYWy>6gews za#V^H2yaPH@j@1f1kgu9UaAy>t4oz(a23JHV~ht-n0j!Pn4sv5rCz8iOTv#u6)g$D zpzE<4E|c~mDJi%J9iX9>RgyIRiZsOX6wI0A7s)_ejZ9~1Dw$r~UY?k8#-ybMsln2K zzB2n}k+HMlEg|v@1)FQA7aU|q)2L~dBtL?%a;y+3beYo@Ts2t>fbh%O4O5dQJwkeh z=|d0A2{tOzEq_XD9c6Tk@QA0B(x=29Tw}1g$oezo_lpuqlEWvyck@KX22Q*1^yDGZ zf`B+g3Zrn3!0rPKh1aNjJ*dPF(w_A#FBY}(by;q z9Q`pPm)Oy7ZRk`2Sv0_<%*PQ6%?LdY>PONV?~~fYb)0ldW?!jrEa=oiRVz>}V=3p6 zTp%EAJcha~X4-JI$d*a*>dTie@rs9aet&WEo3L4NRT|(ev6!6+Q1My^TUTM8UWqDDo?4>U2JGEX0S-G_s&*$!AFn{J>8Y{&Ct_m6-zR40mcd zkzPqJPUS-KIE+zA{Zd;+7PZPX$Kg7r{1zMD{t!^eZ@tmzqR>6`u_N~%86RUN|J=C| zuY;6Nn-GIv$;m^w!<8#ntyIfvvVu{X)=D<5KCOjGxWAt~ms5^7ET0ua zOXo22o{9Ye`O=g@OY9s2K_DR`z%O-t>HDQ5mf%_EBrNf>%2AMHeAQh*fHh9T=C`fxW;lIeuVp~{eu}KcrV?}~} zQThb{m{qeCkWp?wB3)V)msS00RTt5?3Vt=XkQsolU`C=tbrL+q3R+nM3K9aqcp@G` zt^abM`~-Whwp_jjwY>du_i(47yylg%w28P3v4%hyR4LQ39whY{0|IA5;59=DmvXr+ zKPCOC&VSE-dtdGKj|Sf~|6{w^T$lf`qt}y3i2`;O*7-kEzxw!@Tb{S(OQ17)DvxlEgL< znahQPI0L&)o|a3U>GF6!bq4$incdHYU*h2@J$9enWAE8L^xE^g=*c7*JsR5+4 zzb_KV%ge1%3IYJgl=E`oFiG{V9yk#E*+KOJBGk6fw=LX1HJw|#Q1}iPT@=3GH5Y}< zB^UV}7eDkJ7llus#3+`ov&f)+>@tfOc>Prt%Qsmh7g>Du9*ZT+>K2Rm5{u*xi)US7 zQCM+-#W%da;+fZ1$dBU-fBo)?XJ1{h>gI}A(#f?I$*mRg(u&o0R%G{e{@#rhk6u{u zr28tqeqF_1xvk=9JU;ENilRQ#}aR1^|U#VW`* vRq~RTyyPV>dC5y&@{*Ul"] description = "Python package for detecting entities in text based on a dictionary and fuzzy similarity" readme = "README.md" diff --git a/src/lexifuzz_ner/ner.py b/src/lexifuzz_ner/ner.py index b9337f9..60296c2 100644 --- a/src/lexifuzz_ner/ner.py +++ b/src/lexifuzz_ner/ner.py @@ -19,7 +19,7 @@ def getFuzzySimilarity(token=None, dictionary=None, min_ratio=None): assert isinstance(token, str), "Tokens can be str() type only" assert isinstance(dictionary, dict), "Dictionary format should be provided in the dictionary parameter." assert isinstance(min_ratio, int), "Integer format should be provided in the minimum-ratio parameter." - + for key, values in dictionary.items(): # Using the process option of FuzzyWuzzy, we can search through the entire dictionary for the best match match = process.extractOne(token, values, scorer = fuzz.ratio) @@ -122,12 +122,15 @@ def find_entity(text=None, dictionary=None, min_ratio=None): if not similarity_score == None: # Find the start and end indices correctly using current_index start_index = text.find(compared_text, current_index) - if start_index == -1: - start_index = 0 + # if start_index == -1: + # start_index = 0 end_index = start_index + len(compared_text) - 1 - # Update current_index to start searching for the next occurrence after the current one - current_index = end_index + 1 + # Update current_index to start searching for the next occurrence after the current one. For ngram > 1, current_index will be back to the second index of the ngram value + if n == 1: + current_index = end_index + 1 + else: + current_index = end_index - (len(compared_text.split(' ', 1)[1]) + 1) result_detection['entities'].append( {