Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/apertium/apertium-nob
Browse files Browse the repository at this point in the history
  • Loading branch information
hal45 committed Sep 26, 2024
2 parents b316d90 + 1474c0b commit 5b3f602
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 178 deletions.
1 change: 1 addition & 0 deletions apertium-nob.nob.acx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
<char value="’">
<equiv-char value="&#x2019;"/>
<equiv-char value="&#x2BC;"/>
<equiv-char value="&#8216;"/>
<equiv-char value="&#8217;"/>
<equiv-char value="&#39;"/>
</char>
Expand Down
58 changes: 28 additions & 30 deletions apertium-nob.nob.dix
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
Generated semi-automatically from the Norsk ordbank.
http://www.edd.uio.no/prosjekt/ordbanken/

Grep for "todo" if you're looking for work.
Grep for "todo" if you’re looking for work.

Some of the stuff in this file that is still very much work-in-progress
or barely begun:
Expand Down Expand Up @@ -136,7 +136,7 @@
<pardefs>
<!-- Compounding pardefs. -->
<!-- The cmp tag may be added by transfer to sg.ind forms. Even nouns
that can't be analysed in compounds, can be generated in
that can’t be analysed in compounds, can be generated in
compounds, and need to be able to make the sg.ind.cmp form.
Ø-ep... outputs no epenthetic if used as compound-L -->
<pardef n="'s_case\Ø_no-cp">
Expand Down Expand Up @@ -1545,7 +1545,7 @@
<!-- Acronyms -->
<!-- singular/plural todo -->
<pardef n="km_no-cp__n" c="Note: gives form a dot iff lemma has a dot,
don't include a final dot in the individual entries">
don’t include a final dot in the individual entries">
<e> <re>.?</re><p><l></l> <r><s n="n"/><s n="acr"/></r></p><par n="'s_case\Ø_no-cp"/></e>
</pardef>

Expand Down Expand Up @@ -5430,8 +5430,8 @@

<!-- sme-nob agreement-MWE nouns. TODO: use lt-mwpp for these. -->
<pardef n="bærbar/_datamaskin__n" c="lt-mwpp mwe todo. Also todo: this has an adj already, so should have special transfer rules to insert determiner...
Definite form has indef noun; better to have no double defnes in 'den bærbare datamaskin' than overdo it in
'din bærbare datamaskinen'">
Definite form has indef noun; better to have no double defnes in ’den bærbare datamaskin’ than overdo it in
’din bærbare datamaskinen'">
<e> <p><l>e<b/>datamaskinene</l> <r><b/>datamaskin<s n="n"/><s n="f"/><s n="pl"/><s n="def"/></r></p><par n="s_case"/><par n="cp-R"/></e>
<e> <p><l>e<b/>datamaskiner</l> <r><b/>datamaskin<s n="n"/><s n="f"/><s n="pl"/><s n="ind"/></r></p><par n="s_case"/><par n="cp-R"/></e>
<e> <p><l>e<b/>datamaskin</l> <r><b/>datamaskin<s n="n"/><s n="f"/><s n="sg"/><s n="def"/></r></p><par n="s_case"/><par n="cp-R"/></e>
Expand Down Expand Up @@ -18361,7 +18361,7 @@
<e r="RL"><re>([§\+-–] ?)?[0-9]+([.,][0-9]+)*([ \-]+[0-9]+([.,][0-9]+)*)*( *%)?</re><p><l></l><r><s n="det"/><s n="qnt"/><s n="un"/><s n="pl"/></r></p></e>
</pardef>
<pardef n="sme-nob-numeros" c="Used in sme-nob, should be RL to avoid messing up other pairs.
Also, the \+ in regexes here can't be trimmed (and would mess up pretransfer in analysis anyway), so keep it RL.
Also, the \+ in regexes here can’t be trimmed (and would mess up pretransfer in analysis anyway), so keep it RL.
(Also, this one pardef takes as long as the rest of the dix to compile, so without the RL the analyser also gets slow to compile …)">
<!-- ordinals: -->
<e r="RL"><re>[IVXLCDM\-]+</re><i>.</i><par n="første__adj"/></e>
Expand Down Expand Up @@ -18395,7 +18395,7 @@
<e> <re>[½¹²³¼¾]</re><p><l></l> <r><s n="det"/><s n="qnt"/><s n="un"/><s n="pl"/></r></p></e>

<!-- Numbers from 1000 to 2200 (or 20-99) might be years. Other numbers don't get to be years.
Noun tagging would be better, but that doesn't work with compounding
Noun tagging would be better, but that doesn’t work with compounding
since inconditional numerals override the unknown-analysis. -->
<e c="date"><re>1[0-9][0-9][0-9]|2[0-2][0-9][0-9]|[2-9][0-9]</re><p><l></l><r><s n="det"/><s n="qnt"/><s n="un"/><s n="pl"/><s n="date"/></r></p></e>
<e c="section"> <re>[0-9]+[\-][0-9]+</re><p><l></l><r><s n="det"/><s n="qnt"/><s n="un"/><s n="sg"/></r></p></e>
Expand Down Expand Up @@ -18445,13 +18445,10 @@
<e> <p><l>/</l><r>/<s n="cm"/></r></p></e>
</pardef>

<pardef n="aposchar">
<e> <p><l>’</l> <r>'</r></p></e>
<e r="LR"><p><l>'</l> <r>'</r></p></e>
</pardef>

<pardef n="cometa">
<e> <par n="aposchar"/><p><l></l><r><s n="apos"/></r></p></e>
<e c="left quote"> <p><l>‘</l> <r>‘<s n="apos"/></r></p></e>
<e c="right quote / genitive"> <p><l>’</l> <r>’<s n="apos"/></r></p></e>
<e c="foot"> <p><l>'</l> <r>'<s n="n"/><s n="m"/><s n="pl"/><s n="ind"/></r></p></e>
<e> <p><l>«</l><r>«<s n="lquot"/></r></p></e>
<e> <p><l>»</l><r>»<s n="rquot"/></r></p></e>
</pardef>
Expand Down Expand Up @@ -21090,7 +21087,7 @@
<e lm="med all tydelighet"> <i>med<b/>all<b/>tydelighet</i><par n="dagevis__adv"/></e>
<e lm="med ansiktet ned"> <i>med<b/>ansiktet<b/>ned</i><par n="dagevis__adv"/></e>
<e lm="med ansiktet vendt mot"> <i>med<b/>ansiktet<b/>vendt<b/>mot</i><par n="dagevis__adv"/></e>
<e r="RL" lm="med dette" c="RL: don't use this entry for [med] [dette laget]"><i>med<b/>dette</i><par n="dagevis__adv"/></e>
<e r="RL" lm="med dette" c="RL: don’t use this entry for [med] [dette laget]"><i>med<b/>dette</i><par n="dagevis__adv"/></e>
<e lm="med dobbellås"> <i>med<b/>dobbellås</i><par n="dagevis__adv"/></e>
<e lm="med en gang"> <i>med<b/>en<b/>gang</i><par n="dagevis__adv"/></e>
<e lm="med enden mot"> <i>med<b/>enden<b/>mot</i><par n="dagevis__adv"/></e>
Expand Down Expand Up @@ -88440,7 +88437,7 @@
<e lm="cerebral parese-pasient"><i>cerebral<b/>parese-pasient</i><par n="ep_\Ø__n"/></e>
<e lm="cocker spaniel"><i>cocker<b/>spaniel</i><par n="ep_\Ø__n"/></e>
<e lm="don juan"><i>don<b/>juan</i><par n="ep_\Ø__n"/></e>
<e lm="duc d'albe"><i>duc<b/>d'albe</i><par n="re_\Ø__n"/></e>
<e lm="duc d’albe"><i>duc<b/>d’albe</i><par n="re_\Ø__n"/></e>
<e lm="eau de cologne"><i>eau<b/>de<b/>cologne</i><par n="re_\Ø__n"/></e>
<e lm="eau de vie"><i>eau<b/>de<b/>vie</i><par n="re_\Ø__n"/></e>
<e lm="ex libris"><i>ex<b/>libris</i><par n="ja_\Ø__n"/></e>
Expand All @@ -88465,7 +88462,7 @@
<e lm="peau de pêche"><i>peau<b/>de<b/>pêche</i><par n="re_\Ø__n"/></e>
<e lm="prae ceteris"><i>prae<b/>ceteris</i><par n="gon__n"/></e>
<e lm="rock and roll"><i>rock<b/>and<b/>roll</i><par n="ep_\Ø__n"/></e>
<e lm="rock'n roll"><i>rock'n<b/>roll</i><par n="ep_\Ø__n"/></e>
<e lm="rock’n roll"><i>rock’n<b/>roll</i><par n="ep_\Ø__n"/></e>
<e lm="sex appeal"><i>sex<b/>appeal</i><par n="ep_\Ø__n"/></e>
<e lm="short story"><i>short<b/>story</i><par n="ep_\Ø__n"/></e>
<e lm="sleeping partner"><i>sleeping<b/>partner</i><par n="uer__n"/></e>
Expand Down Expand Up @@ -90682,7 +90679,7 @@
<e lm="trivia"> <i>trivia</i><par n="MC__n"/></e>
<e c="tennisvariant" lm="padel"><i>padel</i><par n="MC__n"/></e>
<e lm="fast track"> <i>fast<b/>track</i><par n="MC__n"/></e>
<e lm="chargé d'affaires"> <i>chargé<b/>d'affaires</i><par n="MC__n"/></e>
<e lm="chargé d’affaires"> <i>chargé<b/>d’affaires</i><par n="MC__n"/></e>
<e lm="subsea"> <i>subsea</i><par n="MC__n"/></e>
<e lm="fair play"> <i>fair<b/>play</i><par n="MC__n"/></e>
<e r="LR" lm="fairplay"> <p><l>fairplay</l><r>fair<b/>play</r></p><par n="MC_no-cp__n"/></e>
Expand Down Expand Up @@ -98468,7 +98465,7 @@
<e lm="brusteinsball"><i>brusteinsball</i><par n="ja__n"/></e>
<e lm="brutalisering"><i>brutalisering</i><par n="då_\s__n"/></e>
<e lm="brutilhenger"><i>brutilhenger</i><par n="uer__n"/></e>
<e lm="brutter'n"><i>brutter'n</i><par n="småen__n"/></e>
<e lm="brutter’n"><i>brutter’n</i><par n="småen__n"/></e>
<e lm="bruttoareal"><i>bruttoareal</i><par n="ja__n"/></e>
<e lm="bruttoavanse"><i>bruttoavanse</i><par n="re_\Ø__n"/></e>
<e lm="bruttoavkastning"><i>bruttoavkastning</i><par n="då__n"/></e>
Expand Down Expand Up @@ -99714,7 +99711,7 @@
<e lm="colombianer"><i>colombianer</i><par n="uer__n"/></e>
<e lm="coloradobille"><i>coloradobille</i><par n="re_\Ø__n"/></e>
<e lm="columbi egg"><i>columbi<b/>egg</i><par n="ul__n"/></e>
<e lm="commedia dell'arte"><i>commedia<b/>dell'arte</i><par n="MC__n"/></e>
<e lm="commedia dell’arte"><i>commedia<b/>dell’arte</i><par n="MC__n"/></e>
<e lm="commedia"><i>commedia</i><par n="MC__n"/></e>
<e lm="computer"><i>computer</i><par n="uer__n"/></e>
<e lm="computertomografi"><i>computertomografi</i><par n="ep_\Ø__n"/></e>
Expand All @@ -99729,7 +99726,7 @@
<e lm="copyrightside"><i>copyrightsid</i><par n="enk/e__n"/></e>
<e lm="cornerflagg"><i>cornerflagg</i><par n="ul__n"/></e>
<e lm="corps de ballet"><i>corps<b/>de<b/>ballet</i><par n="sør__n"/></e>
<e lm="ballet d'action"><i>ballet<b/>d'action</i><par n="sør__n"/></e>
<e lm="ballet d’action"><i>ballet<b/>d’action</i><par n="sør__n"/></e>
<e lm="corps diplomatique"><i>corps<b/>diplomatique</i><par n="sør__n"/></e>
<e lm="corsicaner"><i>corsicaner</i><par n="uer__n"/></e>
<e lm="costaricaner"><i>costaricaner</i><par n="uer__n"/></e>
Expand Down Expand Up @@ -104173,7 +104170,7 @@
<e lm="fatling"><i>fatling</i><par n="då__n"/></e>
<e lm="fatning"><i>fatning</i><par n="då__n"/></e>
<e lm="fatteevne"><i>fatteevn</i><par n="enk/e__n"/></e>
<e lm="fatter'n"><i>fatter'n</i><par n="småen__n"/></e>
<e lm="fatter’n"><i>fatter’n</i><par n="småen__n"/></e>
<e lm="fattigdomsbekjempelse"><i>fattigdomsbekjempelse</i><par n="re_\s__n"/></e>
<e lm="fattigdomsgrense"><i>fattigdomsgrens</i><par n="enk/e__n"/></e>
<e lm="fattigdomsproblem"><i>fattigdomsproblem</i><par n="ja__n"/></e>
Expand Down Expand Up @@ -105060,7 +105057,7 @@
<e lm="finersag"><i>finersag</i><par n="då__n"/></e>
<e lm="finfordeling"><i>finfordeling</i><par n="då__n"/></e>
<e lm="finfølelse"><i>finfølelse</i><par n="re_\s__n"/></e>
<e lm="finger'n"><i>finger'n</i><par n="småen__n"/></e>
<e lm="finger’n"><i>finger’n</i><par n="småen__n"/></e>
<e lm="fingerbredde"><i>fingerbredde</i><par n="re_\Ø__n"/></e>
<e lm="fingerbreidd"><i>fingerbreidd</i><par n="då__n"/></e>
<e lm="fingerbøl"><i>fingerbøl</i><par n="ul__n"/></e>
Expand Down Expand Up @@ -110320,7 +110317,7 @@
<e lm="genspleising"> <i>genspleising</i><par n="då__n"/></e>
<e lm="genteknikk"> <i>genteknikk</i><par n="ep_\Ø__n"/></e>
<e lm="genteknologi"> <i>genteknologi</i><par n="ep_\Ø__n"/></e>
<e lm="gentlemen's agreement"><i>gentlemen's<b/>agreement</i><par n="sør__n"/></e>
<e lm="gentlemen’s agreement"><i>gentlemen’s<b/>agreement</i><par n="sør__n"/></e>
<e lm="genuafokk"> <i>genuafokk</i><par n="då__n"/></e>
<e lm="genueser"> <i>genueser</i><par n="uer__n"/></e>
<e lm="genvitenskap"> <i>genvitenskap</i><par n="ep_\Ø__n"/></e>
Expand Down Expand Up @@ -115099,7 +115096,7 @@
<e lm="hornskei"> <i>hornskei</i><par n="då__n"/></e>
<e lm="hornskje"> <i>hornskje</i><par n="då__n"/></e>
<e lm="hornugle"> <i>hornugl</i><par n="enk/e__n"/></e>
<e lm="hors d'oeuvre"> <i>hors<b/>d'oeuvre</i><par n="mai__n"/></e>
<e lm="hors d’oeuvre"> <i>hors<b/>d’oeuvre</i><par n="mai__n"/></e>
<e lm="horse"> <i>hors</i><par n="enk/e__n"/></e>
<e lm="horv"> <i>horv</i><par n="då__n"/></e>
<e lm="horving"> <i>horving</i><par n="då__n"/></e>
Expand Down Expand Up @@ -127014,7 +127011,7 @@
<e lm="køye"> <i>køy</i><par n="enk/e__n"/></e>
<e lm="køyeseng"> <i>køyeseng</i><par n="då__n"/></e>
<e lm="køying"> <i>køying</i><par n="då__n"/></e>
<e lm="l'hombre"> <i>l'hombre</i><par n="mai__n"/></e>
<e lm="l’hombre"> <i>l’hombre</i><par n="mai__n"/></e>
<e lm="la dolce vita"> <i>la<b/>dolce<b/>vita</i><par n="MC__n"/></e>
<e lm="la-skure-mentalitet"><i>la-skure-mentalitet</i><par n="ep_\Ø__n"/></e>
<e lm="labbing"> <i>labbing</i><par n="då__n"/></e>
Expand Down Expand Up @@ -129501,7 +129498,7 @@
<e lm="lottovinner"> <i>lottovinner</i><par n="leser__n"/></e>
<e lm="lotusblomst"> <i>lotusblomst</i><par n="ep_\Ø__n"/></e>
<e lm="lotusstilling"> <i>lotusstilling</i><par n="då__n"/></e>
<e lm="louisd'or"> <i>louisd'or</i><par n="ep_\Ø__n"/></e>
<e lm="louisd’or"> <i>louisd’or</i><par n="ep_\Ø__n"/></e>
<e lm="lovarbeid"> <i>lovarbeid</i><par n="marked__n"/></e>
<e lm="lovart"> <i>lovart</i><par n="MC__n"/></e>
<e lm="lovavdeling"> <i>lovavdeling</i><par n="då__n"/></e>
Expand Down Expand Up @@ -134247,8 +134244,8 @@
<e lm="mute"> <i>mut</i><par n="enk/e__n"/></e>
<e lm="muting"> <i>muting</i><par n="då_\s__n"/></e>
<e lm="mutter"> <i>mut</i><par n="mut/ter__n"/></e>
<e lm="mutter'n"> <i>mutter'n</i><par n="småen__n"/></e>
<e lm="pepper'n"> <i>pepper'n</i><par n="småen__n"/></e>
<e lm="mutter’n"> <i>mutter’n</i><par n="småen__n"/></e>
<e lm="pepper’n"> <i>pepper’n</i><par n="småen__n"/></e>
<e lm="mutthet"> <i>mutthet</i><par n="då__n"/></e>
<e lm="muzak"> <i>muzak</i><par n="ep_\Ø__n"/></e>
<e lm="myanmarer"> <i>myanmarer</i><par n="uer__n"/></e>
Expand Down Expand Up @@ -192683,7 +192680,7 @@
<e lm="dupuytrens kontraktur"> <i>dupuytrens<b/>kontraktur</i><par n="ep_\Ø__n"/></e>
<e lm="dårlige boforhold"> <i>dårlige<b/>boforhold</i><par n="ul__n"/></e>
<e lm="eczema herpeticum"> <i>eczema<b/>herpeticum</i><par n="MC__n"/></e>
<e lm="ehlers-danlos' syndrom"> <i>ehlers-danlos'<b/>syndrom</i><par n="MC__n"/></e>
<e lm="ehlers-danlos’ syndrom"> <i>ehlers-danlos’<b/>syndrom</i><par n="MC__n"/></e>
<e lm="ektopisk acth-syndrom"> <i>ektopisk<b/>acth-syndrom</i><par n="ja_\Ø__n"/></e>
<e lm="erythema infectiosum"> <i>erythema<b/>infectios</i><par n="vel/um__n"/></e>
<e lm="erythema nodosum"> <i>erythema<b/>nodosum</i><par n="bom__n"/></e>
Expand Down Expand Up @@ -193717,7 +193714,8 @@
<!-- not using letter pardefs since that makes the final transducer huge -->
<e lm="np-guio regex"><re>[A-ZÆØÅ][a-zæøå]+\-</re><p><l/><r><s n="np"/><s n="guess"/></r></p><par n="cp-guio"/></e>
<e lm="np-guio regex"><re>[A-ZÆØÅ][A-ZÆØÅ]+\-</re><p><l/><r><s n="np"/><s n="guess"/></r></p><par n="cp-guio"/></e>
<e lm="np-apos regex"><re>[A-ZÆØÅ][A-ZÆØÅa-zæøå\-]+[SZXszx]'</re><p><l/><r><s n="np"/><s n="guess"/><s n="gen"/></r></p></e>
<e lm="np-apos regex"><re>[A-ZÆØÅ][A-ZÆØÅa-zæøå\-]+[SZXszx]</re><p><l>’</l><r>’<s n="np"/><s n="guess"/><s n="gen"/></r></p></e>
<!-- Note: ’ has to be outside of regex in order for acx to take effect -->

<!-- Numbers are *not* in inconditional, since that would block compounding with them: -->
<e> <par n="numeros"/></e>
Expand Down
12 changes: 12 additions & 0 deletions apertium-nob.nob.rlx
Original file line number Diff line number Diff line change
Expand Up @@ -2752,6 +2752,18 @@ SELECT:name-seen-samecase-midsent np + ("(<\\p{Lu}.*>)"r) IF
# «Sammenlagtpremien er den gjeveste, sa Kilde til NTB. Kilde og Alexis Pinturault kjempet om totalseieren …»



# The foot sign (') only means "foot" after a number; otherwise it is a
# typewriter apostrophe:
#
# fot-fot ("foot"): when the cohort immediately to the left matches `siffer`,
# keep only the readings carrying the "'" tag.
# NOTE(review): `siffer` is defined elsewhere in this file — presumably a
# digit/number set; confirm.
SELECT:fot-fot ("'") IF
(-1 siffer)
;
# fot-tomme ("inch"): the same selection for a second ' in a row, i.e. when
# the previous cohort is itself "'" and the one before that matches `siffer`.
SELECT:fot-tomme ("'") IF
(-2 siffer)
(-1 ("'")) # TODO: maybe '' as its own entry?
;
# fot-apos: no context condition — in every remaining position the "'"
# reading is removed, leaving the other readings of the cohort.
REMOVE:fot-apos ("'");


###################################################
# SECTION Early removal of unlikely / rare forms: #
###################################################
Expand Down
Loading

0 comments on commit 5b3f602

Please sign in to comment.