Skip to content

Commit

Permalink
Merge pull request #47 from tshatrov/dec23
Browse files Browse the repository at this point in the history
December 23 release branch
  • Loading branch information
tshatrov authored Jan 7, 2024
2 parents f0386e2 + 395315f commit 783e29b
Show file tree
Hide file tree
Showing 11 changed files with 252 additions and 89 deletions.
33 changes: 33 additions & 0 deletions data/sources/extra.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,37 @@
<gloss xml:lang="eng">without</gloss>
</sense>
</entry>
<!-- Custom entry: 甲斐もない / 甲斐も無い, reading かいもない; tagged exp and adj-i,
     glossed "pointless" / "in vain". ent_seq holds the headword text here
     rather than a numeric id. -->
<entry>
<ent_seq>甲斐もない</ent_seq>
<k_ele>
<keb>甲斐もない</keb>
</k_ele>
<k_ele>
<keb>甲斐も無い</keb>
</k_ele>
<r_ele>
<reb>かいもない</reb>
</r_ele>
<sense>
<pos>exp</pos>
<pos>adj-i</pos>
<gloss xml:lang="eng">pointless</gloss>
<gloss xml:lang="eng">in vain</gloss>
</sense>
</entry>
<!-- Custom entry: 観了, reading かんりょう; tagged n, vs and vt,
     glossed "finishing watching". ent_seq holds the headword text here
     rather than a numeric id. -->
<entry>
<ent_seq>観了</ent_seq>
<k_ele>
<keb>観了</keb>
</k_ele>
<r_ele>
<reb>かんりょう</reb>
</r_ele>
<sense>
<pos>n</pos>
<pos>vs</pos>
<pos>vt</pos>
<gloss xml:lang="eng">finishing watching</gloss>
</sense>
</entry>
</outer>
8 changes: 7 additions & 1 deletion dict-counters.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,10 @@
;; Hash table for special counters; created empty with the default (EQL) test.
;; NOTE(review): presumably filled by the def-special-counter forms and keyed
;; by entry seq — confirm against the macro definition.
(defparameter *special-counters* (make-hash-table))

;; Counter suffix table. Each entry is (keyword text kana description); the
;; keywords are the ones referenced from *counter-accepts*.
;; NOTE(review): the text slot of :kan and :chuu is the empty string in this
;; view while :kango carries "間後" — confirm the literals against the
;; repository in case a character was lost.
(defparameter *counter-suffixes* '((:kan "" "かん" "[duration]")
(:kango "間後" "かんご" "[after ...]")
(:chuu "" "ちゅう" "[among/out of ...]")))

;; Alist of (seq suffix-keyword...) whose keywords come from *counter-suffixes*.
;; Presumably lists which suffixes each counter entry may take — confirm
;; against the code that reads it.
;; Defined once: an immediately preceding definition of the same variable was
;; dead because DEFPARAMETER always re-assigns, so only this value was ever
;; in effect.
(defparameter *counter-accepts* '((1194480 :kan) (1490430 :kan) (1333450 :kan :kango)))

;; List of entry seqs, currently just 1120410.
;; NOTE(review): judging by the name these are counters treated as foreign
;; (loan) words — confirm against the code that consults this list.
(defparameter *counter-foreign* '(1120410))

Expand Down Expand Up @@ -375,6 +376,7 @@
:source (find (car ,text-var) ,readings-var :key 'text :test 'equal)
,keys-var))
)
(declare (ignorable (function args-suffix)))
(list ,@body))))))

(def-special-counter 1203020 ()
Expand Down Expand Up @@ -677,6 +679,10 @@
;; Special counter for entry 1175140: a single 'counter-hifumi spec with kana
;; "えき", limited to :digit-set '(1 2).
;; NOTE(review): the first string argument is empty in this view — confirm it
;; against the repository in case a character was lost.
(def-special-counter 1175140 ()
(args 'counter-hifumi "" "えき" :digit-set '(1 2)))

;; Special counter for entry 2855028: a single 'counter-hifumi spec with text
;; "揃え" and kana "そろえ", limited to :digit-set '(1 2).
(def-special-counter 2855028 ()
(args 'counter-hifumi "揃え" "そろえ" :digit-set '(1 2)))


;; Subclass of counter-text whose ALLOWED slot defaults to the numbers
;; 1-10, 14, 20, 24 and 30.
;; NOTE(review): presumably the day counts that take a native (kun) reading —
;; confirm against the methods specialised on this class.
(defclass counter-days-kun (counter-text)
((allowed :initform '(1 2 3 4 5 6 7 8 9 10 14 20 24 30))))

Expand Down
6 changes: 5 additions & 1 deletion dict-custom.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ Returns 2 values, whether the entry should be either added or updated, and which

(defmethod insert ((loader xml-loader))
  "Load every entry held by LOADER through ichiran/dict::load-entry.
Each entry is passed its XML content and its seq; :if-exists :skip is supplied
so that existing entries are skipped, and :conjugate-p t is supplied for every
entry."
  ;; A single DO clause: an earlier, already-closed copy of the load-entry call
  ;; (without :conjugate-p) left this form with unbalanced parentheses.
  (loop for entry in (entries loader)
        do (ichiran/dict::load-entry
            (xml-entry-content entry)
            :if-exists :skip
            :seq (xml-entry-seq entry)
            :conjugate-p t)))

(defclass csv-loader (custom-source)
((description :initform "csv")
Expand Down
65 changes: 51 additions & 14 deletions dict-errata.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@
:text reading
:source-text src-reading)))))))

(defun add-reading (seq reading &key (common :null) (conjugate-p t))
(defun add-reading (seq reading &key (common :null) (conjugate-p t) (table nil))
(let* ((is-kana (test-word reading :kana))
(table (if is-kana 'kana-text 'kanji-text))
(table (or table (if is-kana 'kana-text 'kanji-text)))
(entry (get-dao 'entry seq)))
(when (not (select-dao table (:and (:= 'seq seq) (:= 'text reading))))
(let* ((maxord (query (:select (:max 'ord) :from table :where (:= 'seq seq)) :single))
(ord (if (eql maxord :null) 0 (1+ maxord))))
(make-dao table :text reading :seq seq :ord ord :common common :conjugate-p conjugate-p)
(if is-kana
(if (eql table 'kana-text)
(incf (n-kana entry))
(incf (n-kanji entry)))
(update-dao entry)))
Expand All @@ -65,17 +65,17 @@
(select-dao 'kanji-text (:in 'seq (:set seqs))))))
(mapcar 'set-reading readings)))

(defun delete-reading (seq reading)
(defun delete-reading (seq reading &key (table nil))
(let* ((is-kana (test-word reading :kana))
(table (if is-kana 'kana-text 'kanji-text))
(table (or table (if is-kana 'kana-text 'kanji-text)))
(entry (get-dao 'entry seq))
(to-delete (select-dao table (:and (:= 'seq seq) (:= 'text reading))))
(deleted 0))
(when to-delete
(dolist (obj to-delete)
(delete-dao obj)
(incf deleted))
(if is-kana
(if (eql table 'kana-text)
(decf (n-kana entry) deleted)
(decf (n-kanji entry) deleted))
(update-dao entry)
Expand Down Expand Up @@ -563,6 +563,7 @@
(add-errata-jan21)
(add-errata-may21)
(add-errata-jan22)
(add-errata-dec23)
(add-errata-counters)

(ichiran/custom:load-custom-data '(:extra) t)
Expand Down Expand Up @@ -780,7 +781,7 @@
(set-common 'kana-text 2147610 "いなくなる" 0)

(set-common 'kana-text 1346290 "マス" 37)
(add-sense-prop 1346290 2 "misc" "uk")
(add-sense-prop 1346290 3 "misc" "uk")
(set-primary-nokanji 1346290 t)

(set-primary-nokanji 1409110 nil)
Expand All @@ -795,8 +796,6 @@
(defun add-errata-jan20 ()
(add-reading 2839843 "うえをしたへ")
(delete-reading 2839843 "うえをしたえ")
(add-reading 1930050 "バラす")
(add-conj-reading 1930050 "バラす")
(add-reading 1593170 "コケる")
(add-conj-reading 1593170 "コケる")

Expand Down Expand Up @@ -926,8 +925,6 @@

;; these words had no kana in jmdict
(add-reading 1161240 "いっかねん")
(add-reading 2209300 "たへる")
(add-conj-reading 2209300 "たへる") ;; this doesn't actually work because there are no existing conjugations but whatever

(set-common 'kana-text 2008650 "そうした" :null)
(add-sense-prop 1188270 0 "pos" "n") ;; 何か
Expand All @@ -943,6 +940,36 @@

)

;; Errata batch for the December 2023 release: an ordered sequence of database
;; corrections (readings, sense properties, commonness values). Takes no
;; arguments; the statement order is kept exactly as written.
(defun add-errata-dec23 ()
;; Entry 2220325: the :table keyword forces the target table instead of letting
;; add-reading / delete-reading pick it from whether the text is kana.
;; NOTE(review): every string literal in these five calls is empty in this
;; view — confirm the actual texts against the repository; characters may have
;; been lost.
(add-reading 2220325 "" :table 'kanji-text)
(add-reading 2220325 "" :table 'kanji-text)
(delete-reading 2220325 "" :table 'kana-text)
(delete-reading 2220325 "" :table 'kana-text)
(add-reading 2220325 "")

;; Adjust the "uk" misc tag: added to sense 0 of one entry, removed from three.
(add-sense-prop 1180540 0 "misc" "uk") ;; おっす
(delete-sense-prop 2854117 "misc" "uk") ;; おき but I boost it later with synergy
(delete-sense-prop 2859257 "misc" "uk") ;; あれ (imperative of 有る)
(delete-sense-prop 1198890 "misc" "uk") ;; 解く

;; 2826371: add "uk" to sense 0 and drop the "rare" misc tag.
(add-sense-prop 2826371 0 "misc" "uk")
(delete-sense-prop 2826371 "misc" "rare") ;; いつなりと

;; Commonness overrides via set-common.
;; NOTE(review): :null presumably clears the commonness value and 0 presumably
;; marks the reading as most common — confirm against set-common.
;; はいかん
(set-common 'kana-text 1625620 "はいかん" :null)
(set-common 'kana-text 1625610 "はいかん" :null)
(set-common 'kana-text 1681460 "はいかん" :null)

(set-common 'kanji-text 2855480 "乙女" 0)
(set-common 'kana-text 2855480 "おとめ" 0)

(set-common 'kana-text 1930050 "バラす" 0)
(set-common 'kana-text 1582460 "ないかい" :null)
(set-common 'kana-text 1202300 "かいが" 0)

(set-common 'kanji-text 1328740 "狩る" 0)
)


(defun add-errata-counters ()
(delete-reading 1299960 "さんかい")
Expand Down Expand Up @@ -1001,9 +1028,6 @@
(add-sense-prop 1505390 0 "pos" "ctr") ;; 文字

(add-sense-prop 1101700 0 "pos" "ctr") ;; パック
(add-sense-prop 1101700 1 "pos" "n")
(add-sense-prop 1101700 1 "pos" "vs")

(add-sense-prop 1120410 0 "pos" "ctr") ;; ページ
(add-sense-prop 1138570 0 "pos" "ctr") ;; ラウンド
(add-sense-prop 1956400 0 "pos" "ctr") ;; 集
Expand All @@ -1030,6 +1054,8 @@
(add-sense-prop 1732510 1 "pos" "ctr") ;; 番手
(add-sense-prop 1732510 2 "pos" "ctr")
(add-sense-prop 2086480 1 "pos" "ctr") ;; 頭身

(add-sense-prop 1331080 0 "pos" "ctr") ;; 周忌
)


Expand Down Expand Up @@ -1069,6 +1095,7 @@
2718360 ;; がな
2201380 ;; わい
2722170 ;; のう
2751630 ;; かいな
)
"Words that only have meaning when they're final")

Expand Down Expand Up @@ -1132,6 +1159,16 @@
(dolist (rule rules)
(push rule (gethash pos hash nil))))

(let* ((pos (get-pos-index "adj-ix"))
(rules (list (make-conjugation-rule pos +conj-adverbial+ nil nil 1
1 "" "" "")
(make-conjugation-rule pos +conj-adjective-stem+ nil nil 1
1 "" "" "")
(make-conjugation-rule pos +conj-adjective-literary+ nil nil 1
1 "" "" ""))))
(dolist (rule rules)
(push rule (gethash pos hash nil))))

(let ((pos (get-pos-index "v5aru")))
(push (make-conjugation-rule pos 3 nil nil 2 1 "" "" "")
(gethash pos hash nil)))
Expand Down
12 changes: 12 additions & 0 deletions dict-fix.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,15 @@
for seq in seqs
do (conjugate-entry-outer seq :conj-types (list +conj-adjective-literary+) :as-posi '("adj-i"))
if (zerop (mod cnt 100)) do (format t "~a entries processed~%" cnt))))


(defun add-adj-ix-conjs ()
  "Run conjugate-entry-outer for every seq that has a \"pos\" sense-prop of
\"adj-ix\" and is not listed in *do-not-conjugate-seq*, requesting the
adverbial, adjective-stem and literary-adjective conjugation types.
Prints a progress line after each 100 entries. Returns NIL."
  (let ((processed 0))
    (dolist (seq (query (:select 'seq :distinct :from 'sense-prop
                         :where (:and (:not (:in 'seq (:set *do-not-conjugate-seq*)))
                                      (:= 'tag "pos")
                                      (:= 'text "adj-ix")))
                        :column))
      ;; A fresh conj-types list is built for each entry, as before.
      (conjugate-entry-outer seq
                             :conj-types (list +conj-adverbial+
                                               +conj-adjective-stem+
                                               +conj-adjective-literary+)
                             :as-posi '("adj-ix"))
      (incf processed)
      (when (zerop (mod processed 100))
        (format t "~a entries processed~%" processed)))))
42 changes: 35 additions & 7 deletions dict-grammar.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
:tosuru "to try to .../to be about to..."
:garu "to feel .../have a ... impression of someone"
:me "somewhat/-ish"
:gai "worth it to ..."
;; these are used for splitsegs
2826528 "polite prefix" ;; お
2028980 "at / in / by" ;; で
Expand Down Expand Up @@ -201,10 +202,10 @@

(loop for kf in (get-kana-forms 1577980) ;; いる (る)
for tkf = (text kf)
do (setf (gethash tkf *suffix-cache*) (list (if (> (length tkf) 1) :te++ :te+) kf)
do (setf (gethash tkf *suffix-cache*) (list (if (> (length tkf) 1) :teiru+ :teiru) kf)
(gethash (seq kf) *suffix-class*) :iru)
(when (> (length tkf) 1)
(setf (gethash (subseq tkf 1) *suffix-cache*) (list :te+ kf))))
(setf (gethash (subseq tkf 1) *suffix-cache*) (list :teiru kf))))

(load-conjs :te 1547720 :kuru) ;; くる

Expand Down Expand Up @@ -279,6 +280,8 @@
(load-kf :iadj (get-kana-form 2006580 ""))
(load-kf :iadj (get-kana-form 1604890 "") :class :me)

(load-kf :ren- (get-kana-form 2606690 "がい") :class :gai)

(load-abbr :nai "ねえ")
(load-abbr :nai "ねぇ")
(load-abbr :nai "ねー")
Expand Down Expand Up @@ -357,7 +360,8 @@
,primary-words)))))

;; :tai suffix (connector "", score 5). A root found in the exclusion list is
;; rejected outright; any other root is looked up with
;; find-word-with-conj-type using conjugation type 13.
;; Only the guarded lookup is kept: a leftover unguarded lookup line closed the
;; form early and left this one dangling with an unbalanced parenthesis.
;; NOTE(review): the single excluded string is empty in this view — confirm the
;; literal against the repository in case a character was lost.
(def-simple-suffix suffix-tai :tai (:connector "" :score 5) (root)
  (unless (member root '("") :test 'equal)
    (find-word-with-conj-type root 13)))

(def-simple-suffix suffix-ren :ren (:connector "" :score 5) (root)
;; generic ren'youkei suffix
Expand All @@ -377,11 +381,14 @@
;; suffix-te: def-simple-suffix for keyword :te, connector "", score 0.
;; The body checks ROOT with te-check.
(def-simple-suffix suffix-te :te (:connector "" :score 0) (root)
(te-check root))

;; suffix-te+: def-simple-suffix for keyword :te+, connector "", score 3.
;; The body checks ROOT with te-check.
;; NOTE(review): confirm that something still registers the :te+ keyword;
;; otherwise this definition is unused.
(def-simple-suffix suffix-te+ :te+ (:connector "" :score 3) (root)
(te-check root))
(defun teiru-check (root)
  "Return NIL when ROOT is exactly \"いて\"; otherwise return whatever
te-check returns for ROOT."
  (unless (equal root "いて")
    (te-check root)))

;; suffix-te++: def-simple-suffix for keyword :te++, connector "", score 6.
;; The body checks ROOT with te-check.
;; NOTE(review): confirm that something still registers the :te++ keyword;
;; otherwise this definition is unused.
(def-simple-suffix suffix-te++ :te++ (:connector "" :score 6) (root)
(te-check root))
;; suffix-teiru: def-simple-suffix for keyword :teiru, connector "", score 3.
;; The body checks ROOT with teiru-check (te-check minus the root "いて").
(def-simple-suffix suffix-teiru :teiru (:connector "" :score 3) (root)
(teiru-check root))

;; suffix-teiru+: def-simple-suffix for keyword :teiru+, connector "", score 6
;; (higher than :teiru's 3). The body checks ROOT with teiru-check.
(def-simple-suffix suffix-teiru+ :teiru+ (:connector "" :score 6) (root)
(teiru-check root))

;; suffix-te+space: def-simple-suffix for keyword :te+space, score 3; unlike
;; the other te suffixes its connector is a single space.
;; The body checks ROOT with te-check.
(def-simple-suffix suffix-te+space :te+space (:connector " " :score 3) (root)
(te-check root))
Expand Down Expand Up @@ -527,6 +534,7 @@

;; Add :mo, :nikui and :gai to *suffix-unique-only*; PUSHNEW leaves the list
;; unchanged for a keyword that is already present, so reloading is harmless.
;; NOTE(review): the effect of membership in this list is decided by code
;; outside this view — confirm there.
(pushnew :mo *suffix-unique-only*)
(pushnew :nikui *suffix-unique-only*)
(pushnew :gai *suffix-unique-only*)

(defmacro def-abbr-suffix (name keyword stem
(root-var &optional suf-var patch-var)
Expand Down Expand Up @@ -924,6 +932,12 @@
:score 50
:connector " ")

;; synergy-oki: generic synergy with score 20 and an empty connector.
;; First filter: filter-is-pos with the pos list ("ctr").
;; Second filter: segment seq is 2854117 or 2084550.
;; NOTE(review): presumably the first filter applies to the left segment (a
;; counter) and the second to the right one — confirm against
;; def-generic-synergy.
(def-generic-synergy synergy-oki (l r)
(filter-is-pos ("ctr") (segment k p c l) t)
(filter-in-seq-set 2854117 2084550)
:score 20
:connector "")

(defun get-synergies (segment-list-left segment-list-right)
  "Call each function in *synergy-list* with SEGMENT-LIST-LEFT and
SEGMENT-LIST-RIGHT and destructively concatenate the lists they return,
in *synergy-list* order."
  (mapcan (lambda (synergy-fn)
            (funcall synergy-fn segment-list-left segment-list-right))
          *synergy-list*))
Expand Down Expand Up @@ -1061,6 +1075,14 @@
(constantly nil)
(filter-is-compound-end-text "ちゃい" "いか" "とか" "とき" ""))

;; some of adj-ix words end with 好い which produces a confusing 好き conjugation
;; this should disable it
;; First filter is (constantly nil), so it accepts no segment at all.
;; Second filter matches a segment that is a +conj-adjective-literary+
;; conjugation and whose text ends with "好き".
;; NOTE(review): presumably the first filter is for the left segment and the
;; second for the right one — confirm against def-segfilter-must-follow.
(def-segfilter-must-follow segfilter-sukiyoki (l r)
(constantly nil)
(lambda (segment)
(and (funcall (filter-is-conjugation +conj-adjective-literary+) segment)
(alexandria:ends-with-subseq "好き" (get-text segment)))))

;; (def-segfilter-must-follow segfilter-itsu (l r)
;; (complement (filter-is-compound-end-text "い"))
;; (filter-in-seq-set 2221640 1013250)
Expand Down Expand Up @@ -1104,6 +1126,12 @@
(filter-in-seq-set 1157170 2424740 1305070) ;; する して
:allow-first t)

;; segfilter-dekiru: the first filter accepts any segment whose seq is NOT
;; 1896380 or 2422860; the second matches seqs 2830009 and 1547720.
;; NOTE(review): presumably the first filter is for the left segment and the
;; second for the right one, and :allow-first t lets the second-filter segment
;; stand with nothing before it — confirm against def-segfilter-must-follow.
(def-segfilter-must-follow segfilter-dekiru (l r)
;; 出 followed by 来る or 来てる
(complement (filter-in-seq-set 1896380 2422860))
(filter-in-seq-set 2830009 1547720)
:allow-first t)

(defun apply-segfilters (seg-left seg-right)
(loop with splits = (list (list seg-left seg-right))
for segfilter in *segfilter-list*
Expand Down
Loading

0 comments on commit 783e29b

Please sign in to comment.