diff --git a/src/WordNet.jl b/src/WordNet.jl index 7bb0723..6a5c58d 100644 --- a/src/WordNet.jl +++ b/src/WordNet.jl @@ -9,5 +9,6 @@ include("synset.jl") include("db.jl") include("operations.jl") include("sensekeys.jl") +include("sensecounts.jl") end diff --git a/src/db.jl b/src/db.jl index 0af4c45..e3c2c19 100644 --- a/src/db.jl +++ b/src/db.jl @@ -4,13 +4,15 @@ immutable DB lemmas::Dict{Char, Dict{AbstractString, Lemma}} synsets::Dict{Char, Dict{Int, Synset}} sensekeys::Dict{Tuple{Int, AbstractString}, AbstractString} + counts::Dict{AbstractString, Int} end function DB(base_dir::AbstractString) DB( load_lemmas(base_dir), load_synsets(base_dir), - load_sensekeys(base_dir) + load_sensekeys(base_dir), + load_counts(base_dir) ) end @@ -80,6 +82,19 @@ function load_sensekeys(basedir) end +function load_counts(basedir) + path=joinpath(basedir, "dict", "cntlist") + counts =Dict{AbstractString, Int}() + + for line in eachline(path) + tag_cnt, sense_key, sense_number = split(line) + counts[sense_key] = parse(Int, tag_cnt) + end + + counts +end + + function path_to_data_file(base_dir, pos) joinpath(base_dir, "dict", "data.$(SYNSET_TYPES[pos])") end diff --git a/src/sensecounts.jl b/src/sensecounts.jl new file mode 100644 index 0000000..fb1ee2f --- /dev/null +++ b/src/sensecounts.jl @@ -0,0 +1,12 @@ +export sensecount, sensecounts + +function sensecount(db::DB, ss::Synset, lem::Lemma) + get(db.counts, sensekey(db, ss, lem), 0) + # zero is default for senses that are not found in CNTLIST + # note: this will still error for senses that don't have a sense key + # that is a good thing. 
+end + +function sensecounts(db::DB, lem::Lemma) + Dict([ss=>sensecount(db, ss, lem) for ss in synsets(db, lem)]) +end diff --git a/test/test_db.jl b/test/test_db.jl index d03d8f9..f35ff83 100644 --- a/test/test_db.jl +++ b/test/test_db.jl @@ -2,8 +2,9 @@ facts("DB") do const mock_db = DB( Dict{Char, Dict{AbstractString, Lemma}}(), Dict{Char, Dict{Int, Synset}}(), - Dict{Tuple{Int,AbstractString}, AbstractString}() - ) + Dict{Tuple{Int,AbstractString}, AbstractString}(), + Dict{AbstractString, Int}() + ) context("path_to_data_file") do expected = joinpath("MockDB", "dict", "data.verb")