-
Notifications
You must be signed in to change notification settings - Fork 1
/
script_back_off.r
80 lines (75 loc) · 2.25 KB
/
script_back_off.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/Rscript
#back_off <- function(gp, dtq_ml_wuvxyz, dtq_ml_wuvxy, dtq_ml_wuvx,
# dtq_ml_wuv, dtq_ml_wv, dtq_ml_w)
# Suggest next-word candidates for a phrase using a back-off lookup.
#
# The phrase is cleaned, reduced by get_last_four_words(), and looked up in
# the table that matches its word count. Whenever is_zero() reports an empty
# result, the first word is dropped with remove_first_term() and the next
# smaller table is tried. If nothing is found at any level, the result of
# get_top_unigrams(dtq_ml_w) is returned.
#
# Args:
#   gp: the phrase to look up (a character value).
#
# Returns:
#   Whatever get_data() or get_top_unigrams() returned for the level that
#   finally matched.
#
# Note: the lookup tables dtq_ml_w, dtq_ml_wv, dtq_ml_wuv, dtq_ml_wuvx and
# dtq_ml_wuvxy are not parameters; they are resolved from the enclosing
# environment when the function runs.
back_off <- function(gp)
{
  # --- Clean the input ------------------------------------------------------
  # Every step must be assigned back to gp; a bare call only computes a
  # value and throws it away.
  gp <- iconv(gp, "latin1", "ASCII", sub = "")  # drop non-ASCII characters
  gp <- gsub("'", "", gp)
  gp <- gsub("\\(|\\)", "", gp)
  gp <- gsub("\\[|\\]", "", gp)
  gp <- gsub("\\{|\\}", "", gp)
  gp <- gsub("\\*", "", gp)
  gp <- gsub("\\.\\.", "", gp)
  gp <- trim_all(gp)

  # Diagnostic output describing the cleaned input.
  print(paste("length(gp)==0:", length(gp)==0))
  print(paste("typeof(gp)=='character':", typeof(gp)=="character"))
  print(paste("typeof(gp):", typeof(gp)))

  # This makes it unnecessary to check for input with length > 4.
  gp <- get_last_four_words(gp)

  # Table to query for a context of n_context words. switch() evaluates only
  # the selected argument, so tables that are not needed are never touched.
  table_for_context <- function(n_context)
  {
    switch(n_context, dtq_ml_wv, dtq_ml_wuv, dtq_ml_wuvx, dtq_ml_wuvxy)
  }

  # --- Back-off lookup ------------------------------------------------------
  n_words <- get_pattern_length(gp)
  data_found <- NULL
  if (n_words >= 1 && n_words <= 4)
  {
    repeat
    {
      data_found <- get_data(gp, table_for_context(n_words))
      # Stop on the first hit, or once the one-word context has been tried.
      if (n_words == 1 || !is_zero(data_found))
      {
        break
      }
      gp <- remove_first_term(gp)
      n_words <- n_words - 1
    }
  }

  # Empty input, or no match at any level: fall back to the top unigrams.
  if (is.null(data_found) || is_zero(data_found))
  {
    data_found <- get_top_unigrams(dtq_ml_w)
  }
  # Modify to optionally only return first 3 results.
  data_found
}