For display #7

Open: wants to merge 2 commits into master
11 changes: 7 additions & 4 deletions src/RCode/IDEA-figures/discrtization_approximation.R
@@ -41,11 +41,14 @@ unique(df_common$dataset)
#
df = select(filter(df_common, dataset == "ecg0606"), algorithm.x, frequency.x, cover.x)
setnames(df, c("algorithm.y","frequency.y","cover.y"))
df = rbind(df, select(df_common,algorithm.y,frequency.y))
setnames(df, c("algorithm","frequency"))
ggplot(df[df$frequency<50,], aes(x = frequency, fill=algorithm)) + geom_density(alpha=0.5)

df = rbind(df, select(df_common,algorithm.y,frequency.y,cover.y))
setnames(df, c("algorithm","frequency","cover"))
ggplot(df[df$cover>0.98 & df$frequency < 100,], aes(x = frequency, fill=algorithm)) +
  geom_density(alpha=0.5) + geom_vline(xintercept=14, col="red") + theme_bw() +
  ggtitle(paste("Estimated kernel densities for the most frequent rule occurrence\n",
    "when the total cover is above 0.98"))


df = select(filter(df_common, dataset=="stdb_308"), algorithm.x, frequency.x)
setnames(df, c("algorithm.y","frequency.y"))
df = rbind(df, select(df_common,algorithm.y,frequency.y))
68 changes: 68 additions & 0 deletions src/main/java/net/seninp/gi/logic/GIUtils.java
@@ -146,4 +146,72 @@ public static double getMeanRuleCoverage(int length, GrammarRules rules) {
return (double) coverageSum / (double) length;
}

/**
* Computes which fraction of the time series is covered by the refined rule (motif) set.
*
* @param seriesLength the time series length.
* @param refinedClassifiedMotifs the refined (pruned) motif classes whose intervals mark the covered points.
* @return the fraction of points covered by at least one motif.
*/
public static double getCoverAsFraction(int seriesLength, ArrayList<SameLengthMotifs> refinedClassifiedMotifs) {

boolean[] coverageArray = new boolean[seriesLength];

for (SameLengthMotifs rule : refinedClassifiedMotifs) {
for (SAXMotif motif : rule.getSameLenMotifs()) {
RuleInterval saxPos = motif.getPos();
int startPos = saxPos.getStart();
int endPos = saxPos.getEnd();
for (int j = startPos; j < endPos; j++) {
coverageArray[j] = true;
}
}
}

int coverSum = 0;
for (int i = 0; i < seriesLength; i++) {
if (coverageArray[i]) {
coverSum++;
}
}
return (double) coverSum / (double) seriesLength;
}
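
A minimal standalone sketch (illustration only) of the boolean coverage-array idea used above; the hypothetical int[][] intervals stand in for the half-open [getStart(), getEnd()) ranges taken from each SAXMotif, so no project classes are needed to follow the logic.

// Illustration only: covered fraction for hypothetical half-open [start, end) intervals.
public final class CoverFractionSketch {

  public static double coverAsFraction(int seriesLength, int[][] intervals) {
    boolean[] covered = new boolean[seriesLength];
    for (int[] interval : intervals) {
      // interval[0] is inclusive, interval[1] exclusive, mirroring getStart()/getEnd() above
      for (int j = interval[0]; j < interval[1]; j++) {
        covered[j] = true;
      }
    }
    int coverSum = 0;
    for (boolean c : covered) {
      if (c) {
        coverSum++;
      }
    }
    return (double) coverSum / (double) seriesLength;
  }

  public static void main(String[] args) {
    // two overlapping intervals over a series of length 10 cover points 0..7, printing 0.8
    System.out.println(coverAsFraction(10, new int[][] { { 0, 5 }, { 3, 8 } }));
  }
}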

/**
* Gets the mean rule coverage, i.e., the average number of motif intervals covering a point.
*
* @param length the original time-series length.
* @param refinedClassifiedMotifs the refined (pruned) motif classes whose intervals are counted.
* @return the mean per-point coverage.
*/
public static double getMeanRuleCoverage(int length, ArrayList<SameLengthMotifs> refinedClassifiedMotifs) {
// get the coverage array
//
int[] coverageArray = new int[length];
for (SameLengthMotifs rule : refinedClassifiedMotifs) {
for (SAXMotif motif : rule.getSameLenMotifs()) {

RuleInterval saxPos = motif.getPos();
int startPos = saxPos.getStart();
int endPos = saxPos.getEnd();
for (int j = startPos; j < endPos; j++) {
coverageArray[j] = coverageArray[j] + 1;
}
}
}
int minCoverage = 0;
int maxCoverage = 0;
int coverageSum = 0;
for (int i : coverageArray) {
coverageSum += i;
if (i < minCoverage) {
minCoverage = i;
}
if (i > maxCoverage) {
maxCoverage = i;
}
}
return (double) coverageSum / (double) length;
}
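
For the same hypothetical intervals used in the sketch above, [0, 5) and [3, 8) over a series of length 10, the per-point counters are 1, 1, 1, 2, 2, 1, 1, 1, 0, 0: getCoverAsFraction reports 8/10 = 0.8, while getMeanRuleCoverage reports (1+1+1+2+2+1+1+1)/10 = 1.0. Because overlaps are counted once in the first method and summed in the second, the mean coverage can exceed both the cover fraction and 1.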

}
169 changes: 88 additions & 81 deletions src/main/java/net/seninp/gi/tinker/EvaluatorClusterRule.java
@@ -24,116 +24,123 @@

public class EvaluatorClusterRule {

private static final String[] DATASETS = { "ann_gun_CentroidA1", "chfdbchf15",
"dutch_power_demand", "ecg0606", "gps_track", "insect", "mitdbx_108", "nprs43", "nprs44",
"stdb_308", "TEK14", "TEK16", "TEK17", "winding_col", "300_signal1", "318_signal1" };
private static final String[] DATASETS = { "ann_gun_CentroidA1", "chfdbchf15", "dutch_power_demand", "ecg0606",
"gps_track", "insect", "mitdbx_108", "nprs43", "nprs44", "stdb_308", "TEK14", "TEK16", "TEK17", "winding_col",
"300_signal1", "318_signal1" };

private static final int[] WINDOWS = { 30, 50, 70, 90, 100, 110, 120, 130, 140, 160, 180, 200,
220, 240, 260, 280, 300, 320, 330, 340, 350, 360, 380, 400, 420, 440, 460 };
private static final int[] WINDOWS = { 30, 50, 70, 90, 100, 110, 120, 130, 140, 160, 180, 200, 220, 240, 260, 280,
300, 320, 330, 340, 350, 360, 380, 400, 420, 440, 460 };

private static final int[] WINDOWS_PD = { 480, 500, 520, 540, 560, 580, 600, 320, 640, 680, 700,
720, 740, 760, 780, 800, 820, 840, 860, 880, 900 };
private static final int[] WINDOWS_PD = { 480, 500, 520, 540, 560, 580, 600, 320, 640, 680, 700, 720, 740, 760, 780,
800, 820, 840, 860, 880, 900 };

private static final int[] PAAS = { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 };
private static final int[] ALPHABETS = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12 };
private static final int[] PAAS = { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 };
private static final int[] ALPHABETS = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12 };

private static final String TAB = "\t";
private static final String TAB = "\t";

private static final String CR = "\n";
private static final String CR = "\n";

private static TSProcessor tp = new TSProcessor();
private static NormalAlphabet na = new NormalAlphabet();
private static SAXProcessor sp = new SAXProcessor();
private static TSProcessor tp = new TSProcessor();
private static NormalAlphabet na = new NormalAlphabet();
private static SAXProcessor sp = new SAXProcessor();

public static void main(String[] args) throws Exception {
public static void main(String[] args) throws Exception {

double thresholdLength = 0.1;
double thresholdCom = 0.5;
double fractionTopDist = 0.67;

String dataset = DATASETS[Integer.valueOf(args[0])];
double thresholdLength = 0.1;
double thresholdCom = 0.5;
double fractionTopDist = 0.67;

System.out.println("Sampling " + dataset);
String dataset = DATASETS[Integer.valueOf(args[0])];

BufferedWriter bw = new BufferedWriter(
new FileWriter(new File(dataset + "_repair_grammarsampler_clusterrule.out")));
bw.write("dataset\twindow\tpaa\talphabet\tapproximation\t");
bw.write("rules\tgr_size\tfrequency\tcover\tcoverage\t");
bw.write("pruned_rules\tpruned_gr_size\tpruned_frequency\tpruned_cover\tpruned_coverage\n");
System.out.println("Sampling " + dataset);

double[] series = tp.readTS("src/resources/test-data/" + dataset + ".txt", 0);
BufferedWriter bw = new BufferedWriter(new FileWriter(new File(dataset + "_repair_grammarsampler_clusterrule.out")));
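// Output layout (one TSV row per window/PAA/alphabet combination): the dataset id, the SAX
// parameters and approximation distance, grammar statistics before pruning, and then the
// corresponding statistics for the packed (pruned) rule set.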
bw.write("dataset\twindow\tpaa\talphabet\tapproximation\t");
bw.write("rules\tgr_size\tfrequency\tcover\tcoverage\t");
bw.write("packed_rules\tpruned_gr_size\tpacked_frequency\tpruned_cover\tpacked_coverage\n");

if ("300_signal1".equalsIgnoreCase(dataset) || "318_signal1".equalsIgnoreCase(dataset)) {
series = Arrays.copyOfRange(series, 0, 30000);
}
double[] series = tp.readTS("src/resources/test-data/" + dataset + ".txt", 0);

ArrayList<Integer> wins = new ArrayList<Integer>();
for (int i : WINDOWS) {
wins.add(i);
}
if ("dutch_power_demand".equalsIgnoreCase(dataset)) {
for (int i : WINDOWS_PD) {
wins.add(i);
}
}
if ("300_signal1".equalsIgnoreCase(dataset) || "318_signal1".equalsIgnoreCase(dataset)) {
series = Arrays.copyOfRange(series, 0, 30000);
}

for (int w : wins) {
for (int p : PAAS) {
for (int a : ALPHABETS) {
ArrayList<Integer> wins = new ArrayList<Integer>();
for (int i : WINDOWS) {
wins.add(i);
}
if ("dutch_power_demand".equalsIgnoreCase(dataset)) {
for (int i : WINDOWS_PD) {
wins.add(i);
}
}

SAXRecords saxData = sp.ts2saxViaWindow(series, w, p, na.getCuts(a),
NumerosityReductionStrategy.EXACT, 0.01);
for (int w : wins) {
for (int p : PAAS) {
for (int a : ALPHABETS) {

// sequitur section
//
String discretizedTS = saxData.getSAXString(" ");
SAXRecords saxData = sp.ts2saxViaWindow(series, w, p, na.getCuts(a), NumerosityReductionStrategy.EXACT, 0.01);

SAXRule grammar = SequiturFactory.runSequitur(discretizedTS);
GrammarRules rules = grammar.toGrammarRulesData();
SequiturFactory.updateRuleIntervals(rules, saxData, true, series, w, p);
// sequitur section
//
String discretizedTS = saxData.getSAXString(" ");

ArrayList<SameLengthMotifs> refinedClassifiedMotifs=ClusterRuleFactory.performPruning(series, rules,
thresholdLength, thresholdCom, fractionTopDist);
ArrayList<PackedRuleRecord> packedRules = ClusterRuleFactory.getPackedRule(refinedClassifiedMotifs);
SAXRule grammar = SequiturFactory.runSequitur(discretizedTS);
GrammarRules rules = grammar.toGrammarRulesData();
SequiturFactory.updateRuleIntervals(rules, saxData, true, series, w, p);

RuleOrganizer ro = new RuleOrganizer();
SAXPointsNumber[] pointsOccurenceInPackedRule = ro.countPointNumberAfterRemoving(series,
refinedClassifiedMotifs);

ArrayList<SameLengthMotifs> refinedClassifiedMotifs = ClusterRuleFactory.performPruning(series, rules,
thresholdLength, thresholdCom, fractionTopDist);
ArrayList<PackedRuleRecord> packedRules = ClusterRuleFactory.getPackedRule(refinedClassifiedMotifs);

StringBuilder sb = new StringBuilder();
RuleOrganizer ro = new RuleOrganizer();
SAXPointsNumber[] pointsOccurenceInPackedRule = ro.countPointNumberAfterRemoving(series,
refinedClassifiedMotifs);

sb.append(dataset).append(TAB);
StringBuilder sb = new StringBuilder();

sb.append(w).append(TAB);
sb.append(p).append(TAB);
sb.append(a).append(TAB);
sb.append(sp.approximationDistancePAA(series, w, p, 0.01)
+ sp.approximationDistanceAlphabet(series, w, p, a, 0.01)).append(TAB);
sb.append(dataset).append(TAB);

sb.append(rules.size()).append(TAB);
sb.append(RulePrunerFactory.computeGrammarSize(rules, p)).append(TAB);
sb.append(rules.getHighestFrequency()).append(TAB);
sb.append(GIUtils.getCoverAsFraction(series.length, rules)).append(TAB);
sb.append(GIUtils.getMeanRuleCoverage(series.length, rules)).append(TAB);
sb.append(w).append(TAB);
sb.append(p).append(TAB);
sb.append(a).append(TAB);
sb.append(
sp.approximationDistancePAA(series, w, p, 0.01) + sp.approximationDistanceAlphabet(series, w, p, a, 0.01))
.append(TAB);

sb.append(packedRules.size()).append(TAB);
sb.append("none").append(TAB);
sb.append("none").append(TAB);
sb.append("none").append(TAB);
sb.append("none").append(CR);
sb.append(rules.size()).append(TAB);
sb.append(RulePrunerFactory.computeGrammarSize(rules, p)).append(TAB);
sb.append(rules.getHighestFrequency()).append(TAB);
sb.append(GIUtils.getCoverAsFraction(series.length, rules)).append(TAB);
sb.append(GIUtils.getMeanRuleCoverage(series.length, rules)).append(TAB);

System.out.print(sb.toString());
bw.write(sb.toString());
sb.append(packedRules.size()).append(TAB);
sb.append("none").append(TAB);
sb.append(getHighestFrequency(pointsOccurenceInPackedRule)).append(TAB);
sb.append(GIUtils.getCoverAsFraction(series.length, refinedClassifiedMotifs)).append(TAB);
sb.append(GIUtils.getMeanRuleCoverage(series.length, refinedClassifiedMotifs)).append(CR);

}
}
}
System.out.print(sb.toString());
bw.write(sb.toString());

bw.close();
}
}
}

}
bw.close();

}

public static int getHighestFrequency(SAXPointsNumber[] pointsOccurenceInPackedRule) {
int res = 0;
for (SAXPointsNumber r : pointsOccurenceInPackedRule) {
if (r.getPointOccurenceNumber() > res) {
res = r.getPointOccurenceNumber();
}

}
return res;
}

}
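
Usage note: the single command-line argument is an index into DATASETS, so argument 3 selects ecg0606, the series plotted in the R script above; each run writes its results to <dataset>_repair_grammarsampler_clusterrule.out using the column layout declared at the top of main().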