Skip to content

Commit

Permalink
added CLI option for collocationalValue
Browse files Browse the repository at this point in the history
  • Loading branch information
linguatools committed Feb 16, 2018
1 parent 2d1ebe5 commit 89944fd
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 11 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/nbproject/private/
/build/
Binary file added disco-2.1.jar
Binary file not shown.
35 changes: 25 additions & 10 deletions src/de/linguatools/disco/DISCO.java
Original file line number Diff line number Diff line change
Expand Up @@ -657,24 +657,39 @@ public float secondOrderSimilarity(String w1, String w2)
* <code>w2</code>, summed up over all relations.
* @param w1 input word #1 (must be a single token).
* @param w2 input word #2 (must be a single token).
* @return the greater of two values: the sum of the significance values between
* word w1 and all its features that have w2 as their word part while ignoring
* the relation (if any), and the same for w2 with w1 as feature.
* If w1 or w2 are not found the return value is 0.
* @throws java.io.IOException
*/
public float collocationalValue(String w1, String w2) throws IOException{

// get the cooccurrences of w1 summed up over all relations
ReturnDataCol[] cols = collocations(w1);
if( cols == null ) return 0.0F;
// get the cooccurrences of w1 and w2 summed up over all relations
ReturnDataCol[] cols1 = collocations(w1);
ReturnDataCol[] cols2 = collocations(w2);

float v = 0.0F;
for(ReturnDataCol col : cols){
if( col.word.equals(w2) ){
v = col.value;
float v1 = 0.0F;
if( cols1 != null ){
for(ReturnDataCol col : cols1){
if( col.word.equals(w2) ){
v1 = col.value;
break;
}
}
}

float v2 = 0.0F;
if( cols2 != null ){
for(ReturnDataCol col : cols2){
if( col.word.equals(w1) ){
v2 = col.value;
break;
}
}
}
return v;

return (v1 > v2) ? v1 : v2;
}

/***************************************************************************
Expand Down
23 changes: 22 additions & 1 deletion src/de/linguatools/disco/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,14 @@ public class Main{
*******************************************************************/
private static void printUsage(){
System.out.println("disco V2.1 -- www.linguatools.de/disco");
System.out.println("Usage: java -jar disco-2.0.jar <indexDir> <option>");
System.out.println("Usage: java -jar disco-2.1.jar <indexDir> <option>");
System.out.println("Options: NOTE THAT <w>, <w1>, <w2> have to be single tokens!");
System.out.println("\t\t-f <w>\t\treturn corpus frequency of word <w>");
System.out.println("\t\t-s <w1> <w2> <simMeasure>\treturn semantic similarity between words <w1> and <w2>");
System.out.println("\t\t simMeasure = {COSINE, KOLB}, default is COSINE.");
System.out.println("\t\t-s2 <w1> <w2>\treturn second order similarity between words <w1> and <w2>");
System.out.println("\t\t\t\tDoes not work with word spaces of type \"COL\"!");
System.out.println("\t\t-cv <w1> <w2>\treturn collocational value between words <w1> and <w2>");
System.out.println("\t\t-bn <w> <n>\treturn the <n> most similar words for word <w>");
System.out.println("\t\t\t\tDoes not work with word spaces of type \"COL\"!");
System.out.println("\t\t-bs <w> <s>\treturn all words that are at least <s> similar to word <w>");
Expand Down Expand Up @@ -302,6 +303,26 @@ else if( args[1].equals("-s2") ){
+ "with wordspaces of type SIM!");
}
}
///////////////////////////////////////////////////////////////////////////
// -cv <w1> <w2>: return collocational value between words <w1> and <w2> //
///////////////////////////////////////////////////////////////////////////
else if( args[1].equals("-cv") ){
if ( args.length < 4 ){
printUsage();
return;
}
if ( args[2] == null || args[3] == null ){
printUsage();
return;
}
try {
DISCO d = new DISCO(args[0], false);
float sig = d.collocationalValue(args[2], args[3]);
System.out.println(sig);
} catch (IOException ex) {
System.out.println("Error: IOException: "+ex);
}
}
/////////////////////////////////////////////////////////////////
// -bn <w> <n>: return the <n> most similar words for word <w> //
/////////////////////////////////////////////////////////////////
Expand Down

0 comments on commit 89944fd

Please sign in to comment.