From af85460e602c2fa8c4126b217593710c986384ca Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Sat, 7 Apr 2018 13:33:59 -0300 Subject: [PATCH] #10 - Using WordNet as Maven Dependency - Added default resource file containing a wordnet resource used when none is specified otherwise - Allow loading wordnet from Maven dependency - Use Mavenized wordnet in test - Formatting --- .../lexsemresource/core/ResourceFactory.java | 13 +++- .../lexsemresource/core/default-resources.xml | 8 +++ .../graph/EntityGraphJGraphT.java | 36 +++++++--- .../graph/EntityGraphManager.java | 16 +++-- .../pom.xml | 72 ++++++++++--------- .../wordnet/WordNetResource.java | 71 ++++++++++-------- .../wordnet/WordNetResourceTest.java | 22 +++--- 7 files changed, 146 insertions(+), 92 deletions(-) create mode 100644 de.tudarmstadt.ukp.dkpro.lexsemresource.core-asl/src/main/resources/de/tudarmstadt/ukp/dkpro/lexsemresource/core/default-resources.xml diff --git a/de.tudarmstadt.ukp.dkpro.lexsemresource.core-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/core/ResourceFactory.java b/de.tudarmstadt.ukp.dkpro.lexsemresource.core-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/core/ResourceFactory.java index 9e08921..f67641a 100644 --- a/de.tudarmstadt.ukp.dkpro.lexsemresource.core-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/core/ResourceFactory.java +++ b/de.tudarmstadt.ukp.dkpro.lexsemresource.core-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/core/ResourceFactory.java @@ -40,6 +40,7 @@ public class ResourceFactory { public static final String ENV_DKPRO_HOME = "DKPRO_HOME"; public final static String CONFIG_FILE = "resources.xml"; + public final static String DEFAULT_CONFIG_FILE = "/de/tudarmstadt/ukp/dkpro/lexsemresource/core/default-resources.xml"; private static ResourceFactory loader; @@ -72,8 +73,8 @@ public static synchronized ResourceFactory getInstance() // Check in classpath if (resourceXmlUrl == null) { resourceXmlUrl = ResourceFactory.class - .getResource(CONFIG_FILE); - locs.add("Classpath: " + CONFIG_FILE); + .getResource("/"+CONFIG_FILE); + locs.add("Classpath: /" + CONFIG_FILE); } // Check in default file system location @@ -87,6 +88,14 @@ public static synchronized ResourceFactory getInstance() locs.add(new File(CONFIG_FILE).getAbsolutePath()); } + // Check default resources file in classpath (this should never fail) + if (resourceXmlUrl == null) { + resourceXmlUrl = ResourceFactory.class + .getResource(DEFAULT_CONFIG_FILE); + locs.add("Classpath: " + DEFAULT_CONFIG_FILE); + } + + // Bail out if still not found if (resourceXmlUrl == null) { throw new ResourceLoaderException( diff --git a/de.tudarmstadt.ukp.dkpro.lexsemresource.core-asl/src/main/resources/de/tudarmstadt/ukp/dkpro/lexsemresource/core/default-resources.xml b/de.tudarmstadt.ukp.dkpro.lexsemresource.core-asl/src/main/resources/de/tudarmstadt/ukp/dkpro/lexsemresource/core/default-resources.xml new file mode 100644 index 0000000..047c552 --- /dev/null +++ b/de.tudarmstadt.ukp.dkpro.lexsemresource.core-asl/src/main/resources/de/tudarmstadt/ukp/dkpro/lexsemresource/core/default-resources.xml @@ -0,0 +1,8 @@ + + + + + diff --git a/de.tudarmstadt.ukp.dkpro.lexsemresource.graph-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/graph/EntityGraphJGraphT.java b/de.tudarmstadt.ukp.dkpro.lexsemresource.graph-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/graph/EntityGraphJGraphT.java index e38645b..627f9db 100644 --- a/de.tudarmstadt.ukp.dkpro.lexsemresource.graph-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/graph/EntityGraphJGraphT.java +++ b/de.tudarmstadt.ukp.dkpro.lexsemresource.graph-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/graph/EntityGraphJGraphT.java @@ -97,6 +97,31 @@ public class EntityGraphJGraphT private LexicalSemanticResource lexSemRes; + public EntityGraphJGraphT() + { + this(null); + } + + public EntityGraphJGraphT(File aGraphDirectory) + { + if (aGraphDirectory != null) { + graphDirectory = aGraphDirectory; + } + else { + if (System.getenv(ResourceFactory.ENV_DKPRO_HOME) == null) { + throw new IllegalStateException( + "Environment variable [" + ResourceFactory.ENV_DKPRO_HOME + "] not set"); + } + + graphDirectory = new File(System.getenv(ResourceFactory.ENV_DKPRO_HOME) + + "/" + EntityGraphJGraphT.class.getName()); + } + + if (!graphDirectory.exists()) { + graphDirectory.mkdirs(); + } + } + protected EntityGraphJGraphT getEntityGraphJGraphT(LexicalSemanticResource aLsr) throws LexicalSemanticResourceException { @@ -114,17 +139,6 @@ protected EntityGraphJGraphT getEntityGraphJGraphT(LexicalSemanticResource lexSe graphId = "graphSer_" + lexSemResource.getResourceName() + nameSuffix + "_" + lexSemResource.getResourceVersion(); - if (System.getenv(ResourceFactory.ENV_DKPRO_HOME) == null) { - throw new LexicalSemanticResourceException("Environment variable [" - + ResourceFactory.ENV_DKPRO_HOME + "] not set"); - } - - graphDirectory = new File(System.getenv(ResourceFactory.ENV_DKPRO_HOME) - + "/" + EntityGraphJGraphT.class.getName()); - if (!graphDirectory.exists()) { - graphDirectory.mkdir(); - } - serializedGraphFile = new File(graphDirectory, graphId); if (serializedGraphFile.exists()) { try { diff --git a/de.tudarmstadt.ukp.dkpro.lexsemresource.graph-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/graph/EntityGraphManager.java b/de.tudarmstadt.ukp.dkpro.lexsemresource.graph-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/graph/EntityGraphManager.java index 9c36625..1ddb7e1 100644 --- a/de.tudarmstadt.ukp.dkpro.lexsemresource.graph-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/graph/EntityGraphManager.java +++ b/de.tudarmstadt.ukp.dkpro.lexsemresource.graph-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/graph/EntityGraphManager.java @@ -17,6 +17,7 @@ *******************************************************************************/ package de.tudarmstadt.ukp.dkpro.lexsemresource.graph; +import java.io.File; import java.util.HashMap; import java.util.Map; @@ -41,14 +42,21 @@ public enum EntityGraphType JGraphT, JUNG } - public static EntityGraph getEntityGraph(LexicalSemanticResource lsr, EntityGraphType type) - throws LexicalSemanticResourceException - { + public static EntityGraph getEntityGraph(LexicalSemanticResource lsr, EntityGraphType type) + throws LexicalSemanticResourceException + { + return getEntityGraph(lsr, type, null); + } + + public static EntityGraph getEntityGraph(LexicalSemanticResource lsr, EntityGraphType type, + File aGraphDirectory) + throws LexicalSemanticResourceException + { String graphID = getGraphID(lsr, ""); if (!entityGraphMap.containsKey(graphID)) { EntityGraph entityGraph = null; if (type.equals(EntityGraphType.JGraphT)) { - EntityGraphJGraphT entityGraphJGraphT = new EntityGraphJGraphT(); + EntityGraphJGraphT entityGraphJGraphT = new EntityGraphJGraphT(aGraphDirectory); entityGraph = entityGraphJGraphT.getEntityGraphJGraphT(lsr, lsr.getEntities(), "", lsr.getNumberOfEntities()); } diff --git a/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/pom.xml b/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/pom.xml index 1d2b020..db63bc8 100644 --- a/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/pom.xml +++ b/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/pom.xml @@ -1,21 +1,23 @@ - + 4.0.0 de.tudarmstadt.ukp.dkpro.lexsemresource-asl @@ -24,28 +26,34 @@ de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl - - de.tudarmstadt.ukp.dkpro.lexsemresource - de.tudarmstadt.ukp.dkpro.lexsemresource.api-asl - - - de.tudarmstadt.ukp.dkpro.lexsemresource - de.tudarmstadt.ukp.dkpro.lexsemresource.core-asl - - - net.sf.extjwnl - extjwnl - 1.9.1 - - edu.mit - jwi - 2.2.3 + de.tudarmstadt.ukp.dkpro.lexsemresource + de.tudarmstadt.ukp.dkpro.lexsemresource.api-asl - de.tudarmstadt.ukp.dkpro.lexsemresource - de.tudarmstadt.ukp.dkpro.lexsemresource.graph-asl - test + de.tudarmstadt.ukp.dkpro.lexsemresource + de.tudarmstadt.ukp.dkpro.lexsemresource.core-asl + + + net.sf.extjwnl + extjwnl + 1.9.4 + + + edu.mit + jwi + 2.2.3 + + + de.tudarmstadt.ukp.dkpro.lexsemresource + de.tudarmstadt.ukp.dkpro.lexsemresource.graph-asl + test + + + net.sf.extjwnl + extjwnl-data-wn30 + 1.2 + test \ No newline at end of file diff --git a/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/wordnet/WordNetResource.java b/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/wordnet/WordNetResource.java index 4659692..27c7d6d 100644 --- a/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/wordnet/WordNetResource.java +++ b/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/src/main/java/de/tudarmstadt/ukp/dkpro/lexsemresource/wordnet/WordNetResource.java @@ -41,9 +41,6 @@ import net.sf.extjwnl.dictionary.Dictionary; import net.sf.extjwnl.dictionary.Dictionary.Version; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import de.tudarmstadt.ukp.dkpro.lexsemresource.Entity; import de.tudarmstadt.ukp.dkpro.lexsemresource.Entity.PoS; import de.tudarmstadt.ukp.dkpro.lexsemresource.core.AbstractResource; @@ -51,9 +48,9 @@ import de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet.util.WordNetEntityIterable; import de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet.util.WordNetUtils; -public class WordNetResource extends AbstractResource { - private final Log logger = LogFactory.getLog(getClass()); - +public class WordNetResource + extends AbstractResource +{ private static final String RESOURCE_NAME = "WordNet"; private Dictionary dict; @@ -62,32 +59,48 @@ public class WordNetResource extends AbstractResource { private int numberOfEntities = -1; - public WordNetResource(String wordNetPropertiesFile) throws LexicalSemanticResourceException { + public WordNetResource() throws LexicalSemanticResourceException + { + this(null); + } + + public WordNetResource(String wordNetPropertiesFile) throws LexicalSemanticResourceException + { try { - InputStream is; - URL url = getClass().getResource("/"+wordNetPropertiesFile); - if (url != null) { - is = url.openStream(); - } - else { - try { - url = new URL(wordNetPropertiesFile); - is = url.openStream(); - } - catch (MalformedURLException e) { - // Ignore, we try if it is a file. - is = new FileInputStream(wordNetPropertiesFile); - } - } - this.dict = Dictionary.getInstance(is); + InputStream is; + if (wordNetPropertiesFile != null) { + try { + URL url = getClass().getResource("/" + wordNetPropertiesFile); + if (url != null) { + is = url.openStream(); + } + else { + try { + url = new URL(wordNetPropertiesFile); + is = url.openStream(); + } + catch (MalformedURLException e) { + // Ignore, we try if it is a file. + is = new FileInputStream(wordNetPropertiesFile); + } + } + this.dict = Dictionary.getInstance(is); + } + catch (IOException e) { + throw new LexicalSemanticResourceException( + "Could not access WordNet properties file: " + wordNetPropertiesFile, + e); + } + } + else { + dict = Dictionary.getDefaultResourceInstance(); + } this.v = dict.getVersion(); setIsCaseSensitive(isCaseSensitive); //zhu - } catch (IOException e) { - logger.info("Could not access WordNet properties file: " + wordNetPropertiesFile); - throw new LexicalSemanticResourceException(e); - } catch (JWNLException e) { - logger.info("JWNL exception while initializing reader."); - throw new LexicalSemanticResourceException(e); + } + catch (JWNLException e) { + throw new LexicalSemanticResourceException("JWNL exception while initializing reader.", + e); } } diff --git a/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/src/test/java/de/tudarmstadt/ukp/dkpro/lexsemresource/wordnet/WordNetResourceTest.java b/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/src/test/java/de/tudarmstadt/ukp/dkpro/lexsemresource/wordnet/WordNetResourceTest.java index 11a1427..5906443 100644 --- a/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/src/test/java/de/tudarmstadt/ukp/dkpro/lexsemresource/wordnet/WordNetResourceTest.java +++ b/de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl/src/test/java/de/tudarmstadt/ukp/dkpro/lexsemresource/wordnet/WordNetResourceTest.java @@ -20,8 +20,8 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import java.io.File; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; @@ -47,18 +47,11 @@ public class WordNetResourceTest private static LexicalSemanticResource wordnet; - @BeforeClass - public static void initializeWordNet() - { - try { - wordnet = new WordNetResource( - "src/main/resources/resource/WordNet_3/wordnet_properties.xml"); - } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } - } + @BeforeClass + public static void initializeWordNet() throws LexicalSemanticResourceException + { + wordnet = new WordNetResource(); + } @Test public void testContainsLexeme() throws Exception @@ -476,7 +469,8 @@ public void testHyponymMap() throws Exception { wordnet.setIsCaseSensitive(false); - EntityGraph eg = EntityGraphManager.getEntityGraph(wordnet, EntityGraphType.JGraphT); + EntityGraph eg = EntityGraphManager.getEntityGraph(wordnet, EntityGraphType.JGraphT, + new File("target/test-output/getEntityGraph")); eg.getIntrinsicInformationContent(wordnet.getEntity("tree").iterator().next()); }