Skip to content

Commit

Permalink
Merge pull request #12 from GateNLP/handler-plugins
Browse files Browse the repository at this point in the history
Move some "less-core" input and output handlers to plugins
  • Loading branch information
ianroberts authored Aug 23, 2023
2 parents 60d9498 + 0411434 commit 83a6cab
Show file tree
Hide file tree
Showing 21 changed files with 358 additions and 189 deletions.
10 changes: 2 additions & 8 deletions distribution/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.1.1</version>
<version>3.6.0</version>
<executions>
<execution>
<id>distro-assembly</id>
Expand Down Expand Up @@ -52,13 +53,6 @@
<version>1.2.3</version>
</dependency>

<dependency>
<groupId>uk.ac.gate</groupId>
<artifactId>gcp-cli</artifactId>
<version>3.3-SNAPSHOT</version>
<!-- easy way to separate this in the assembly descriptor -->
<scope>provided</scope>
</dependency>
</dependencies>

<distributionManagement>
Expand Down
37 changes: 28 additions & 9 deletions distribution/src/assembly/distro.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.2.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.2.0 https://maven.apache.org/xsd/assembly-2.2.0.xsd">
<id>distro</id>
<formats>
<format>zip</format>
Expand Down Expand Up @@ -36,13 +36,32 @@
<useProjectArtifact>false</useProjectArtifact>
<unpack>false</unpack>
</dependencySet>
<dependencySet>
<scope>provided</scope>
<outputDirectory></outputDirectory>
<useProjectArtifact>false</useProjectArtifact>
<unpack>false</unpack>
<outputFileNameMapping>${artifact.artifactId}.${artifact.extension}</outputFileNameMapping>
</dependencySet>
</dependencySets>

<moduleSets>
<moduleSet>
<useAllReactorProjects>true</useAllReactorProjects>
<includes>
<include>uk.ac.gate:gcp-cli</include>
</includes>
<binaries>
<outputDirectory></outputDirectory>
<unpack>false</unpack>
<includeDependencies>false</includeDependencies>
<outputFileNameMapping>${artifact.artifactId}.${artifact.extension}</outputFileNameMapping>
</binaries>
</moduleSet>
<moduleSet>
<useAllReactorProjects>true</useAllReactorProjects>
<includes>
<include>uk.ac.gate:gcp-plugin-*</include>
</includes>
<binaries>
<attachmentClassifier>distro</attachmentClassifier>
<includeDependencies>false</includeDependencies>
<unpack>true</unpack>
<outputDirectory>plugins</outputDirectory>
</binaries>
</moduleSet>
</moduleSets>
</assembly>
40 changes: 0 additions & 40 deletions impl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,26 +34,6 @@
<scope>compile</scope>
</dependency>

<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
<version>3.0.1</version>
<scope>compile</scope>
</dependency>

<!-- Heritrix library for parsing ARC and WARC files -->
<dependency>
<groupId>org.netpreserve.commons</groupId>
<artifactId>webarchive-commons</artifactId>
<version>1.1.9</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<!--
<dependency>
<groupId>it.unimi.dsi</groupId>
Expand All @@ -79,26 +59,6 @@
<scope>compile</scope>
</dependency>

<!-- mimir-connector for the Mimir output handler -->
<dependency>
<groupId>uk.ac.gate.mimir</groupId>
<artifactId>mimir-connector</artifactId>
<version>6.2</version>
<scope>compile</scope>
</dependency>

<!--
We should get this as a transitive of gate-core, but heritrix-commons also
declares a dependency on an earlier version, and Maven prefers this as it
is closer to the root of the dependency tree.
-->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.7</version>
<scope>compile</scope>
</dependency>

<!-- commons-cli for command line parsing -->
<dependency>
<groupId>commons-cli</groupId>
Expand Down
11 changes: 11 additions & 0 deletions impl/src/main/java/gate/cloud/batch/BatchRunner.java
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,17 @@ public void uncaughtException(Thread t, Throwable e) {

Gate.init();

// load built-in plugins
File builtInPluginsDir = new File(gcpHome, "plugins");
if(builtInPluginsDir.isDirectory()) {
File[] plugins = builtInPluginsDir.listFiles(File::isDirectory);
if(plugins != null) {
for(File pluginFile : plugins) {
Gate.getCreoleRegister().registerPlugin(new Plugin.Directory(pluginFile.toURI().toURL()));
}
}
}

// load any other plugins specified on the command line
String[] pluginsToLoad = line.getOptionValues('p');
if(pluginsToLoad != null) {
Expand Down
32 changes: 28 additions & 4 deletions impl/src/main/java/gate/cloud/util/ByteArrayURLStreamHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,43 @@
import java.util.List;
import java.util.Map;

import org.apache.commons.httpclient.Header;

/**
* This oddity is just a wrapper around a byte array and a URL, to
* allow creation of GATE documents from a byte array with
* application/pdf type. Donated by Ian.
*/
public class ByteArrayURLStreamHandler
extends URLStreamHandler {

/**
 * Simple mutable name/value pair describing a single header entry
 * handled by the enclosing stream handler.
 */
public static class Header {

  /** The header name; may be changed after construction. */
  private String name;

  /** The header value; may be changed after construction. */
  private String value;

  /**
   * Creates a header with the given name and value. Neither argument
   * is validated, so both are stored exactly as supplied.
   *
   * @param name the header name
   * @param value the header value
   */
  public Header(String name, String value) {
    this.name = name;
    this.value = value;
  }

  /**
   * @return the current header name
   */
  public String getName() {
    return this.name;
  }

  /**
   * Replaces the header name.
   *
   * @param name the new header name
   */
  public void setName(String name) {
    this.name = name;
  }

  /**
   * @return the current header value
   */
  public String getValue() {
    return this.value;
  }

  /**
   * Replaces the header value.
   *
   * @param value the new header value
   */
  public void setValue(String value) {
    this.value = value;
  }
}

private byte[] data;
private Header[] headers;

public ByteArrayURLStreamHandler(byte[] data) {
this(data, null);
}
Expand Down Expand Up @@ -78,7 +102,7 @@ public Map<String, List<String>> getHeaderFields() {
} else if(values.size() == 1) {
values = new ArrayList<String>(values);
values.add(h.getValue());
fields.put(h.getName(), values);
fields.put(h.getName(), values);
} else {
values.add(h.getValue());
}
Expand Down
126 changes: 0 additions & 126 deletions impl/src/main/java/gate/cloud/util/Scratch.java

This file was deleted.

Loading

0 comments on commit 83a6cab

Please sign in to comment.