Skip to content
This repository has been archived by the owner on Oct 8, 2019. It is now read-only.

Commit

Permalink
Merge pull request #311 from myui/feature/v0.4.2-rc.2
Browse files: browse the repository at this point in the history.
Updated hivemall version to v0.4.2-rc.2
  • Loading branch information
myui authored Jun 28, 2016
2 parents 0e99357 + c7c5213 commit fecd775
Show file tree
Hide file tree
Showing 26 changed files with 231 additions and 108 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.4.2-rc.1
0.4.2-rc.2
2 changes: 1 addition & 1 deletion core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>io.github.myui</groupId>
<artifactId>hivemall</artifactId>
<version>0.4.2-rc.1</version>
<version>0.4.2-rc.2</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
3 changes: 2 additions & 1 deletion core/src/main/java/hivemall/HivemallConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@

public final class HivemallConstants {

public static final String VERSION = "0.4.2-rc.1";
public static final String VERSION = "0.4.2-rc.2";

public static final String BIAS_CLAUSE = "0";
public static final int BIAS_CLAUSE_HASHVAL = 0;
public static final String CONFKEY_RAND_AMPLIFY_SEED = "hivemall.amplify.seed";

// org.apache.hadoop.hive.serde.Constants (hive 0.9)
Expand Down
31 changes: 13 additions & 18 deletions core/src/main/java/hivemall/fm/FMArrayModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,7 @@ public FMArrayModel(@Nonnull FMHyperParameters params) {
super(params);
this._p = params.numFeatures;
this._w = new float[params.numFeatures + 1];
this._V = new float[params.numFeatures][params.factors];
}

@Override
protected void initLearningParams() {
for (int i = 0; i < _p; i++) {
_V[i] = initV();
}
this._V = new float[params.numFeatures][];
}

@Override
Expand Down Expand Up @@ -92,29 +85,31 @@ protected void setW(@Nonnull Feature x, float nextWi) {
}

@Override
protected float[] getV(int i) {
protected float[] getV(int i, boolean init) {
if (i < 1 || i > _p) {
throw new IllegalArgumentException("Index i should be in range [1," + _p + "]: " + i);
}
return _V[i - 1];
final int idx = i - 1;
float[] v = _V[idx];
if (v == null && init) {
v = initV();
_V[idx] = v;
}
return v;
}

@Override
public float getV(@Nonnull final Feature x, int f) {
final int i = x.getFeatureIndex();
if (i < 1 || i > _p) {
throw new IllegalArgumentException("Index i should be in range [1," + _p + "]: " + i);
}
return _V[i - 1][f];
float[] v = getV(i, true);
return v[f];
}

@Override
protected void setV(@Nonnull Feature x, int f, float nextVif) {
final int i = x.getFeatureIndex();
if (i < 1 || i > _p) {
throw new IllegalArgumentException("Index i should be in range [1," + _p + "]: " + i);
}
_V[i - 1][f] = nextVif;
float[] v = getV(i, true);
v[f] = nextVif;
}

@Override
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/java/hivemall/fm/FMIntFeatureMapModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ protected void setW(@Nonnull Feature x, float nextWi) {
}

@Override
protected float[] getV(int i) {
protected float[] getV(int i, boolean init) {
assert (i >= 1) : i;
return _V.get(i);
}
Expand Down
core/src/main/java/hivemall/fm/FactorizationMachineModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,8 @@ public FactorizationMachineModel(@Nonnull FMHyperParameters params) {
this._lambdaW = params.lambdaW;
this._lambdaV = new float[params.factors];
Arrays.fill(_lambdaV, params.lambdaV);

initLearningParams();
}

protected void initLearningParams() {}

public abstract int getSize();

protected int getMinIndex() {
Expand Down Expand Up @@ -100,7 +96,7 @@ protected float getW(int i) {
* @param i index value >= 1
*/
@Nullable
protected float[] getV(int i) {
protected float[] getV(int i, boolean init) {
throw new UnsupportedOperationException();
}

Expand Down
30 changes: 16 additions & 14 deletions core/src/main/java/hivemall/fm/FactorizationMachineUDTF.java
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ public class FactorizationMachineUDTF extends UDTFWithOptions {

// ----------------------------------------

protected FactorizationMachineModel _model;
protected transient FactorizationMachineModel _model;

/**
* The number of training examples processed
Expand Down Expand Up @@ -197,11 +197,6 @@ protected CommandLine processOptions(@Nonnull ObjectInspector[] argOIs)
return cl;
}

@Nonnull
protected FactorizationMachineModel getModel() {
return _model;
}

@Override
public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
if (argOIs.length != 2 && argOIs.length != 3) {
Expand All @@ -215,9 +210,9 @@ public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgu
this._yOI = HiveUtils.asDoubleCompatibleOI(argOIs[1]);

this._params = newHyperParameters();
CommandLine cl = processOptions(argOIs);
processOptions(argOIs);

this._model = initModel(cl, _params);
this._model = null;
this._t = 0L;

if (LOG.isInfoEnabled()) {
Expand Down Expand Up @@ -251,21 +246,28 @@ protected StructObjectInspector getOutputOI(@Nonnull FMHyperParameters params) {
}

@Nonnull
protected FactorizationMachineModel initModel(@Nullable CommandLine cl,
@Nonnull FMHyperParameters params) throws UDFArgumentException {
protected FactorizationMachineModel initModel(@Nonnull FMHyperParameters params)
throws UDFArgumentException {
final FactorizationMachineModel model;
if (params.parseFeatureAsInt) {
if (params.numFeatures == -1) {
return new FMIntFeatureMapModel(params);
model = new FMIntFeatureMapModel(params);
} else {
return new FMArrayModel(params);
model = new FMArrayModel(params);
}
} else {
return new FMStringFeatureMapModel(params);
model = new FMStringFeatureMapModel(params);
}
this._model = model;
return model;
}

@Override
public void process(Object[] args) throws HiveException {
if (_model == null) {
this._model = initModel(_params);
}

Feature[] x = parseFeatures(args[0]);
if (x == null) {
return;
Expand Down Expand Up @@ -463,7 +465,7 @@ private void forwardAsIntFeature(@Nonnull final FactorizationMachineModel model,
forwardObjs[2] = Arrays.asList(f_Vi);

for (int i = model.getMinIndex(), maxIdx = model.getMaxIndex(); i <= maxIdx; i++) {
final float[] vi = model.getV(i);
final float[] vi = model.getV(i, false);
if (vi == null) {
continue;
}
Expand Down
core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineUDTF.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,11 @@ public final class FieldAwareFactorizationMachineUDTF extends FactorizationMachi
private int _numFields;
// ----------------------------------------

private FFMStringFeatureMapModel _ffmModel;
private transient FFMStringFeatureMapModel _ffmModel;

private IntArrayList _fieldList;
private transient IntArrayList _fieldList;
@Nullable
private DoubleArray3D _sumVfX;
private transient DoubleArray3D _sumVfX;

public FieldAwareFactorizationMachineUDTF() {
super();
Expand Down Expand Up @@ -156,8 +156,8 @@ protected StructObjectInspector getOutputOI(@Nonnull FMHyperParameters params) {
}

@Override
protected FFMStringFeatureMapModel initModel(@Nullable CommandLine cl,
@Nonnull FMHyperParameters params) throws UDFArgumentException {
protected FFMStringFeatureMapModel initModel(@Nonnull FMHyperParameters params)
throws UDFArgumentException {
FFMHyperParameters ffmParams = (FFMHyperParameters) params;

FFMStringFeatureMapModel model = new FFMStringFeatureMapModel(ffmParams);
Expand Down
19 changes: 15 additions & 4 deletions core/src/main/java/hivemall/ftvec/hashing/FeatureHashingUDF.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package hivemall.ftvec.hashing;

import hivemall.HivemallConstants;
import hivemall.UDFWithOptions;
import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.hashing.MurmurHash3;
Expand Down Expand Up @@ -146,18 +147,28 @@ private List<Text> evaluateList(@Nonnull final Object arg0) {
}

@Nonnull
private static String featureHashing(@Nonnull final String fv, final int numFeatures) {
static String featureHashing(@Nonnull final String fv, final int numFeatures) {
final int headPos = fv.indexOf(':');
if (headPos == -1) {
if (fv.equals(HivemallConstants.BIAS_CLAUSE)) {
return fv;
}
int h = mhash(fv, numFeatures);
return String.valueOf(h);
} else {
final int tailPos = fv.lastIndexOf(':');
if (headPos == tailPos) {
String f = fv.substring(0, headPos);
String tail = fv.substring(headPos);
if (f.equals(HivemallConstants.BIAS_CLAUSE)) {
String v = fv.substring(headPos + 1);
double d = Double.parseDouble(v);
if (d == 1.d) {
return fv;
}
}
int h = mhash(f, numFeatures);
String v = fv.substring(headPos);
return h + v;
return h + tail;
} else {
String field = fv.substring(0, headPos + 1);
String f = fv.substring(headPos + 1, tailPos);
Expand All @@ -168,7 +179,7 @@ private static String featureHashing(@Nonnull final String fv, final int numFeat
}
}

private static int mhash(@Nonnull final String word, final int numFeatures) {
static int mhash(@Nonnull final String word, final int numFeatures) {
int r = MurmurHash3.murmurhash3_x86_32(word, 0, word.length(), 0x9747b28c) % numFeatures;
if (r < 0) {
r += numFeatures;
Expand Down
19 changes: 13 additions & 6 deletions core/src/main/java/hivemall/ftvec/hashing/MurmurHash3UDF.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@

import java.util.List;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
Expand All @@ -33,11 +36,13 @@
@UDFType(deterministic = true, stateful = false)
public final class MurmurHash3UDF extends UDF {

public IntWritable evaluate(final String word) throws UDFArgumentException {
@Nullable
public IntWritable evaluate(@Nullable final String word) throws UDFArgumentException {
return evaluate(word, MurmurHash3.DEFAULT_NUM_FEATURES);
}

public IntWritable evaluate(final String word, final int numFeatures)
@Nullable
public IntWritable evaluate(@Nullable final String word, final int numFeatures)
throws UDFArgumentException {
if (word == null) {
return null;
Expand All @@ -46,11 +51,13 @@ public IntWritable evaluate(final String word, final int numFeatures)
return new IntWritable(h);
}

public IntWritable evaluate(final List<String> words) throws UDFArgumentException {
@Nullable
public IntWritable evaluate(@Nullable final List<String> words) throws UDFArgumentException {
return evaluate(words, MurmurHash3.DEFAULT_NUM_FEATURES);
}

public IntWritable evaluate(final List<String> words, final int numFeatures)
@Nullable
public IntWritable evaluate(@Nullable final List<String> words, final int numFeatures)
throws UDFArgumentException {
if (words == null) {
return null;
Expand All @@ -70,11 +77,11 @@ public IntWritable evaluate(final List<String> words, final int numFeatures)
return evaluate(s, numFeatures);
}

public static int mhash(final String word) {
public static int mhash(@Nonnull final String word) {
return mhash(word, MurmurHash3.DEFAULT_NUM_FEATURES);
}

public static int mhash(final String word, final int numFeatures) {
public static int mhash(@Nonnull final String word, final int numFeatures) {
int r = MurmurHash3.murmurhash3_x86_32(word, 0, word.length(), 0x9747b28c) % numFeatures;
if (r < 0) {
r += numFeatures;
Expand Down
Loading

0 comments on commit fecd775

Please sign in to comment.