This repository has been archived by the owner on Oct 8, 2019. It is now read-only.

Commit 93c831f

Merge pull request #301 from myui/feature/apply_formatter

Feature/apply formatter

myui committed Jun 7, 2016
2 parents 9063bb8 + cf28b83

Showing 204 changed files with 2,774 additions and 2,251 deletions.
3 changes: 2 additions & 1 deletion core/src/main/java/hivemall/HivemallVersionUDF.java
@@ -23,7 +23,8 @@
 import org.apache.hadoop.hive.ql.udf.UDFType;
 import org.apache.hadoop.io.Text;
 
-@Description(name = "hivemall_version", value = "_FUNC_() - Returns the version of Hivemall")
+@Description(name = "hivemall_version", value = "_FUNC_() - Returns the version of Hivemall",
+        extended = "Usage: SELECT hivemall_version();")
 @UDFType(deterministic = true, stateful = false)
 public final class HivemallVersionUDF extends UDF {
 
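Note: the value/extended strings in @Description are what Hive reports as a function's help text. A minimal usage sketch, assuming the UDF is registered under its usual name hivemall_version:

    -- prints the value/extended help text from @Description
    DESCRIBE FUNCTION EXTENDED hivemall_version;
    -- invoke the UDF
    SELECT hivemall_version();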
4 changes: 2 additions & 2 deletions core/src/main/java/hivemall/LearnerBaseUDTF.java
@@ -19,15 +19,15 @@
 package hivemall;
 
 import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
-import hivemall.mix.MixMessage.MixEventName;
-import hivemall.mix.client.MixClient;
 import hivemall.model.DenseModel;
 import hivemall.model.PredictionModel;
 import hivemall.model.SpaceEfficientDenseModel;
 import hivemall.model.SparseModel;
 import hivemall.model.SynchronizedModelWrapper;
 import hivemall.model.WeightValue;
 import hivemall.model.WeightValue.WeightValueWithCovar;
+import hivemall.mix.MixMessage.MixEventName;
+import hivemall.mix.client.MixClient;
 import hivemall.utils.datetime.StopWatch;
 import hivemall.utils.hadoop.HadoopUtils;
 import hivemall.utils.hadoop.HiveUtils;
54 changes: 35 additions & 19 deletions core/src/main/java/hivemall/classifier/AROWClassifierUDTF.java
@@ -28,17 +28,24 @@
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.Options;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
 /**
  * Adaptive Regularization of Weight Vectors (AROW) binary classifier.
  *
  * <pre>
  * [1] K. Crammer, A. Kulesza, and M. Dredze, "Adaptive Regularization of Weight Vectors",
  * In Proc. NIPS, 2009.
  * </pre>
  */
+@Description(
+        name = "train_arow",
+        value = "_FUNC_(list<string|int|bigint> features, int label [, const string options])"
+                + " - Returns a relation consists of <string|int|bigint feature, float weight, float covar>",
+        extended = "Build a prediction model by Adaptive Regularization of Weight Vectors (AROW) binary classifier")
 public class AROWClassifierUDTF extends BinaryOnlineClassifierUDTF {
 
     /** Regularization parameter r */
@@ -47,8 +54,9 @@ public class AROWClassifierUDTF extends BinaryOnlineClassifierUDTF {
     @Override
     public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
         final int numArgs = argOIs.length;
-        if(numArgs != 2 && numArgs != 3) {
-            throw new UDFArgumentException("AROWClassifierUDTF takes 2 or 3 arguments: List<String|Int|BitInt> features, Int label [, constant String options]");
+        if (numArgs != 2 && numArgs != 3) {
+            throw new UDFArgumentException(
+                    "_FUNC_ takes 2 or 3 arguments: List<String|Int|BitInt> features, Int label [, constant String options]");
         }
 
         return super.initialize(argOIs);
@@ -62,7 +70,8 @@ protected boolean useCovariance() {
     @Override
     protected Options getOptions() {
         Options opts = super.getOptions();
-        opts.addOption("r", "regularization", true, "Regularization parameter for some r > 0 [default 0.1]");
+        opts.addOption("r", "regularization", true,
+                "Regularization parameter for some r > 0 [default 0.1]");
         return opts;
     }
 
@@ -71,13 +80,13 @@ protected CommandLine processOptions(ObjectInspector[] argOIs) throws UDFArgumen
         final CommandLine cl = super.processOptions(argOIs);
 
         float r = 0.1f;
-        if(cl != null) {
+        if (cl != null) {
             String r_str = cl.getOptionValue("r");
-            if(r_str != null) {
+            if (r_str != null) {
                 r = Float.parseFloat(r_str);
-                if(!(r > 0)) {
-                    throw new UDFArgumentException("Regularization parameter must be greater than 0: "
-                            + r_str);
+                if (!(r > 0)) {
+                    throw new UDFArgumentException(
+                            "Regularization parameter must be greater than 0: " + r_str);
                 }
             }
         }
@@ -93,7 +102,7 @@ protected void train(@Nonnull final FeatureValue[] features, int label) {
         PredictionResult margin = calcScoreAndVariance(features);
         float m = margin.getScore() * y;
 
-        if(m < 1.f) {
+        if (m < 1.f) {
             float var = margin.getVariance();
             float beta = 1.f / (var + r);
             float alpha = (1.f - m) * beta;
@@ -106,9 +115,10 @@ protected float loss(PredictionResult margin, float y) {
         return m < 0.f ? 1.f : 0.f; // suffer loss = 1 if sign(t) != y
     }
 
-    protected void update(@Nonnull final FeatureValue[] features, final float y, final float alpha, final float beta) {
-        for(FeatureValue f : features) {
-            if(f == null) {
+    protected void update(@Nonnull final FeatureValue[] features, final float y, final float alpha,
+            final float beta) {
+        for (FeatureValue f : features) {
+            if (f == null) {
                 continue;
             }
             final Object k = f.getFeature();
@@ -120,10 +130,11 @@ protected void update(@Nonnull final FeatureValue[] features, final float y, fin
         }
     }
 
-    private static IWeightValue getNewWeight(final IWeightValue old, final float x, final float y, final float alpha, final float beta) {
+    private static IWeightValue getNewWeight(final IWeightValue old, final float x, final float y,
+            final float alpha, final float beta) {
         final float old_w;
         final float old_cov;
-        if(old == null) {
+        if (old == null) {
             old_w = 0.f;
             old_cov = 1.f;
         } else {
@@ -138,6 +149,11 @@ private static IWeightValue getNewWeight(final IWeightValue old, final float x,
         return new WeightValueWithCovar(new_w, new_cov);
     }
 
+    @Description(
+            name = "train_arowh",
+            value = "_FUNC_(list<string|int|bigint> features, int label [, const string options])"
+                    + " - Returns a relation consists of <string|int|bigint feature, float weight, float covar>",
+            extended = "Build a prediction model by AROW binary classifier using hinge loss")
     public static class AROWh extends AROWClassifierUDTF {
 
         /** Aggressiveness parameter */
@@ -155,11 +171,11 @@ protected CommandLine processOptions(ObjectInspector[] argOIs) throws UDFArgumen
             final CommandLine cl = super.processOptions(argOIs);
 
             float c = 1.f;
-            if(cl != null) {
+            if (cl != null) {
                 String c_str = cl.getOptionValue("c");
-                if(c_str != null) {
+                if (c_str != null) {
                     c = Float.parseFloat(c_str);
-                    if(!(c > 0.f)) {
+                    if (!(c > 0.f)) {
                         throw new UDFArgumentException("Aggressiveness parameter C must be C > 0: "
                                 + c);
                     }
@@ -178,15 +194,15 @@ protected void train(@Nonnull final FeatureValue[] features, int label) {
             float p = margin.getScore();
             float loss = loss(p, y); // C - m (m = y * p)
 
-            if(loss > 0.f) {// m < 1.0 || 1.0 - m > 0
+            if (loss > 0.f) {// m < 1.0 || 1.0 - m > 0
                 float var = margin.getVariance();
                 float beta = 1.f / (var + r);
                 float alpha = loss * beta; // (1.f - m) * beta
                 update(features, y, alpha, beta);
             }
         }
 
-        /** 
+        /**
         * @return C - y * p
         */
        protected float loss(final float p, final float y) {
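Note: the train()/getNewWeight() changes above are formatting-only; the underlying update is the diagonalized AROW rule from the cited NIPS 2009 paper. Writing the margin as m = y * score and the confidence as v = x^T \Sigma x, the code fires when m < 1 and applies (standard AROW notation from the paper, not quoted verbatim from this commit):

    \beta = \frac{1}{v + r}, \qquad
    \alpha = (1 - m)\,\beta, \qquad
    \mu \leftarrow \mu + \alpha\, y\, \Sigma x, \qquad
    \Sigma \leftarrow \Sigma - \beta\, \Sigma x x^{\top} \Sigma

with r set by the -r/--regularization option. The AROWh variant differs only in the trigger: it updates when the hinge loss C - m is positive and uses \alpha = (C - m)\beta. A hypothetical training query in the usual Hivemall style (the table name training_samples and its columns are assumptions, not part of this commit):

    SELECT feature, avg(weight) AS weight, avg(covar) AS covar
    FROM (
        SELECT train_arow(features, label) AS (feature, weight, covar)
        FROM training_samples
    ) t
    GROUP BY feature;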
28 changes: 18 additions & 10 deletions core/src/main/java/hivemall/classifier/AdaGradRDAUDTF.java
@@ -28,10 +28,15 @@
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.Options;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
+@Description(name = "train_adagrad_rda",
+        value = "_FUNC_(list<string|int|bigint> features, int label [, const string options])"
+                + " - Returns a relation consists of <string|int|bigint feature, float weight>",
+        extended = "Build a prediction model by Adagrad+RDA regularization binary classifier")
 public final class AdaGradRDAUDTF extends BinaryOnlineClassifierUDTF {
 
     private float eta;
@Override
public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
final int numArgs = argOIs.length;
if(numArgs != 2 && numArgs != 3) {
throw new UDFArgumentException("AdaGradRDAUDTF takes 2 or 3 arguments: List<Text|Int|BitInt> features, int label [, constant string options]");
if (numArgs != 2 && numArgs != 3) {
throw new UDFArgumentException(
"_FUNC_ takes 2 or 3 arguments: List<Text|Int|BitInt> features, int label [, constant string options]");
}

StructObjectInspector oi = super.initialize(argOIs);
@@ -55,14 +61,15 @@ protected Options getOptions() {
         Options opts = super.getOptions();
         opts.addOption("eta", "eta0", true, "The learning rate \\eta [default 0.1]");
         opts.addOption("lambda", true, "lambda constant of RDA [default: 1E-6f]");
-        opts.addOption("scale", true, "Internal scaling/descaling factor for cumulative weights [default: 100]");
+        opts.addOption("scale", true,
+                "Internal scaling/descaling factor for cumulative weights [default: 100]");
         return opts;
     }
 
     @Override
     protected CommandLine processOptions(ObjectInspector[] argOIs) throws UDFArgumentException {
         CommandLine cl = super.processOptions(argOIs);
-        if(cl == null) {
+        if (cl == null) {
             this.eta = 0.1f;
             this.lambda = 1E-6f;
             this.scaling = 100f;
@@ -80,16 +87,16 @@ protected void train(@Nonnull final FeatureValue[] features, final int label) {
 
         float p = predict(features);
         float loss = LossFunctions.hingeLoss(p, y); // 1.0 - y * p
-        if(loss <= 0.f) { // max(0, 1 - y * p)
+        if (loss <= 0.f) { // max(0, 1 - y * p)
             return;
         }
         // subgradient => -y * W dot xi
         update(features, y, count);
     }
 
     protected void update(@Nonnull final FeatureValue[] features, final float y, final int t) {
-        for(FeatureValue f : features) {// w[f] += y * x[f]
-            if(f == null) {
+        for (FeatureValue f : features) {// w[f] += y * x[f]
+            if (f == null) {
                 continue;
             }
             Object x = f.getFeature();
@@ -99,14 +106,15 @@ protected void update(@Nonnull final FeatureValue[] features, final float y, fin
         }
     }
 
-    protected void updateWeight(@Nonnull final Object x, final float xi, final float y, final float t) {
+    protected void updateWeight(@Nonnull final Object x, final float xi, final float y,
+            final float t) {
         final float gradient = -y * xi;
         final float scaled_gradient = gradient * scaling;
 
         float scaled_sum_sqgrad = 0.f;
         float scaled_sum_grad = 0.f;
         IWeightValue old = model.get(x);
-        if(old != null) {
+        if (old != null) {
             scaled_sum_sqgrad = old.getSumOfSquaredGradients();
             scaled_sum_grad = old.getSumOfGradients();
         }
@@ -120,7 +128,7 @@ protected void updateWeight(@Nonnull final Object x, final float xi, final float
         float sign = (sum_grad > 0.f) ? 1.f : -1.f;
         // |u_{t,i}|/t - \lambda
         float meansOfGradients = sign * sum_grad / t - lambda;
-        if(meansOfGradients < 0.f) {
+        if (meansOfGradients < 0.f) {
             // x_{t,i} = 0
             model.delete(x);
         } else {
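Note: updateWeight() implements RDA with AdaGrad scaling and l1 truncation; the scale option is an internal factor, presumably to keep the float accumulators in a safe numeric range. With the average gradient \bar{g}_{t,i} = \frac{1}{t} \sum_{s \le t} g_{s,i} and the squared-gradient sum G_{t,i} = \sum_{s \le t} g_{s,i}^2, the if/else above matches the standard l1-regularized AdaGrad-RDA rule (the assignment branch is cut off in this excerpt, so the closed form below is taken from the literature rather than from the diff):

    w_{t+1,i} =
    \begin{cases}
        0 & \text{if } |\bar{g}_{t,i}| < \lambda \\
        -\operatorname{sign}(\bar{g}_{t,i}) \, \dfrac{\eta\, t}{\sqrt{G_{t,i}}} \,\bigl(|\bar{g}_{t,i}| - \lambda\bigr) & \text{otherwise}
    \end{cases}

Here meansOfGradients corresponds to |\bar{g}_{t,i}| - \lambda; features whose average gradient magnitude stays below \lambda are removed from the model via model.delete(x), which is what yields sparse models.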
(The remaining changed files in this commit are not shown in this excerpt.)
