Skip to content
This repository has been archived by the owner on Oct 8, 2019. It is now read-only.

a9a logistic regression with iterations

Makoto YUI edited this page Jul 2, 2014 · 2 revisions

Note: logress_iter() has been deprecated since hivemall v0.2.

1st training iteration

Follow these instructions for the 1st iteration:

https://github.com/myui/hivemall/wiki/a9a-binary-classification-(logistic-regression)

2nd training iteration

input data preparation

-- Input view for the 2nd training iteration.
-- Produces one row per (rowid, label) of a9atrain_exploded, carrying the
-- to_ordered_map() aggregate of that row's features paired with the weights
-- found in a9a_model1.
-- The left outer join keeps training features that have no entry in
-- a9a_model1; their weight is NULL.
create or replace view a9atrain_iter2
as
select
  ex.rowid,
  ex.label,
  to_ordered_map(ex.feature, prev.weight) as featuresWithWeight
from
  a9atrain_exploded ex
  left outer join a9a_model1 prev
    on (ex.feature = prev.feature)
group by
  ex.rowid,
  ex.label;

training

-- 2nd-iteration model table.
-- The inner query runs logress_iter() over a9atrain_iter2 and emits
-- (feature, weight) rows; the outer query averages the weights per feature
-- and stores the average as float.
-- ${total_steps} is a variable reference and is expected to be defined
-- before this statement runs. "-eta0 0.095" overrides the default eta0.
create table a9a_model2
as
select
  trained.feature,
  cast(avg(trained.weight) as float) as weight
from
  (
    select
      logress_iter(featuresWithWeight,label,"-total_steps ${total_steps} -eta0 0.095") as (feature,weight)
    from
      a9atrain_iter2
  ) trained
group by
  trained.feature;

The default value of eta0 is 0.1; the query above overrides it with 0.095.

prediction

-- Prediction view for the 2nd-iteration model.
-- The inner query computes, per test rowid, the sum of weight * value over
-- the row's features joined to a9a_model2. The left outer join keeps
-- features with no model entry; their NULL product is ignored by sum().
-- The outer query derives, from that single sum:
--   prob         : sigmoid of the sum
--   total_weight : the raw sum
--   label        : 1.0 when the sum is > 0.0, otherwise 0.0 (as float)
create or replace view a9a_predict2
as
select
  scored.rowid,
  sigmoid(scored.total_weight) as prob,
  scored.total_weight,
  cast(
    (case when scored.total_weight > 0.0 then 1.0 else 0.0 end) as float
  ) as label
from
  (
    select
      ex.rowid,
      sum(mdl.weight * ex.value) as total_weight
    from
      a9atest_exploded ex
      left outer join a9a_model2 mdl
        on (ex.feature = mdl.feature)
    group by
      ex.rowid
  ) scored;

evaluation

-- Evaluation view: pairs each test row's label from a9atest with the
-- predicted label and probability from a9a_predict2, matched on rowid.
create or replace view a9a_submit2
as
select
  truth.label as actual,
  pred.label  as predicted,
  pred.prob   as probability
from
  a9atest truth
  join a9a_predict2 pred
    on (truth.rowid = pred.rowid);
-- Accuracy: number of rows whose predicted label equals the actual label,
-- divided by ${num_test_instances} (a variable expected to be defined
-- before this query runs).
select
  count(*) / ${num_test_instances}
from
  a9a_submit2
where
  actual = predicted;

0.8433142927338616 (accuracy improved by iterative parameter mixing!)

Clone this wiki locally