Skip to content

Commit

Permalink
no message
Browse files Browse the repository at this point in the history
  • Loading branch information
���� committed Oct 21, 2015
1 parent 32a4e85 commit 5674972
Show file tree
Hide file tree
Showing 12 changed files with 4,141 additions and 0 deletions.
243 changes: 243 additions & 0 deletions CR-CNN/init.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
#ifndef INIT_H
#define INIT_H
#include <cstring>
#include <cstdlib>
#include <vector>
#include <map>
#include <string>
#include <cstdio>
#include <float.h>
#include <cmath>
#include <ctime>

using namespace std;

int num_threads = 8;
int trainTimes = 100;
float alpha = 0.1;
float reduce = 0.98;
int dimensionC = 1000;
int dimensionWPE = 25;
float marginPositive = 2.5;
float marginNegative = 0.5;
float margin = 2;
float Belt = 0.001;
int window = 3;
int limit = 20;
int batch = 64;
int turn;
float rate = 1;
int tt,tt1;
float *matrixB1, *matrixRelation, *matrixW1, *matrixRelationDao;
float *positionVecE1, *positionVecE2, *matrixW1PositionE1, *matrixW1PositionE2;
double mx = 0;
FILE *logg;

float *wordVec;
long long wordTotal, dimension, relationTotal;
long long PositionMinE1, PositionMaxE1, PositionTotalE1,PositionMinE2, PositionMaxE2, PositionTotalE2;
map<string,int> wordMapping;
map<string,int> relationMapping;
vector<int *> trainLists, trainPositionE1, trainPositionE2;
vector<int> trainLength;
vector<int> headList, tailList, relationList;
vector<int *> testtrainLists, testPositionE1, testPositionE2;
vector<int> testtrainLength;
vector<int> testheadList, testtailList, testrelationList;
vector<std::string> nam;

void init() {
srand(time(NULL));
FILE *f = fopen("data/vectors1.bin", "rb");
fscanf(f, "%lld", &wordTotal);
fscanf(f, "%lld", &dimension);
PositionMinE1 = 0;
PositionMaxE1 = 0;
PositionMinE2 = 0;
PositionMaxE2 = 0;
wordVec = (float *)malloc(wordTotal * dimension * sizeof(float));
for (int b = 0; b < wordTotal; b++) {
string name = "";
while (1) {
char ch = fgetc(f);
if (feof(f) || ch == ' ') break;
if (ch != '\n') name = name + ch;
}
long long last = b * dimension;
float smp = 0;
for (int a = 0; a < dimension; a++) {
fread(&wordVec[a + last], sizeof(float), 1, f);
smp += wordVec[a + last]*wordVec[a + last];
}
smp = sqrt(smp);
for (int a = 0; a< dimension; a++)
wordVec[a+last] = wordVec[a+last] / smp;
wordMapping[name] = b;
}
fclose(f);

char buffer[1000];
f = fopen("data/relation2id.txt", "r");
while (fscanf(f,"%s",buffer)==1) {
int id;
fscanf(f,"%d",&id);
relationMapping[(string)(buffer)] = id;
relationTotal++;
nam.push_back((std::string)(buffer));
}
fclose(f);

f = fopen("data/cnn.txt", "r");

while (fscanf(f,"%s",buffer)==1) {
int head = wordMapping[(string)(buffer)];
fscanf(f,"%s",buffer);
int tail = wordMapping[(string)(buffer)];
fscanf(f,"%s",buffer);
int num = relationMapping[(string)(buffer)];
int len = 0, lefnum, rignum;
std::vector<int> tmpp;
while (fscanf(f,"%s", buffer)==1) {
std::string con = buffer;
if (con=="#") break;
int gg = wordMapping[con];
if (gg == -1) continue;
if (gg == head) lefnum = len;
if (gg == tail) rignum = len;
len++;
tmpp.push_back(gg);
}
if (head != -1 && tail != -1) {
headList.push_back(head);
tailList.push_back(tail);
relationList.push_back(num);
trainLength.push_back(len);
int *con=(int *)calloc(len,sizeof(int));
int *conl=(int *)calloc(len,sizeof(int));
int *conr=(int *)calloc(len,sizeof(int));
for (int i = 0; i < len; i++) {
con[i] = tmpp[i];
conl[i] = lefnum - i;
conr[i] = rignum - i;
if (conl[i] >= limit) conl[i] = limit;
if (conr[i] >= limit) conr[i] = limit;
if (conl[i] <= -limit) conl[i] = -limit;
if (conr[i] <= -limit) conr[i] = -limit;
if (conl[i] > PositionMaxE1) PositionMaxE1 = conl[i];
if (conr[i] > PositionMaxE2) PositionMaxE2 = conr[i];
if (conl[i] < PositionMinE1) PositionMinE1 = conl[i];
if (conr[i] < PositionMinE2) PositionMinE2 = conr[i];
}
trainLists.push_back(con);
trainPositionE1.push_back(conl);
trainPositionE2.push_back(conr);
}
}
fclose(f);

f = fopen("data/test.txt", "r");
while (fscanf(f,"%s",buffer)==1) {
int head = wordMapping[(string)(buffer)];
fscanf(f,"%s",buffer);
int tail = wordMapping[(string)(buffer)];
fscanf(f,"%s",buffer);
int num = relationMapping[(string)(buffer)];
int len = 0 , lefnum = 0, rignum = 0;
std::vector<int> tmpp;
while (fscanf(f,"%s", buffer)==1) {
std::string con = buffer;
if (con=="#") break;
int gg = wordMapping[con];
if (gg == -1) continue;
if (gg == head) lefnum = len;
if (gg == tail) rignum = len;
len++;
tmpp.push_back(gg);
}
if (head != -1 && tail != -1) {
testheadList.push_back(head);
testtailList.push_back(tail);
testrelationList.push_back(num);
testtrainLength.push_back(len);
int *con=(int *)calloc(len,sizeof(int));
int *conl=(int *)calloc(len,sizeof(int));
int *conr=(int *)calloc(len,sizeof(int));
for (int i = 0; i < len; i++) {
con[i] = tmpp[i];
conl[i] = lefnum - i;
conr[i] = rignum - i;
if (conl[i] >= limit) conl[i] = limit;
if (conr[i] >= limit) conr[i] = limit;
if (conl[i] <= -limit) conl[i] = -limit;
if (conr[i] <= -limit) conr[i] = -limit;
if (conl[i] > PositionMaxE1) PositionMaxE1 = conl[i];
if (conr[i] > PositionMaxE2) PositionMaxE2 = conr[i];
if (conl[i] < PositionMinE1) PositionMinE1 = conl[i];
if (conr[i] < PositionMinE2) PositionMinE2 = conr[i];
}
testtrainLists.push_back(con);
testPositionE1.push_back(conl);
testPositionE2.push_back(conr);
}
}
fclose(f);

for (int i = 0; i < trainPositionE1.size(); i++) {
int len = trainLength[i];
int *work1 = trainPositionE1[i];
for (int j = 0; j < len; j++)
work1[j] = work1[j] - PositionMinE1;
int *work2 = trainPositionE2[i];
for (int j = 0; j < len; j++)
work2[j] = work2[j] - PositionMinE2;
}

for (int i = 0; i < testPositionE1.size(); i++) {
int len = testtrainLength[i];
int *work1 = testPositionE1[i];
for (int j = 0; j < len; j++)
work1[j] = work1[j] - PositionMinE1;
int *work2 = testPositionE2[i];
for (int j = 0; j < len; j++)
work2[j] = work2[j] - PositionMinE2;
}
PositionTotalE1 = PositionMaxE1 - PositionMinE1 + 1;
PositionTotalE2 = PositionMaxE2 - PositionMinE2 + 1;
printf("%lld %lld\n", PositionTotalE1, PositionTotalE2);
}

float CalcTanh(float con) {
if (con > 20) return 1.0;
if (con < -20) return -1.0;
float sinhx = exp(con) - exp(-con);
float coshx = exp(con) + exp(-con);
return sinhx / coshx;
}

float tanhDao(float con) {
float res = CalcTanh(con);
return 1 - res * res;
}

float sigmod(float con) {
if (con > 20) return 1.0;
if (con < -20) return 0.0;
con = exp(con);
return con / (1 + con);
}

int getRand(int l,int r) {
int len = r - l;
int res = rand() % len;
return res + l;
}

float getRandU(float l, float r) {
float len = r - l;
float res = (float)(rand()) / RAND_MAX;
return res * len + l;
}



#endif
118 changes: 118 additions & 0 deletions CR-CNN/test.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#ifndef TEST_H
#define TEST_H
#include "init.h"
#include <algorithm>

int tipp = 0;
float ress = 0;

void test(int *sentence, int *testPositionE1, int *testPositionE2, int len, int e1, int e2, int r1, float &res, float *r) {
int tip[dimensionC];

for (int i = 0; i < dimensionC; i++) {
int last = i * dimension * window;
int lastt = i * dimensionWPE * window;
float mx = -FLT_MAX;
for (int i1 = 0; i1 <= len - window; i1++) {
float res = 0;
int tot = 0;
int tot1 = 0;
for (int j = i1; j < i1 + window; j++) {
int last1 = sentence[j] * dimension;
for (int k = 0; k < dimension; k++) {
res += matrixW1[last + tot] * wordVec[last1+k];
tot++;
}
int last2 = testPositionE1[j] * dimensionWPE;
int last3 = testPositionE2[j] * dimensionWPE;
for (int k = 0; k < dimensionWPE; k++) {
res += matrixW1PositionE1[lastt + tot1] * positionVecE1[last2+k];
res += matrixW1PositionE2[lastt + tot1] * positionVecE2[last3+k];
tot1++;
}
}
if (res > mx) mx = res;
}
r[i] = mx + matrixB1[i];
}

for (int i = 0; i < dimensionC; i++)
r[i] = CalcTanh(r[i]);

float s2 = -FLT_MAX;
int r2;

for (int i = 0; i < dimensionC; i++)
r[i] = CalcTanh(r[i]);

for (int j = 1; j < relationTotal; j++) {
float s = 0;
for (int i = 0; i < dimensionC; i++)
s += matrixRelation[j * dimensionC + i] * r[i];
if (s >= s2) {
s2 = s;
r2 = j;
}
}

tipp = r2;
if (s2 < 0) tipp = 0;
ress = s2;
}

float mxx = 0;
struct arr {
int num;
int ans;
float res;
};
arr work[100000];

struct cmp {
bool operator()(const arr &a,const arr&b) {
return a.res>b.res;
}
};

void test() {
float *matrixW1Dao = (float*)calloc(dimensionC * dimension * window, sizeof(float));
float *matrixB1Dao = (float*)calloc(dimensionC, sizeof(float));
float *r = (float *)calloc(dimensionC, sizeof(float));
std::vector<int> ans;
float res = 0;
int re1 = testtrainLists.size();
ans.clear();
int tot=0;
for (int k1 = 0; k1 < re1; k1++) {
int i=k1;
test(testtrainLists[i], testPositionE1[i], testPositionE2[i], testtrainLength[i], testheadList[i], testtailList[i], testrelationList[i], res, r);
work[tot].num = 8001+k1;
work[tot].ans = tipp;
work[tot].res = ress;
tot++;
}
res = 0;
float mxxx=0;
for (int i = 0; i <tot;i++) {
if (work[i].ans==0) continue;
if (work[i].ans == testrelationList[work[i].num-8001]) res+=1;
mxxx+=1;
}
mxxx = res / mxxx;
float mxxy = res / 2263;
mxxx = 2/((1/mxxx)+(1/mxxy));
printf("%.6f\n: ",mxxx);
if (mxxx>mxx) {
mxx=mxxx;
FILE *out=fopen("out/output.txt","w");
for (int i=re1-1;i>=0;i--)
fprintf(out, "%d %s\n", work[i].num, nam[work[i].ans].c_str());
fclose(out);
}

free(matrixW1Dao);
free(matrixB1Dao);
free(r);
}

#endif
Loading

0 comments on commit 5674972

Please sign in to comment.