-
Notifications
You must be signed in to change notification settings - Fork 0
/
extractcodevsdata.js
107 lines (87 loc) · 2.68 KB
/
extractcodevsdata.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
var d3 = require('d3'),
fs = require('fs');
// Accumulates one summary row per processed tree file
// ({user, repo, codeFile, dataFile}); handed to writeFinalData once every
// file in the input directory has been counted.
var output = [];
/**
 * Serialises the collected rows to TSV with d3 and writes the result to
 * output/output_codevsdata_users.tsv.
 *
 * The write is asynchronous: a failure is logged to the console, success
 * prints a confirmation message.
 *
 * @param {Array<Object>} data - rows to serialise with d3.tsv.format.
 */
var writeFinalData = function (data) {
  var destination = 'output/output_codevsdata_users.tsv';
  var serialised = d3.tsv.format(data);

  var reportResult = function (writeError) {
    if (!writeError) {
      console.log("The file was saved!");
      return;
    }
    return console.log(writeError);
  };

  fs.writeFile(destination, serialised, reportResult);
}
// Parse the extension table (data/extensions.tsv) into row objects.
var extensions = d3.tsv.parse(
  fs.readFileSync('data/extensions.tsv', 'utf8')
);

// Flat list holding the `ex` column of every row; files whose extension
// appears in this list are counted as data files further down.
var pickExtension = function (row) {
  return row.ex;
};
var exList = extensions.map(pickExtension);

// Directory holding one JSON tree dump per repository.
var dir = 'output/users_top_repos_trees/';
/**
 * Returns the lower-cased extension of a repository path, or null when the
 * file name contains no dot at all.
 *
 * Only the last path segment is inspected, so a dot inside a directory name
 * (e.g. "v1.2/Makefile") is not mistaken for an extension. A leading-dot name
 * such as ".gitignore" yields "gitignore"; a trailing dot yields "".
 *
 * @param {string} path - path of a tree entry.
 * @returns {?string} extension without the dot, or null if there is none.
 */
function getExtension(path) {
  var name = path.slice(path.lastIndexOf('/') + 1);
  var dot = name.lastIndexOf('.');
  return dot === -1 ? null : name.slice(dot + 1).toLowerCase();
}

/**
 * Counts the blob entries of one repository tree, split by whether their
 * extension is a known data extension.
 *
 * @param {Array<Object>} tree - tree entries; each is read for `type` and `path`.
 * @param {Object} dataExtensions - lookup table, extension -> true.
 * @returns {{codeFile: number, dataFile: number}} per-category counts.
 */
function countCodeVsData(tree, dataExtensions) {
  var counts = { codeFile: 0, dataFile: 0 };
  tree.forEach(function (entry) {
    // Only blobs are counted; every other entry type is skipped.
    if (entry.type !== 'blob') {
      return;
    }
    var ex = getExtension(entry.path);
    if (ex !== null && dataExtensions[ex] === true) {
      counts.dataFile++;
    } else {
      // Unknown extension or no extension at all: treated as code.
      counts.codeFile++;
    }
  });
  return counts;
}

/**
 * Splits a tree file name of the form "<user>_<repo>.json" into its parts.
 *
 * The split happens at the first underscore, so underscores in the repo name
 * are preserved. Only a trailing ".json" is stripped; a ".json" in the middle
 * of the repo name is left intact. A name without an underscore yields the
 * whole name as user and an empty repo.
 *
 * @param {string} file - file name inside the trees directory.
 * @returns {{user: string, repo: string}} the two name parts.
 */
function parseUserRepo(file) {
  var sep = file.indexOf('_');
  if (sep === -1) {
    return { user: file, repo: '' };
  }
  var suffix = '.json';
  var repo = file.slice(sep + 1);
  if (repo.slice(-suffix.length) === suffix) {
    repo = repo.slice(0, -suffix.length);
  }
  return { user: file.slice(0, sep), repo: repo };
}

// Main pipeline: read every tree file in `dir`, count code vs. data files in
// each, and write the combined table once the last file has been processed.
fs.readdir(dir, function (err, files) {
  if (err) throw err;

  // Build the extension lookup once instead of scanning exList per file.
  var dataExtensions = Object.create(null);
  exList.forEach(function (ex) {
    if (typeof ex === 'string') {
      dataExtensions[ex] = true;
    }
  });

  var count = 0;
  var total = files.length;

  files.forEach(function (file) {
    fs.readFile(dir + file, 'utf-8', function (readErr, data) {
      // Fail with the real I/O error rather than a confusing JSON.parse
      // error on undefined data; mirrors the readdir handling above.
      if (readErr) throw readErr;

      var counts = countCodeVsData(JSON.parse(data), dataExtensions);
      var id = parseUserRepo(file);

      // Rows are appended in the order the reads complete.
      output.push({
        user: id.user,
        repo: id.repo,
        codeFile: counts.codeFile,
        dataFile: counts.dataFile
      });

      count++;
      console.log(count + '/' + total + ' ' + file);
      // All reads run concurrently; the last one to finish triggers the write.
      if (count === total) {
        writeFinalData(output);
      }
    });
  });
});