-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgetuuid4bibcode.py
67 lines (58 loc) · 1.85 KB
/
getuuid4bibcode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import gzip
import sys
SORTEDYEARLIST='sortedyearlist.txt'
DEFAULTPATH='../AstroExplorer/filebibs/'
# as we are assuming python 2.6 we can use set() rather than Set()
# from sets import Set
def setupAlts(defaultpath,altfile):
    """Load a two-column, whitespace-separated file into a dict.

    The file is opened at ``defaultpath + altfile`` (plain string
    concatenation, so ``defaultpath`` must already end with a path
    separator).  For every non-blank line the first field becomes a key and
    the second field its value; any further fields are ignored.  A key that
    appears more than once keeps the value from its last occurrence.

    Returns the resulting dict.

    Raises IndexError if a non-blank line has fewer than two fields.
    """
    dbhash = {}
    # 'with' closes the handle even if a malformed line raises part-way.
    with open(defaultpath + altfile) as fd:
        # Iterate the file directly instead of readlines() so the whole
        # file is never held in memory at once.
        for line in fd:
            fields = line.split()
            if not fields:
                # Blank / whitespace-only line: nothing to record.
                continue
            dbhash[fields[0]] = fields[1]
    return dbhash
def storeYears(sortedyearlistfile):
    """Group the second column of a file by the first column.

    Each non-blank line of ``sortedyearlistfile`` must consist of exactly two
    whitespace-separated fields, ``year value``.  Blank lines are skipped.

    Returns a dict mapping each year string to the list of values seen for
    it, in file order.

    Raises ValueError if a non-blank line does not have exactly two fields.
    """
    dbhash = {}
    # 'with' closes the handle even if a malformed line raises part-way.
    with open(sortedyearlistfile) as fd:
        for line in fd:
            fields = line.split()
            if not fields:
                continue
            year, value = fields
            # setdefault replaces the has_key() check, which no longer
            # exists in Python 3.
            dbhash.setdefault(year, []).append(value)
    return dbhash
def openset(thefile, dbhash):
    """Merge the ``bibcode uuid`` pairs of one gzipped file into ``dbhash``.

    The file read is ``DEFAULTPATH + thefile + ".gz"``.  Each non-blank line
    must consist of exactly two whitespace-separated fields; the first is
    used as the key and the second as the value.  ``dbhash`` is updated in
    place (existing keys are overwritten) and nothing is returned.

    Raises ValueError if a non-blank line does not have exactly two fields.
    """
    fd = gzip.open(DEFAULTPATH + thefile + ".gz")
    # try/finally rather than 'with': GzipFile only gained context-manager
    # support in Python 2.7, and this file targets 2.6.
    try:
        # NOTE(review): under Python 3 gzip.open() defaults to binary mode,
        # so keys and values would be bytes there -- confirm before porting.
        for line in fd:
            fields = line.split()
            if not fields:
                # Blank / whitespace-only line: nothing to record.
                continue
            bibcode, theuuid = fields
            dbhash[bibcode] = theuuid
    finally:
        fd.close()
def setsFromBibcodes(bibcodefile, yearhash):
    """Resolve every bibcode listed in a file to its UUID.

    ``bibcodefile`` holds one bibcode per line; blank lines are skipped.
    The first four characters of each bibcode are used as a key into
    ``yearhash`` (a dict of year -> list of file names, as built by
    storeYears) to find which files to load.  Each distinct file is loaded
    once with openset(), and the wanted bibcodes are then looked up in the
    combined table.

    Progress information ("filelist", "FILESET", "BIBCODESIWANT" and each
    file name) is printed to standard output along the way.

    Returns a dict mapping each requested bibcode to its UUID.

    Raises KeyError if a bibcode's four-character prefix is not in
    ``yearhash``, or if a bibcode is absent from all of the loaded files.
    """
    # NOTE: an alternate-bibcode remapping loaded via
    # setupAlts(DEFAULTPATH, 'bmap.txt') used to be sketched here as
    # commented-out code; it is not active.
    bibcodesiwant = []
    fileset = set()
    # 'with' closes the handle even if a year lookup raises part-way.
    with open(bibcodefile) as fd:
        for line in fd:
            bibcode = line.strip()
            if not bibcode:
                # A blank line would otherwise become the bibcode '' and
                # fail the year lookup with a misleading KeyError('').
                continue
            bibcodesiwant.append(bibcode)
            filelist = yearhash[bibcode[0:4]]
            # Single-argument print(...) emits the same text on Python 2
            # and Python 3.
            print("filelist %s" % (filelist,))
            fileset.update(filelist)
    print("FILESET %s" % (fileset,))
    print("BIBCODESIWANT %s" % (bibcodesiwant,))

    # Load every needed file exactly once into one combined lookup table.
    bibcodehash = {}
    for everyfile in fileset:
        print(everyfile)
        openset(everyfile, bibcodehash)

    dbhash = {}
    for bcode in bibcodesiwant:
        dbhash[bcode] = bibcodehash[bcode]
    return dbhash
if __name__=='__main__':
    # Script entry point.  Takes one argument: the path of a file with one
    # bibcode per line.  Prints "bibcode uuid" for each of them.
    if len(sys.argv) < 2:
        # Exit with a usage message (written to stderr, status 1) instead of
        # an IndexError traceback when the argument is missing.
        sys.exit("usage: %s BIBCODEFILE" % sys.argv[0])
    filename = sys.argv[1]
    yhash = storeYears(DEFAULTPATH + SORTEDYEARLIST)
    dbhash = setsFromBibcodes(filename, yhash)
    for ele, theuuid in dbhash.items():
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("%s %s" % (ele, theuuid))