This repository has been archived by the owner on Jun 12, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsim_2017_03_18.py
305 lines (278 loc) · 12.6 KB
/
sim_2017_03_18.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
# simulation of the Monero selection process for different protocols
import sys, sqlite3, matplotlib, collections, itertools, bisect, os
matplotlib.use('agg')
import matplotlib.pyplot as plt
import numpy as np
__author__ = "Kevin Lee and Andrew Miller"
__maintainer__ = "Kevin Lee"
__email__ = "[email protected]"
# define some global variables
CRYPTONOTE_MINED_MONEY_UNLOCK_WINDOW = 60
CRYPTONOTE_DEFAULT_TX_SPENDABLE_AGE = 10
recent_zone = 5 * 86400
recent_ratio = 0.25
top06mo = 1500203268
rec06mo = top06mo - recent_zone
top12mo = 1515755268
rec12mo = top12mo - recent_zone
rct_start = 1484051946
amounts = []
weights = []
if 'amount_dict' not in globals():
amount_dict = collections.OrderedDict()
amount_dict_keys = collections.OrderedDict()
top_block = None
top_time = None
top_idx = {}
recent_zones = {}
time_diff = []
time_dict = collections.OrderedDict()
time_dict_keys = []
def extrap(x, xp, yp):
'''The function adds linear extrapolation to the np.interp function. It is called
in extrapolation simulations in which we want to measure top block heights in the future.
'''
y = np.interp(x, xp, yp)
y = np.where(x<xp[0], yp[0]+(x-xp[0])*(yp[0]-yp[1])/(xp[0]-xp[1]), y)
y = np.where(x>xp[-1], yp[-1]+(x-xp[-1])*(yp[-1]-yp[-2])/(xp[-1]-xp[-2]), y)
return y
def amount_frequency():
'''From the database we scraped blockchain data into, we get the denomination and frequencies
of coins from the past month (~21900 blocks). We ignore RCT transactions b/c they are not going
to be used for this set of simulations, and we ignore outputs with frequencies of less than 1000
because they are insignificant. Afterwards, the counts are used to weigh the denominations such
that the sum of weights adds up to 1.
'''
global amounts, weights
conn = sqlite3.connect('outs_2017_03_18.db')
c_1 = conn.cursor()
c_1.execute('''SELECT amount,count(*) FROM out_table WHERE block_height >= 1220516 - 21900 GROUP BY amount ORDER BY count(*) DESC''')
recent = c_1.fetchall()
for row in recent:
if row[1] > 1000 and int(row[0]) != 0:
amounts.append(int(row[0]))
weights.append(int(row[1]))
amounts = np.asarray(amounts)
weights = np.asarray(weights)
weights = np.asarray([float(i)/np.sum(weights) for i in weights])
def preprocess(is_rct):
'''Sets up the variables in the simulation. The most recent timestamp in the database is
set to be the time of our simulation, the maximum global index for the each amount in the denominations
we gleaned from the blockchain is found, as well as the global index corresponding to the recent zone
(~5 days) for each denomination. We keep track of those in our simulation. We do the same for
RCT values as well. Afterwards, we read in the recent and top zones for 6 and 12 month simulations to use.
Lastly, we read the time differences between transaction time and output creation (receive) time for the
transactions that were spent in-the-clear, because those will define our simulation behavior for choosing
the real spend.
'''
global amount_dict, top_block, top_idx, time_diff, time_dict, time_dict_keys, top_time
amount_frequency()
if len(amount_dict) == len(amounts): return
print 'Running preprocess first'
conn_1 = sqlite3.connect('outs_2017_03_18.db')
c_1 = conn_1.cursor()
if is_rct:
c_1.execute('''SELECT MAX(block_height) FROM out_table''')
top_block, = c_1.fetchone()
else:
top_block = 1220516
cmd = '''SELECT timestamp FROM out_table WHERE block_height = {ht}'''
cmd = cmd.format(ht = top_block)
c_1.execute(cmd)
timenow, = c_1.fetchone()
print 'top_block', top_block
print 'timenow', timenow
top_time = timenow
for amt in amounts:
cmd = '''SELECT MAX(g_idx) as idx from out_table WHERE amount = {denom}'''
cmd = cmd.format(denom = amt)
c_1.execute(cmd)
top = c_1.fetchone()
top_idx[amt] = top[0]
cmd = '''SELECT MIN(g_idx) as idx FROM out_table WHERE amount = {denom} AND timestamp >= {start}'''
cmd = cmd.format(denom = amt, start = timenow - recent_zone)
c_1.execute(cmd)
recent = c_1.fetchone()
recent_zones[amt] = recent[0]
cmd = '''SELECT timestamp, MAX(g_idx) FROM out_table WHERE amount={denom} GROUP BY timestamp'''
cmd = cmd.format(denom = amt)
c_1.execute(cmd)
recent = c_1.fetchall()
amount_dict_keys[amt] = []
amount_dict[amt] = {}
for row in recent:
amount_dict[amt][int(row[0])] = int(row[1])
amount_dict_keys[amt].append(int(row[0]))
cmd = '''SELECT MAX(g_idx) as idx from out_table WHERE amount = {denom}'''
cmd = cmd.format(denom = 0)
c_1.execute(cmd)
top = c_1.fetchone()
top_idx[0] = {}
top_idx[0][0] = top[0]
cmd = '''SELECT MIN(g_idx) as idx FROM out_table WHERE amount = {denom} AND timestamp >= {start}'''
cmd = cmd.format(denom = 0, start = timenow - recent_zone)
c_1.execute(cmd)
recent = c_1.fetchone()
recent_zones[0] = {}
recent_zones[0][0] = recent[0]
cmd = '''SELECT timestamp, MAX(g_idx) FROM out_table WHERE amount={denom} GROUP BY timestamp'''
cmd = cmd.format(denom = 0)
c_1.execute(cmd)
recent = c_1.fetchall()
x = np.asarray([int(row[0]) for row in recent])
y = np.asarray([int(row[1]) for row in recent])
for row in recent:
time_dict[row[0]] = row[1]
if os.path.isfile("extrap_time_dict.npz"):
npzfile = np.load("extrap_time_dict.npz")
ts = npzfile["ts"]
g = npzfile["g"]
else:
ts, g = ([] for i in range(2))
for f in range(top_time+1, top12mo+1):
ts.append(f)
done = int(extrap(f,x,y))
g.append(done)
ts = np.asarray(ts)
g = np.asarray(g)
outfile = "extrap_time_dict"
np.savez(outfile, ts = ts, g = g)
for a,b in itertools.izip(ts,g):
time_dict[a] = b
top_idx[0][6] = time_dict[top06mo]
top_idx[0][12] = time_dict[top12mo]
recent_zones[0][6] = time_dict[rec06mo]
recent_zones[0][12] = time_dict[rec12mo]
time_dict_keys = time_dict.keys()
conn = sqlite3.connect('zinput.db')
c_2 = conn.cursor()
cmd = '''SELECT tx_timestamp - mixin_timestamp as time_diff from first'''
c_2.execute(cmd)
diff = c_2.fetchall()
for row in diff:
if row[0] >= 0:
time_diff.append(row[0])
time_diff = np.asarray(time_diff)
def fetch_real_output(N, period, top_global_idx):
'''Randomly selects an output to be used as the real spend. A time difference is
selected from our list of zero-input transactions we collected from the blockchain. Next,
using the time difference, we find the closest global index if the block is in the
dictionary. If this is a RCT transaction, we also take into
account whether or not we are doing 6 month or 12 month simulations. We normalize the index
returned by dividing the real index by the top index. For instance, if the top index is Y
and the chosen index is X, we return X/Y.
'''
if N==0:
while True:
rhd = np.random.choice(time_diff)
if period == 6:
rb = top06mo - rhd
elif period == 12:
rb = top12mo - rhd
else:
rb = top_time - rhd
if rb >= rct_start:
break
closest = bisect.bisect_left(time_dict_keys, rb)
real_g_idx = time_dict[time_dict_keys[closest]]
return float(real_g_idx)/top_global_idx
else:
while True:
rhd = np.random.choice(time_diff)
rb = top_time - rhd
if rb >= 0:
break
closest = bisect.bisect_left(amount_dict_keys[N], rb)
real_g_idx = amount_dict[N][amount_dict_keys[N][closest]]
return float(real_g_idx)/top_global_idx
def sample_mixins(amount, num_mix, period, version='pre0.9', is_rct=False):
'''The mixin-sampling protocol in our simulation changes based on the version argument
that is passed into it and reflects the different protocols Monero has employed since its
inception (see https://github.com/monero-project/monero/blob/master/src/wallet/wallet2.cpp).
From a high-level, pre0.9 will select mixins from genesis to top-height uniformly,
ver0.9 will select mixins from genesis to top-height with a triangular distribution, and 0.10
will employ ver0.9 selection, but also select uniformly across a recent zone (the past 5 days),
and combine that with the other mixin candidates. The mixins are then uniformly selected from the
candidate mixins. RingCT simulations and recent zone selections are only available starting in version 0.10.
The lists for mixins, recents, and final are initialized, and a top height is selected based on
amount. The protocol expands to select more mixins than that are actually used in a
transaction, and we gauge that based on version. We normalize the mixin returned by dividing the
real index by the top index. For instance, if the top index is Y and the chosen index is X, we return X/Y.
'''
assert version in ['pre0.9','0.9','0.10']
if is_rct: assert version == '0.10', "RingCT only available after 0.10"
mixin_vector, recent_vector, final_vector = ([] for i in range(3))
top_global_idx = top_idx[amount] if not is_rct else top_idx[amount][period]
recent_idx = recent_zones[amount] if not is_rct else recent_zones[amount][period]
if (recent_idx < 0): recent_idx = 0
req = int((num_mix + 1) * 1.5) + 1
if is_rct: req += (CRYPTONOTE_MINED_MONEY_UNLOCK_WINDOW - CRYPTONOTE_DEFAULT_TX_SPENDABLE_AGE)
real = fetch_real_output(amount, period, top_global_idx)
recent_req = int(req * recent_ratio)
if(recent_req<=1):
recent_req = 1
if(recent_req> top_global_idx-recent_idx+1):
recent_req = top_global_idx-recent_idx+1
if(real>=recent_idx/float(top_global_idx)):
recent_req -= 1
if version != '0.10': recent_req = 0
num_found = 0
recent_found = 0
while (num_found < req):
while (recent_found < recent_req):
r = np.random.randint(recent_idx, top_global_idx+1)
rmixin = r/float(top_global_idx)
if (rmixin not in recent_vector and rmixin != real):
recent_vector.append(rmixin)
mixin_vector.append(rmixin)
num_found += 1
recent_found += 1
if version == 'pre0.9':
a = np.random.randint(0,top_global_idx+1)
else:
a = int(np.random.triangular(0,1,1)*top_global_idx)
amixin = a/float(top_global_idx)
if (amixin not in mixin_vector and amixin != real):
mixin_vector.append(amixin)
num_found += 1
while(num_mix!=0):
re = np.random.choice(mixin_vector)
if (re not in final_vector):
final_vector.append(re)
num_mix-=1
recent_mixins = [val for val in recent_vector if val in final_vector]
rest = filter(lambda x: x not in recent_mixins, final_vector)
return real, recent_mixins, rest
def sim(N,M,time,version='pre0.9', is_rct=False):
'''The main simulation function starts the simulations. The simulation is run N times
with M mixins chosen. We normalize the mixin returned by dividing the
real index by the top index. For instance, if the top index is Y and the chosen index is X,
we return X/Y. This is necessary because different denominations will have varying
top global indices, and our simulation needs to be generalized. We will put all the
percentages returned inside a vector and save it externally so we can run various graphing
schemes on one simulation batch in the future. The simulation is run with
sim(# of trials, # of mixins), 0 to sample all, 6 for six months, 12 for 12 months.
'''
preprocess(is_rct)
real, recents, rest = ([] for i in range(3))
for x in range(0,N):
print(x+1)
m = np.random.choice(amounts, p=weights) if is_rct == False else 0
n, p, q = sample_mixins(m,M,time, version, is_rct)
real.append(n)
recents.append(p)
rest.append(q)
real = np.asarray(real)
recents = np.asarray(recents)
rest = np.asarray(rest)
outfile = "outfile_%d_mo_%d_mixins_%s" % (time, M, version)
np.savez(outfile, real=real, recents=recents, rest=rest)
def main():
M = int(sys.argv[1])
time = int(sys.argv[2])
print 'Processing:', M, time
sim(100000, M, time, '0.10', is_rct=True)
try: __IPYTHON__
except NameError:
if __name__ == '__main__':
main()