1
1
import query
2
+ import time
2
3
import threading
3
4
import tempfile
4
5
import os
@@ -92,15 +93,15 @@ def check_ssh_with_certs(hostname=None):
92
93
result = subprocess .check_output (["ssh" , "-i" , keypath , "root@%s.%s" % (hostname , config .external_domain ), "echo confirmed" ], env = env )
93
94
except subprocess .CalledProcessError as e :
94
95
command .fail ("ssh check failed: %s" % e )
95
- if result != "confirmed" :
96
- command .fail ("unexpected result from ssh check" )
96
+ if result != b "confirmed\n " :
97
+ command .fail ("unexpected result from ssh check: '%s'" % result . decode () )
97
98
print ("ssh access confirmed!" )
98
99
99
100
100
101
def check_etcd_health ():
101
102
config = configuration .Config .load_from_project ()
102
103
result = access .call_etcdctl (["cluster-health" ], return_result = True )
103
- lines = result .strip ().split ("\n " )
104
+ lines = result .strip ().decode (). split ("\n " )
104
105
if lines .pop () != "cluster is healthy" :
105
106
command .fail ("cluster did not report as healthy!" )
106
107
member_ids = []
@@ -118,7 +119,7 @@ def check_etcd_health():
118
119
command .fail ("did not find expected healthy result info for: %s" % server_name )
119
120
member_ids .append (line .split (" " )[1 ])
120
121
121
- result = access .call_etcdctl (["member" , "list" ], return_result = True )
122
+ result = access .call_etcdctl (["member" , "list" ], return_result = True ). decode ()
122
123
found_member_ids = []
123
124
servers = []
124
125
leader_count = 0
@@ -143,8 +144,8 @@ def check_etcd_health():
143
144
if leader_count != 1 :
144
145
command .fail ("wrong number of leaders" )
145
146
146
- if sorted (servers ) != sorted (node .hostname for node in config .nodes ):
147
- command .fail ("invalid detected set of servers" )
147
+ if sorted (servers ) != sorted (node .hostname for node in config .nodes if node . kind == "master" ):
148
+ command .fail ("invalid detected set of servers: %s" % servers )
148
149
149
150
if member_ids != found_member_ids :
150
151
command .fail ("member id list mismatch" )
@@ -154,11 +155,11 @@ def check_etcd_health():
154
155
155
156
def get_kubectl_json (* params : str ):
156
157
raw = access .call_kubectl (list (params ) + ["-o" , "json" ], return_result = True )
157
- return json .loads (raw )
158
+ return json .loads (raw . decode () )
158
159
159
160
160
161
def check_kube_health ():
161
- expected_kubernetes_version = "v1.7.2 "
162
+ expected_kubernetes_version = "v1.8.0 "
162
163
config = configuration .Config .load_from_project ()
163
164
164
165
# verify nodes
@@ -178,16 +179,16 @@ def check_kube_health():
178
179
nodeID = node ["spec" ]["externalID" ]
179
180
if nodeID not in nodes_remaining :
180
181
command .fail ("invalid or duplicate node: %s" % nodeID )
181
- node = nodes_remaining [nodeID ]
182
+ node_obj = nodes_remaining [nodeID ]
182
183
del nodes_remaining [nodeID ]
183
- if node .kind == "master" :
184
+ if node_obj .kind == "master" :
184
185
if node ["spec" ].get ("unschedulable" , None ) is not True :
185
186
command .fail ("expected master node to be unschedulable" )
186
187
else :
187
- assert node .kind == "worker"
188
+ assert node_obj .kind == "worker"
188
189
if node ["spec" ].get ("unschedulable" , None ):
189
190
command .fail ("expected worker node to be schedulable" )
190
- conditions = {condobj . type : condobj . status for condobj in node ["status" ]["conditions" ]}
191
+ conditions = {condobj [ " type" ] : condobj [ " status" ] for condobj in node ["status" ]["conditions" ]}
191
192
if conditions ["DiskPressure" ] != "False" :
192
193
command .fail ("expected no disk pressure" )
193
194
if conditions ["MemoryPressure" ] != "False" :
@@ -250,14 +251,14 @@ def check_aci_pull():
250
251
container_command = "ping -c 1 8.8.8.8 && echo 'PING RESULT SUCCESS' || echo 'PING RESULT FAIL'"
251
252
server_command = ["rkt" , "run" , "--pull-policy=update" , "homeworld.mit.edu/debian" , "--exec" , "/bin/bash" , "--" , "-c" ,
252
253
setup .escape_shell (container_command )]
253
- results = subprocess .check_output (["ssh" , "root@%s.%s" % (worker , config .external_domain ), "--" ] + server_command )
254
- last_line = results .strip ().split (b"\n " )[- 1 ]
254
+ results = subprocess .check_output (["ssh" , "root@%s.%s" % (worker . hostname , config .external_domain ), "--" ] + server_command )
255
+ last_line = results .replace ( b" \r \n " , b" \n " ). replace ( b" \0 " , b'' ). strip ().split (b"\n " )[- 1 ]
255
256
if b"PING RESULT FAIL" in last_line :
256
257
if b"PING RESULT SUCCESS" in last_line :
257
258
command .fail ("should not have seen both success and failure markers in last line" )
258
259
command .fail ("cluster network probably not up (could not ping 8.8.8.8)" )
259
260
elif b"PING RESULT SUCCESS" not in last_line :
260
- command .fail ("container does not seem to have launched properly; container launches are likely broken" )
261
+ command .fail ("container does not seem to have launched properly; container launches are likely broken (line = %s)" % repr ( last_line ) )
261
262
print ("container seems to be launched, with the correct network!" )
262
263
263
264
@@ -288,13 +289,11 @@ def check_flannel_kubeinfo():
288
289
if pod ["status" ]["phase" ] != "Running" :
289
290
command .fail ("pod was not running: %s: %s" % (name , pod ["status" ]["phase" ]))
290
291
291
- conditions = {condobj . type : condobj . status for condobj in pod ["status" ]["conditions" ]}
292
+ conditions = {condobj [ " type" ] : condobj [ " status" ] for condobj in pod ["status" ]["conditions" ]}
292
293
if conditions ["Initialized" ] != "True" :
293
294
command .fail ("pod not yet initialized" )
294
295
if conditions ["Ready" ] != "True" :
295
296
command .fail ("pod not yet ready" )
296
- if conditions ["PodScheduled" ] != "True" :
297
- command .fail ("pod not yet scheduled" )
298
297
299
298
if len (pod ["status" ]["containerStatuses" ]) != 1 :
300
299
command .fail ("expected only one container" )
@@ -330,41 +329,41 @@ def check_flannel_function():
330
329
331
330
def listen ():
332
331
try :
333
- container_command = "ip -o addr show dev eth0 to 172.18/16 primary && sleep 5"
334
- server_command = ["rkt" , "run" , "--net=rkt.kubernetes.io" , "homeworld.mit.edu/debian" , "--" , "-c" , container_command ]
335
- cmd = ["ssh" , "root@%s.%s" % (worker_listener , config .external_domain ), "--" ] + server_command
336
- with subprocess .Popen (cmd ) as process :
337
- stdout , stderr = process .communicate (None , timeout = 1 )
338
- if stderr :
339
- command .fail ("found data on stderr from trying to run ip addr: '%s'" % repr (stderr .decode ()))
340
- if b"scope" not in stdout :
341
- command .fail ("could not find scope line in ip addr output" )
342
- parts = stdout .split (b" " )
343
- if b"inet" not in parts :
332
+ container_command = "ip -o addr show dev eth0 to 172.18/16 primary && sleep 15"
333
+ server_command = ["rkt" , "run" , "--net=rkt.kubernetes.io" , "homeworld.mit.edu/debian" , "--" , "-c" , setup .escape_shell (container_command )]
334
+ cmd = ["ssh" , "root@%s.%s" % (worker_listener .hostname , config .external_domain ), "--" ] + server_command
335
+ with subprocess .Popen (cmd , stdout = subprocess .PIPE , bufsize = 1 , universal_newlines = True ) as process :
336
+ stdout = process .stdout .readline ()
337
+ if "scope" not in stdout :
338
+ command .fail ("could not find scope line in ip addr output (%s)" % repr (stdout ))
339
+ parts = stdout .split (" " )
340
+ if "inet" not in parts :
344
341
command .fail ("could not find inet address in ip addr output" )
345
- address = parts [parts .index (b"inet" ) + 1 ]
346
- if not address .endswith (b"/32" ):
347
- command .fail ("expected address that ended in /32, not '%s'" % address .decode ())
348
- if address .count (b"." ) != 3 :
349
- command .fail ("expected valid IPv4 address" )
350
- if not address .decode ().replace ("." , "" ).isdigit ():
351
- command .fail ("expected valid IPv4 address" )
342
+ address = parts [parts .index ("inet" ) + 1 ]
343
+ if not address .endswith ("/24" ):
344
+ command .fail ("expected address that ended in /24, not '%s'" % address )
345
+ address = address [:- 3 ]
346
+ if address .count ("." ) != 3 :
347
+ command .fail ("expected valid IPv4 address, not '%s'" % address )
348
+ if not address .replace ("." , "" ).isdigit ():
349
+ command .fail ("expected valid IPv4 address, not '%s'" % address )
352
350
found_address [0 ] = address
353
351
event .set ()
352
+ process .communicate (timeout = 20 )
354
353
finally :
355
354
event .set ()
356
355
return True
357
356
358
357
def talk ():
359
- if not event .wait (5 ):
358
+ if not event .wait (25 ):
360
359
command .fail ("timed out while waiting for IPv4 address of listener" )
361
360
address = found_address [0 ]
362
361
if address is None :
363
362
command .fail ("no address was specified by listener" )
364
363
container_command = "ping -c 1 %s && echo 'PING RESULT SUCCESS' || echo 'PING RESULT FAIL'" % address
365
364
server_command = ["rkt" , "run" , "homeworld.mit.edu/debian" , "--exec" , "/bin/bash" , "--" , "-c" , setup .escape_shell (container_command )]
366
- results = subprocess .check_output (["ssh" , "root@%s.%s" % (worker_talker , config .external_domain ), "--" ] + server_command )
367
- last_line = results .strip ().split (b"\n " )[- 1 ]
365
+ results = subprocess .check_output (["ssh" , "root@%s.%s" % (worker_talker . hostname , config .external_domain ), "--" ] + server_command )
366
+ last_line = results .replace ( b" \r \n " , b" \n " ). replace ( b" \0 " , b'' ). strip ().split (b"\n " )[- 1 ]
368
367
if b"PING RESULT FAIL" in last_line :
369
368
command .fail ("was not able to ping the target container; is flannel working?" )
370
369
elif b"PING RESULT SUCCESS" not in last_line :
@@ -401,7 +400,7 @@ def check_dns_kubeinfo():
401
400
if pod ["status" ]["phase" ] != "Running" :
402
401
command .fail ("pod was not running: %s: %s" % (name , pod ["status" ]["phase" ]))
403
402
404
- conditions = {condobj . type : condobj . status for condobj in pod ["status" ]["conditions" ]}
403
+ conditions = {condobj [ " type" ] : condobj [ " status" ] for condobj in pod ["status" ]["conditions" ]}
405
404
if conditions ["Initialized" ] != "True" :
406
405
command .fail ("pod not yet initialized" )
407
406
if conditions ["Ready" ] != "True" :
@@ -434,10 +433,10 @@ def check_dns_function():
434
433
435
434
container_command = "nslookup kubernetes.default.svc.hyades.local 172.28.0.2"
436
435
server_command = ["rkt" , "run" , "homeworld.mit.edu/debian" , "--exec" , "/bin/bash" , "--" , "-c" , setup .escape_shell (container_command )]
437
- results = subprocess .check_output (["ssh" , "root@%s.%s" % (worker , config .external_domain ), "--" ] + server_command )
438
- last_line = results .strip ().split (b"\n " )[- 1 ]
439
- if last_line != b"Address: 172.28.0.1" :
440
- command .fail ("unexpected last line: '%s' " % repr (last_line .decode ()))
436
+ results = subprocess .check_output (["ssh" , "root@%s.%s" % (worker . hostname , config .external_domain ), "--" ] + server_command )
437
+ last_line = results .replace ( b" \r \n " , b" \n " ). replace ( b" \0 " , b'' ). strip ().split (b"\n " )[- 1 ]
438
+ if not last_line . endswith ( b"Address: 172.28.0.1" ) :
439
+ command .fail ("unexpected last line: %s " % repr (last_line .decode ()))
441
440
442
441
print ("dns-addon seems to work!" )
443
442
0 commit comments