@@ -141,86 +141,37 @@ def efu_get_version(self, ipaddr, port):
141141 self .dprint ("connection reset (by peer?)" )
142142 return "connection reset (by peer?)"
143143
144+
144145 def check_fw_pipeline (self , ipaddr , port ):
145146 if self .test :
146147 return 5
147-
148- retry_attempts = 3
149- retry_delay = 2
150- last_exception = None
151-
152- for attempt in range (retry_attempts ):
153- s = None
154- try :
155- s = socket .socket (socket .AF_INET , socket .SOCK_STREAM )
156-
157- s .settimeout (3.0 )
158- s .connect ((ipaddr , port ))
159-
160- s .settimeout (20.0 )
161- s .shutdown (socket .SHUT_WR )
162-
163- buffer = ""
164- while True :
165- try :
166- chunk = s .recv (4096 ).decode ('utf-8' , errors = 'ignore' )
167- if not chunk :
168- break
169- buffer += chunk
170- except socket .timeout :
171- break
172-
173- buffer = buffer .strip ()
174- if not buffer :
175- raise ConnectionError ("No data received from socket" )
176-
177- all_objects = []
178- decoder = json .JSONDecoder ()
179- pos = 0
180- while pos < len (buffer ):
181- try :
182- obj , pos = decoder .raw_decode (buffer , pos )
183- all_objects .append (obj )
184- except json .JSONDecodeError :
185- break
186-
187- if not all_objects :
188- raise ValueError ("Could not parse any JSON objects from received data" )
189-
190- found_val = None
191- def find_in_obj (target ):
192- nonlocal found_val
193- if isinstance (target , list ):
194- for item in reversed (target ):
195- find_in_obj (item )
196- if found_val is not None : return
197- elif isinstance (target , dict ):
198- for key , value in target .items ():
199- if key .endswith (".worker_state" ):
200- found_val = int (value )
201- return
202-
203- find_in_obj (all_objects )
204-
205- if found_val is not None :
206- return found_val
207-
208- print (f"worker_state not found for { ipaddr } :{ port } . No further retries for this cycle." )
209- return - 1
210-
211- except (OSError , ConnectionError , ValueError ) as e :
212- last_exception = e
213- print (f"Attempt { attempt + 1 } /{ retry_attempts } for { ipaddr } :{ port } failed: { e } " )
214- if attempt < retry_attempts - 1 :
215- print (f"Retrying in { retry_delay } seconds..." )
216- time .sleep (retry_delay )
217-
218- finally :
219- if s :
220- s .close ()
221-
222- print (f"All { retry_attempts } attempts failed for { ipaddr } :{ port } . Last error: { last_exception } " )
223- return - 1
148+ try :
149+ s = socket .socket (socket .AF_INET , socket .SOCK_STREAM )
150+ s .settimeout (3.0 )
151+ s .connect ((ipaddr , port ))
152+
153+ s .settimeout (20.0 )
154+ s .shutdown (socket .SHUT_WR )
155+
156+ data = b""
157+ while True :
158+ chunk = s .recv (4096 )
159+ if not chunk :
160+ break
161+ data += chunk
162+ lines = data .decode ("utf-8" , errors = "ignore" ).strip ().splitlines ()
163+ for line in lines :
164+ try :
165+ obj = json .loads (line )
166+ for key , value in obj .items ():
167+ if key .endswith (".worker_state" ):
168+ return int (value )
169+ except Exception as e :
170+ self .dprint (f"JSON parse error: { e } " )
171+ return - 1
172+ except OSError as e :
173+ self .dprint (f"Socket error: { e } " )
174+ return - 1
224175
225176 def check_efu_pipeline (self , ipaddr , port ):
226177 if self .test :
@@ -262,12 +213,13 @@ def check_service(self, idx, type, ipaddr, port):
262213 self .lab .servers [idx ][9 ] = self .efu_get_version (ipaddr , port )
263214 elif type == type_fw :
264215 status = self .check_fw_pipeline (ipaddr , port )
265- if status == - 1 or status == 0 :
216+ if status :
266217 self .lab .clearstatus (
267218 idx , self .s_stage1 | self .s_stage2 | self .s_stage3
268219 )
269220 else :
270- self .lab .setstatus (idx , status )
221+ # If there is no status we change it to "server running"
222+ self .lab .setstatus (idx , 0 )
271223 else :
272224 self .lab .setstatus (idx , self .s_stage1 | self .s_stage2 | self .s_stage3 )
273225 else :
0 commit comments