Capture more exceptions during runs, collect results by error condition

- Also print a summary at the end.
This commit is contained in:
Cassowary Rusnov 2025-02-14 10:00:58 -08:00
parent 12d52179fd
commit 3519233887
2 changed files with 50 additions and 17 deletions

View File

@ -4,6 +4,8 @@
# #
# TODO # TODO
# - keep track of result status, indicate how many and which ones failed with an error, failed with a return code, and
# succeeded, print at end.
# - keep track of previous command outputs for last command, and any previous command with target list and command line # - keep track of previous command outputs for last command, and any previous command with target list and command line
# - calling commands and aliases from command line # - calling commands and aliases from command line
# - tagging and filtering commands (so #noupgrade gets a tag, and we can filter them) # - tagging and filtering commands (so #noupgrade gets a tag, and we can filter them)
@ -23,7 +25,8 @@
# - implement various commented commands in the command list # - implement various commented commands in the command list
# - implement interactive alias system # - implement interactive alias system
# - Catch more exceptions in fabtools, and also add retries # - Catch more exceptions in fabtools, and also add retries
# - Make the runner aware of multiple commands so that it can combine outputs and make 'overall success' or 'overall failure'
# - make C-c break the connections not the program
import argparse import argparse
import datetime import datetime

View File

@ -1,22 +1,39 @@
import socket
import time import time
from enum import Enum
from pathlib import Path from pathlib import Path
from threading import Thread, Lock from threading import Lock, Thread
from tqdm import tqdm
from fabric2 import ThreadingGroup, SerialGroup, Config, Connection
import paramiko import paramiko
from fabric2 import Config, Connection, SerialGroup, ThreadingGroup
from tqdm import tqdm
class RunResult(Enum):
    """Outcome category recorded for one command run against one host."""

    # The command ran and no failure condition was detected.
    Success = 0
    # The command ran but reported a non-zero exit status.
    CommandFailure = 1
    # The run raised an exception (connection problem or unknown error).
    Error = 2
    # The run was interrupted (KeyboardInterrupt).
    Cancelled = 3
    Timeout = 4  # FIXME support host timeouts
def thread_run(connection, command, result_lock, result_queue):
    """Run ``command`` on ``connection`` and record the outcome.

    A ``(connection, result, runresult)`` tuple is appended to
    ``result_queue`` while ``result_lock`` is held. ``result`` is the value
    returned by ``connection.run`` or, when the run raised, a string
    describing the problem; ``runresult`` is the matching ``RunResult``.

    ``Exception`` subclasses and ``KeyboardInterrupt`` are caught and recorded
    instead of being propagated to the caller.
    """
    runresult = RunResult.Success
    try:
        # warn=True is passed so that a non-zero exit can be inspected through
        # res.exited below instead of surfacing as an exception (presumably
        # Invoke's documented behavior -- confirm against the fabric2 docs).
        res = connection.run(command, warn=True, hide=True)
        if res.exited != 0:
            runresult = RunResult.CommandFailure
    except (
        paramiko.ssh_exception.NoValidConnectionsError,
        paramiko.ssh_exception.SSHException,
        socket.gaierror,
    ) as inst:
        res = f"Could not connect to host: {inst}"
        runresult = RunResult.Error
    except KeyboardInterrupt:
        # Report the host that was cancelled; the exception object itself
        # carries no host information and usually formats as an empty string.
        # NOTE(review): Python normally raises KeyboardInterrupt only in the
        # main thread, so this branch may never fire inside a worker thread.
        res = f"Canceled host {connection.original_host} from C-c"
        runresult = RunResult.Cancelled
    except Exception as inst:
        # Last-resort boundary: one failing host must still produce a result
        # entry rather than silently dropping out of the queue.
        res = f"Unknown error for host: {inst}"
        runresult = RunResult.Error

    with result_lock:
        result_queue.append((connection, res, runresult))
# A set of hosts we can target with a series of commands and also can collect output of each command # A set of hosts we can target with a series of commands and also can collect output of each command
class HostSet: class HostSet:
@ -58,24 +75,37 @@ class HostSet:
if ((time.time() - tupdate) > 10.0): if ((time.time() - tupdate) > 10.0):
tupdate = time.time() tupdate = time.time()
print("Still waiting on ", [thread.host for thread in threads]) print("Still waiting on ", [thread.host for thread in threads])
# FIXME have the thread killed if it's still waiting after $TIMEOUT
prog.close() prog.close()
# Gather up results by output # Gather up results by output
gathered = {} gathered = {}
for connection, result in resq: success = []
error = []
commandfail = []
for connection, result, runresult in resq:
# print(connection, result, runresult)
rstr = str(result) rstr = str(result)
if not rstr in gathered: if not rstr in gathered:
gathered[rstr] = [] gathered[rstr] = []
gathered[rstr].append(connection) gathered[rstr].append(connection)
if runresult in (RunResult.Error, RunResult.Cancelled):
error.append(connection)
elif runresult == RunResult.CommandFailure:
commandfail.append(connection)
else:
success.append(connection)
# display results # display results
for result, connections in gathered.items(): for result, connections in gathered.items():
print('-----> [{}]'.format(' '.join(connection.original_host for connection in connections))) print('-----> [{}]'.format(' '.join(connection.original_host for connection in connections)))
print(result) print(result)
# ## gather_output
# for connection in self.connections: print('#####> RUN SUMMARY <#####')
# # import pdb if (success):
# # pdb.set_trace() print(' Succeeded: ', ', '.join(connection.original_host for connection in success))
# res = connection.run(command) if (commandfail):
# print('---- {} ----'.format(connection.original_host)) print(' Command fail: ', ', '.join(connection.original_host for connection in commandfail))
# print(res) if (error):
print('Failed with Error: ', ','.join(connection.original_host for connection in error))