From 35192338872388f05156b26217b7e00c4dbde301 Mon Sep 17 00:00:00 2001 From: Cassowary Date: Fri, 14 Feb 2025 10:00:58 -0800 Subject: [PATCH] Capture more exceptions during runs, collect results by error condition - Also print a summary at the end. --- multiball/__main__.py | 5 +++- multiball/fabtools.py | 62 ++++++++++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/multiball/__main__.py b/multiball/__main__.py index 7fd834a..aacfe7c 100644 --- a/multiball/__main__.py +++ b/multiball/__main__.py @@ -4,6 +4,8 @@ # # TODO +# - keep track of result status, indicate how many and which ones failed with an error, failed with a return code, and +# succeeded, print at end. # - keep track of previous command outputs for last command, and any previous command with target list and command line # - calling commands and alieses from command line # - tagging and filtering commands (so #noupgrade gets a tag, and we can filter them) @@ -23,7 +25,8 @@ # - implement various commented commands in the command list # - implement interactive alias system # - Catch more exceptions in fabtools, and also add retries - +# - Make the runner aware of multiple commands so that it can combine outputs and make 'overall success' or 'overall failure' +# - make C-c break the connections not the program import argparse import datetime diff --git a/multiball/fabtools.py b/multiball/fabtools.py index 394bfcf..7dee436 100644 --- a/multiball/fabtools.py +++ b/multiball/fabtools.py @@ -1,22 +1,39 @@ +import socket import time - +from enum import Enum from pathlib import Path -from threading import Thread, Lock - -from tqdm import tqdm - -from fabric2 import ThreadingGroup, SerialGroup, Config, Connection +from threading import Lock, Thread import paramiko +from fabric2 import Config, Connection, SerialGroup, ThreadingGroup +from tqdm import tqdm +class RunResult(Enum): + Success = 0 + CommandFailure = 1 + Error = 2 + Cancelled = 3 + Timeout = 4 # FIXME support host timeouts + def thread_run(connection, command, result_lock, result_queue): + runresult = RunResult.Success try: res = connection.run(command, warn=True, hide=True) - except (paramiko.ssh_exception.NoValidConnectionsError, paramiko.ssh_exception.SSHException) as inst: + if (res.exited != 0): + runresult = RunResult.CommandFailure + except (paramiko.ssh_exception.NoValidConnectionsError, paramiko.ssh_exception.SSHException, socket.gaierror) as inst: res = f"Could not connect to host: {inst}" + runresult = RunResult.Error + except KeyboardInterrupt as inst: + res = f"Canceled host {inst} from C-c" + runresult = RunResult.Cancelled + except Exception as inst: + res = f"Unknown error for host: {inst}" + runresult = RunResult.Error with result_lock: - result_queue.append((connection, res)) + result_queue.append((connection, res, runresult)) + # A set of hosts we can target with a series of commands and also can collect output of each command class HostSet: @@ -58,24 +75,37 @@ class HostSet: if ((time.time() - tupdate) > 10.0): tupdate = time.time() print("Still waiting on ", [thread.host for thread in threads]) + # FIXME have the thread killed if it's still waiting after $TIMEOUT prog.close() # Gather up results by output gathered = {} - for connection, result in resq: + success = [] + error = [] + commandfail = [] + + for connection, result, runresult in resq: + # print(connection, result, runresult) rstr = str(result) if not rstr in gathered: gathered[rstr] = [] gathered[rstr].append(connection) + if runresult in (RunResult.Error, RunResult.Cancelled): + error.append(connection) + elif runresult == RunResult.CommandFailure: + commandfail.append(connection) + else: + success.append(connection) # display results for result, connections in gathered.items(): print('-----> [{}]'.format(' '.join(connection.original_host for connection in connections))) print(result) - # ## gather_output - # for connection in self.connections: - # # import pdb - # # pdb.set_trace() - # res = connection.run(command) - # print('---- {} ----'.format(connection.original_host)) - # print(res) + + print('#####> RUN SUMMARY <#####') + if (success): + print(' Succeeded: ', ', '.join(connection.original_host for connection in success)) + if (commandfail): + print(' Command fail: ', ', '.join(connection.original_host for connection in commandfail)) + if (error): + print('Failed with Error: ', ','.join(connection.original_host for connection in error))