Capture more exceptions during runs, collect results by error condition

- Also print a summary at the end.
2025-02-14 10:00:58 -08:00
parent 12d52179fd
commit 3519233887
2 changed files with 50 additions and 17 deletions
--- a/multiball/main.py
+++ b/multiball/main.py
@ -4,6 +4,8 @@
 #
 # TODO
 # - keep track of result status, indicate how many and which ones failed with an error, failed with a return code, and
 #   succeeded, print at end.
 # - keep track of previous command outputs for last command, and any previous command with target list and command line
 # - calling commands and aliases from command line
 # - tagging and filtering commands (so #noupgrade gets a tag, and we can filter them)
@ -23,7 +25,8 @@
 # - implement various commented commands in the command list
 # - implement interactive alias system
 # - Catch more exceptions in fabtools, and also add retries
-
+# - Make the runner aware of multiple commands so that it can combine outputs and report an 'overall success' or 'overall failure'
 # - make C-c break the connections not the program
 import argparse
 import datetime
--- a/multiball/fabtools.py
+++ b/multiball/fabtools.py
@ -1,22 +1,39 @@
 import socket
 import time
-
+from enum import Enum
 from pathlib import Path
-from threading import Thread, Lock
+from threading import Lock, Thread
 from tqdm import tqdm
 from fabric2 import ThreadingGroup, SerialGroup, Config, Connection
 import paramiko
 from fabric2 import Config, Connection, SerialGroup, ThreadingGroup
 from tqdm import tqdm
 # Outcome category for running one command on one host. thread_run() attaches one of these
 # to every result it records, and the run summary groups hosts by it.
 class RunResult(Enum):
    # Command ran and its exit status was 0 (the value thread_run starts from).
    Success = 0
    # Command ran but reported a non-zero exit status.
    CommandFailure = 1
    # An exception was caught while running (connection/SSH/name-resolution failure or any other Exception).
    Error = 2
    # A KeyboardInterrupt was caught while running.
    Cancelled = 3
    # Not assigned anywhere in the visible code yet.
    Timeout = 4 # FIXME support host timeouts
 # Run `command` on a single host `connection` and record the outcome.
 #
 # Appends one (connection, res, runresult) tuple to `result_queue` while holding `result_lock`.
 # `res` is whatever connection.run() returned when the call completed, or a human-readable
 # error string when an exception was caught; `runresult` is the matching RunResult member.
 # The function itself returns nothing and does not re-raise the exceptions it catches.
 def thread_run(connection, command, result_lock, result_queue):
    # Assume success until a non-zero exit status or an exception says otherwise.
    runresult = RunResult.Success
    try:
        # NOTE(review): warn=True presumably keeps a non-zero exit from raising so it can be inspected
        # below, and hide=True presumably suppresses live output -- confirm against the Fabric run() docs.
        res = connection.run(command, warn=True, hide=True)
-    except (paramiko.ssh_exception.NoValidConnectionsError, paramiko.ssh_exception.SSHException) as inst:
+        if (res.exited != 0):
            runresult = RunResult.CommandFailure
    except (paramiko.ssh_exception.NoValidConnectionsError, paramiko.ssh_exception.SSHException, socket.gaierror) as inst:
        # paramiko connection/SSH failures and socket.gaierror are all reported as "could not connect".
        res = f"Could not connect to host: {inst}"
        runresult = RunResult.Error
    except KeyboardInterrupt as inst:
        # NOTE(review): {inst} is the caught exception, not the host, so this message does not name the host.
        # NOTE(review): Python normally raises KeyboardInterrupt only in the main thread; if this function
        # runs in a worker thread this branch may never fire -- confirm how callers invoke it.
        res = f"Canceled host {inst} from C-c"
        runresult = RunResult.Cancelled
    except Exception as inst:
        # Catch-all so any other failure is still recorded as a result instead of propagating.
        res = f"Unknown error for host: {inst}"
        runresult = RunResult.Error
    # The lock guards the shared result list against concurrent appends.
    with result_lock:
-        result_queue.append((connection, res))
+        result_queue.append((connection, res, runresult))
 # A set of hosts we can target with a series of commands and also can collect output of each command
 class HostSet:
@ -58,24 +75,37 @@ class HostSet:
                if ((time.time() - tupdate) > 10.0):
                    tupdate = time.time()
                    print("Still waiting on ", [thread.host for thread in threads])
                    # FIXME have the thread killed if it's still waiting after $TIMEOUT
        prog.close()
        # Gather up results by output
        gathered = {}
-        for connection, result in resq:
+        success = []
        error = []
        commandfail = []
        for connection, result, runresult in resq:
            # print(connection, result, runresult)
            rstr = str(result)
            if not rstr in gathered:
                gathered[rstr] = []
            gathered[rstr].append(connection)
            if runresult in (RunResult.Error, RunResult.Cancelled):
                error.append(connection)
            elif runresult == RunResult.CommandFailure:
                commandfail.append(connection)
            else:
                success.append(connection)
        # display results
        for result, connections in gathered.items():
            print('-----> [{}]'.format(' '.join(connection.original_host for connection in connections)))
            print(result)
-        # ## gather_output
+
-        # for connection in self.connections:
+        print('#####> RUN SUMMARY <#####')
-        #     # import pdb
+        if (success):
-        #     # pdb.set_trace()
+            print('        Succeeded: ', ', '.join(connection.original_host for connection in success))
-        #     res = connection.run(command)
+        if (commandfail):
-        #     print('---- {} ----'.format(connection.original_host))
+            print('     Command fail: ', ', '.join(connection.original_host for connection in commandfail))
-        #     print(res)
+        if (error):
            print('Failed with Error: ', ','.join(connection.original_host for connection in error))