Capture more exceptions during runs, collect results by error condition

- Also print a summary at the end.
This commit is contained in:
Cassowary Rusnov 2025-02-14 10:00:58 -08:00
parent 12d52179fd
commit 3519233887
2 changed files with 50 additions and 17 deletions

View File

@ -4,6 +4,8 @@
#
# TODO
# - keep track of result status, indicate how many and which ones failed with an error, failed with a return code, and
# succeeded, print at end.
# - keep track of previous command outputs for last command, and any previous command with target list and command line
# - calling commands and aliases from command line
# - tagging and filtering commands (so #noupgrade gets a tag, and we can filter them)
@ -23,7 +25,8 @@
# - implement various commented commands in the command list
# - implement interactive alias system
# - Catch more exceptions in fabtools, and also add retries
# - Make the runner aware of multiple commands so that it can combine outputs and make 'overall success' or 'overall failure'
# - make C-c break the connections not the program
import argparse
import datetime

View File

@ -1,22 +1,39 @@
import socket
import time
from enum import Enum
from pathlib import Path
from threading import Thread, Lock
from tqdm import tqdm
from fabric2 import ThreadingGroup, SerialGroup, Config, Connection
from threading import Lock, Thread
import paramiko
from fabric2 import Config, Connection, SerialGroup, ThreadingGroup
from tqdm import tqdm
class RunResult(Enum):
    """Outcome category recorded for one host after a command is run on it.

    thread_run() stores one of these next to each host's output so callers
    can group hosts by how the run ended.
    """

    # Command ran and exited with status 0.
    Success = 0
    # Command ran but exited with a non-zero status.
    CommandFailure = 1
    # Connecting failed, or some other exception was raised during the run.
    Error = 2
    # The run was interrupted by KeyboardInterrupt (C-c).
    Cancelled = 3
    # Not produced by thread_run() yet.
    Timeout = 4  # FIXME support host timeouts
def thread_run(connection, command, result_lock, result_queue):
    """Run ``command`` on ``connection`` and record the outcome.

    Exactly one ``(connection, res, runresult)`` tuple is appended to
    ``result_queue`` while ``result_lock`` is held.

    Args:
        connection: object whose ``run(command, warn=True, hide=True)`` executes
            the command (presumably a fabric2 ``Connection`` -- confirm with caller).
        command: the command passed through to ``connection.run``.
        result_lock: context manager guarding ``result_queue``.
        result_queue: list-like collector; must support ``append``.

    The recorded ``res`` is the value returned by ``connection.run`` when the
    command ran (whatever its exit status), or a descriptive string when an
    exception was caught. ``runresult`` is the matching ``RunResult`` member.
    Exceptions derived from ``Exception`` and ``KeyboardInterrupt`` are caught
    and turned into a result instead of propagating.
    """
    runresult = RunResult.Success
    try:
        # warn=True is expected to report a non-zero exit through res.exited
        # rather than raising, which is why the status is checked here.
        res = connection.run(command, warn=True, hide=True)
        if res.exited != 0:
            runresult = RunResult.CommandFailure
    except (
        paramiko.ssh_exception.NoValidConnectionsError,
        paramiko.ssh_exception.SSHException,
        socket.gaierror,
    ) as inst:
        res = f"Could not connect to host: {inst}"
        runresult = RunResult.Error
    except KeyboardInterrupt as inst:
        # NOTE(review): CPython raises KeyboardInterrupt in the main thread, so
        # this branch presumably only fires when called from there -- confirm.
        res = f"Canceled host {inst} from C-c"
        runresult = RunResult.Cancelled
    except Exception as inst:
        # Last-resort boundary: one misbehaving host must still yield a result.
        res = f"Unknown error for host: {inst}"
        runresult = RunResult.Error

    # Always record a three-element tuple; consumers unpack
    # (connection, result, runresult).
    with result_lock:
        result_queue.append((connection, res, runresult))
# A set of hosts we can target with a series of commands and also can collect output of each command
class HostSet:
@ -58,24 +75,37 @@ class HostSet:
if ((time.time() - tupdate) > 10.0):
tupdate = time.time()
print("Still waiting on ", [thread.host for thread in threads])
# FIXME have the thread killed if it's still waiting after $TIMEOUT
prog.close()
# Gather up results by output
gathered = {}
for connection, result in resq:
success = []
error = []
commandfail = []
for connection, result, runresult in resq:
# print(connection, result, runresult)
rstr = str(result)
if not rstr in gathered:
gathered[rstr] = []
gathered[rstr].append(connection)
if runresult in (RunResult.Error, RunResult.Cancelled):
error.append(connection)
elif runresult == RunResult.CommandFailure:
commandfail.append(connection)
else:
success.append(connection)
# display results
for result, connections in gathered.items():
print('-----> [{}]'.format(' '.join(connection.original_host for connection in connections)))
print(result)
# ## gather_output
# for connection in self.connections:
# # import pdb
# # pdb.set_trace()
# res = connection.run(command)
# print('---- {} ----'.format(connection.original_host))
# print(res)
print('#####> RUN SUMMARY <#####')
if (success):
print(' Succeeded: ', ', '.join(connection.original_host for connection in success))
if (commandfail):
print(' Command fail: ', ', '.join(connection.original_host for connection in commandfail))
if (error):
print('Failed with Error: ', ','.join(connection.original_host for connection in error))