From: Juan Miguel Olmo Martínez Date: Thu, 19 Sep 2024 07:22:24 +0000 (+0200) Subject: mgr/callhome: management of diagnostic upload requests (#78) X-Git-Tag: testing/wip-pdonnell-testing-20260323.122957-tentacle~543 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e7e6c6362e8f8776e7d269298389a6b289948107;p=ceph-ci.git mgr/callhome: management of diagnostic upload requests (#78) Call Home stores diagnostic upload requests for 10 days Call Home does not process operations sent repeated by IBM Call Home mesh Call Home able repeat level 1 operations after 5 minutes Call Home able to repeat level2 (and upper) operations after 1 hour Resolves: rhbz#2313070 https://bugzilla.redhat.com/show_bug.cgi?id=2313070 Signed-off-by: Juan Miguel Olmo Martínez (cherry picked from commit 6e464b2ef44b4463d82407d157ede2aa16117b7d) (cherry picked from commit c593965c2762c333560f13fe20e35e4412204105) --- diff --git a/src/pybind/mgr/call_home_agent/module.py b/src/pybind/mgr/call_home_agent/module.py index a24ec10cba2..bcac9f9bc54 100644 --- a/src/pybind/mgr/call_home_agent/module.py +++ b/src/pybind/mgr/call_home_agent/module.py @@ -626,7 +626,8 @@ def notProcessed(item: dict) -> bool: return not_processed -def add_operation(item: dict, event_id: str = '') -> str: +def add_operation(item: dict, l1_cooling_window_seconds: int, + l2_cooling_window_seconds: int, event_id: str = '') -> str: """ Add an operation coming from an inbound request to the operation dicts. return the the key to locate the new operation in operations dict @@ -695,18 +696,26 @@ def add_operation(item: dict, event_id: str = '') -> str: return key # reject UPLOAD SNAP operations with same level than other being processed + # if they are inside the cooling window time interval for the level for op_key, op in operations.items(): if op['type'] == UPLOAD_SNAP and op['level'] == item['options']['level']: - operations[key] = item['options'] - operations[key]['type']= item['operation'] - operations[key]['status'] = OPERATION_STATUS_REQUEST_REJECTED - operations[key]['progress'] = 0 - operations[key]['description'] = f"Operations ({key}): <{item['operation']}> operation\ - <{item['options']['pmr']}>:There is another operation with identifier {op_key}\ - which has the same level and is already being processed" - operations[key]['status_sent'] = ST_NOT_SENT - operations[key]['event_id'] = event_id - return key + # we have another log upload operation for same level + # verify if it is inside the "cooling window for the level" + if op['level'] == 1: + cooling_window_seconds = l1_cooling_window_seconds + else: + cooling_window_seconds = l2_cooling_window_seconds + if int(time.time() - op['created']) <= cooling_window_seconds: + operations[key] = item['options'] + operations[key]['type']= item['operation'] + operations[key]['status'] = OPERATION_STATUS_REQUEST_REJECTED + operations[key]['progress'] = 0 + operations[key]['description'] = f"Operations ({key}): <{item['operation']}> operation\ + <{item['options']['pmr']}>:There is another operation with identifier {op_key}\ + which has the same level and is already being processed" + operations[key]['status_sent'] = ST_NOT_SENT + operations[key]['event_id'] = event_id + return key #reject UPLOAD SNAP operations with same si_requestid than other being processed if item['options']['si_requestid'] in operations.keys(): @@ -874,8 +883,13 @@ class Report: self.mgr.log.info(f"Operations: Added confirm response operation for {item}") # Add the operation to operations dict - key = add_operation(item, event_id) + key = add_operation(item, + self.mgr.level_one_upload_cooling_window_seconds, + self.mgr.level_two_upload_cooling_window_seconds, + event_id) self.mgr.log.info(f"Operations: Added operation {item}") + else: + self.mgr.log.info(f"Operations: Rejected already processed operation with SI request id = {item.get('options', {}).get('si_requestid', '')}") except Exception as ex: self.mgr.log.error(f"Operations: error: {ex} adding {item}") @@ -1114,10 +1128,22 @@ class CallHomeAgent(MgrModule): desc='Password obtained from the IBM Transfer ID service' ), Option( - name='upload_snap_cooling_window_seconds', + name='upload_ops_persistence_seconds', + type='int', + default=864000, + desc='Time interval during which requests with same SI request ID will not be processed' + ), + Option( + name='level_one_upload_cooling_window_seconds', + type='int', + default=300, + desc='Time interval needed to pass before a new diagnostics upload operation level one will be accepted' + ), + Option( + name='level_two_upload_cooling_window_seconds', type='int', - default=86400, - desc='Discard operations with the same Storage Insigths request id during the cooling window' + default=3600, + desc='Time interval needed to pass before a new diagnostics upload operation level two(or upper) will be accepted' ), ] @@ -1184,7 +1210,9 @@ class CallHomeAgent(MgrModule): # Other options not using env vars self.valid_container_registry = self.get_module_option('valid_container_registry') - self.upload_snap_cooling_window_seconds = self.get_module_option('upload_snap_cooling_window_seconds') + self.upload_ops_persistence_seconds = self.get_module_option('upload_ops_persistence_seconds') + self.level_one_upload_cooling_window_seconds = self.get_module_option('level_one_upload_cooling_window_seconds') + self.level_two_upload_cooling_window_seconds = self.get_module_option('level_two_upload_cooling_window_seconds') # ecurep options: self.ecurep_url = self.get_module_option('ecurep_url') @@ -1380,7 +1408,7 @@ class CallHomeAgent(MgrModule): # Do not delete operations inside the upload snap cooling window if operations[operation_key]['type'] == UPLOAD_SNAP and 'created' in operations[operation_key].keys(): - if int(time.time() - operations[operation_key]['created']) <= self.upload_snap_cooling_window_seconds: + if int(time.time() - operations[operation_key]['created']) <= self.upload_ops_persistence_seconds: continue self.log.info(f'Operations ({operation_key}): Removed finished <{operations[operation_key]["type"]}> operation with status <{operations[operation_key]["status"]}>') @@ -1712,7 +1740,9 @@ class CallHomeAgent(MgrModule): 'enable_status': 'true', 'version': 1} } - key = add_operation(request) + key = add_operation(request, + self.level_one_upload_cooling_window_seconds, + self.level_two_upload_cooling_window_seconds) except Exception as ex: return HandleCommandResult(stderr=str(ex))