]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/callhome: management of diagnostic upload requests (#78)
authorJuan Miguel Olmo Martínez <jolmomar@ibm.com>
Thu, 19 Sep 2024 07:22:24 +0000 (09:22 +0200)
committerJustin Caratzas <jcaratza@redhat.com>
Tue, 23 Sep 2025 13:07:09 +0000 (09:07 -0400)
Call Home stores diagnostic upload requests for 10 days
Call Home does not process operations sent repeated by IBM Call Home mesh
Call Home able repeat level 1 operations after 5 minutes
Call Home able to repeat level2 (and upper) operations after 1 hour

Resolves: rhbz#2313070
          https://bugzilla.redhat.com/show_bug.cgi?id=2313070

Signed-off-by: Juan Miguel Olmo Martínez <jolmomar@ibm.com>
(cherry picked from commit 6e464b2ef44b4463d82407d157ede2aa16117b7d)
(cherry picked from commit c593965c2762c333560f13fe20e35e4412204105)

src/pybind/mgr/call_home_agent/module.py

index a24ec10cba29bf210343125c8ed5dda8353dfaf5..bcac9f9bc54fcc1bc5874c72d39327a6396a07cb 100644 (file)
@@ -626,7 +626,8 @@ def notProcessed(item: dict) -> bool:
 
     return not_processed
 
-def add_operation(item: dict, event_id: str = '') -> str:
+def add_operation(item: dict, l1_cooling_window_seconds: int,
+                  l2_cooling_window_seconds: int, event_id: str = '') -> str:
     """
     Add an operation coming from an inbound request to the operation dicts.
     return the the key to locate the new operation in operations dict
@@ -695,18 +696,26 @@ def add_operation(item: dict, event_id: str = '') -> str:
         return key
 
     # reject UPLOAD SNAP operations with same level than other being processed
+    # if they are inside the cooling window time interval for the level
     for op_key, op in operations.items():
         if op['type'] == UPLOAD_SNAP and op['level'] == item['options']['level']:
-            operations[key] = item['options']
-            operations[key]['type']= item['operation']
-            operations[key]['status'] = OPERATION_STATUS_REQUEST_REJECTED
-            operations[key]['progress'] = 0
-            operations[key]['description'] = f"Operations ({key}): <{item['operation']}> operation\
-                 <{item['options']['pmr']}>:There is another operation with identifier {op_key}\
-                    which has the same level and is already being processed"
-            operations[key]['status_sent'] = ST_NOT_SENT
-            operations[key]['event_id'] = event_id
-            return key
+            # we have another log upload operation for same level
+            # verify if it is inside the "cooling window for the level"
+            if op['level'] == 1:
+                cooling_window_seconds = l1_cooling_window_seconds
+            else:
+                cooling_window_seconds = l2_cooling_window_seconds
+            if int(time.time() - op['created']) <= cooling_window_seconds:
+                operations[key] = item['options']
+                operations[key]['type']= item['operation']
+                operations[key]['status'] = OPERATION_STATUS_REQUEST_REJECTED
+                operations[key]['progress'] = 0
+                operations[key]['description'] = f"Operations ({key}): <{item['operation']}> operation\
+                    <{item['options']['pmr']}>:There is another operation with identifier {op_key}\
+                        which has the same level and is already being processed"
+                operations[key]['status_sent'] = ST_NOT_SENT
+                operations[key]['event_id'] = event_id
+                return key
 
     #reject UPLOAD SNAP operations with same si_requestid than other being processed
     if item['options']['si_requestid'] in operations.keys():
@@ -874,8 +883,13 @@ class Report:
                             self.mgr.log.info(f"Operations: Added confirm response operation for {item}")
 
                             # Add the operation to operations dict
-                            key = add_operation(item, event_id)
+                            key = add_operation(item,
+                                                self.mgr.level_one_upload_cooling_window_seconds,
+                                                self.mgr.level_two_upload_cooling_window_seconds,
+                                                event_id)
                             self.mgr.log.info(f"Operations: Added operation {item}")
+                        else:
+                            self.mgr.log.info(f"Operations: Rejected already processed operation with SI request id = {item.get('options', {}).get('si_requestid', '')}")
         except Exception as ex:
             self.mgr.log.error(f"Operations: error: {ex} adding {item}")
 
@@ -1114,10 +1128,22 @@ class CallHomeAgent(MgrModule):
             desc='Password obtained from the IBM Transfer ID service'
         ),
         Option(
-            name='upload_snap_cooling_window_seconds',
+            name='upload_ops_persistence_seconds',
+            type='int',
+            default=864000,
+            desc='Time interval during which requests with same SI request ID will not be processed'
+        ),
+        Option(
+            name='level_one_upload_cooling_window_seconds',
+            type='int',
+            default=300,
+            desc='Time interval needed to pass before a new diagnostics upload operation level one will be accepted'
+        ),
+        Option(
+            name='level_two_upload_cooling_window_seconds',
             type='int',
-            default=86400,
-            desc='Discard operations with the same Storage Insigths request id during the cooling window'
+            default=3600,
+            desc='Time interval needed to pass before a new diagnostics upload operation level two(or upper) will be accepted'
         ),
     ]
 
@@ -1184,7 +1210,9 @@ class CallHomeAgent(MgrModule):
 
         # Other options not using env vars
         self.valid_container_registry = self.get_module_option('valid_container_registry')
-        self.upload_snap_cooling_window_seconds = self.get_module_option('upload_snap_cooling_window_seconds')
+        self.upload_ops_persistence_seconds = self.get_module_option('upload_ops_persistence_seconds')
+        self.level_one_upload_cooling_window_seconds = self.get_module_option('level_one_upload_cooling_window_seconds')
+        self.level_two_upload_cooling_window_seconds = self.get_module_option('level_two_upload_cooling_window_seconds')
 
         # ecurep options:
         self.ecurep_url = self.get_module_option('ecurep_url')
@@ -1380,7 +1408,7 @@ class CallHomeAgent(MgrModule):
 
                             # Do not delete operations inside the upload snap cooling window
                             if operations[operation_key]['type'] == UPLOAD_SNAP and 'created' in operations[operation_key].keys():
-                               if int(time.time() - operations[operation_key]['created']) <= self.upload_snap_cooling_window_seconds:
+                               if int(time.time() - operations[operation_key]['created']) <= self.upload_ops_persistence_seconds:
                                    continue
 
                             self.log.info(f'Operations ({operation_key}): Removed finished  <{operations[operation_key]["type"]}> operation with status <{operations[operation_key]["status"]}>')
@@ -1712,7 +1740,9 @@ class CallHomeAgent(MgrModule):
                                 'enable_status': 'true',
                                 'version': 1}
             }
-            key = add_operation(request)
+            key = add_operation(request,
+                                self.level_one_upload_cooling_window_seconds,
+                                self.level_two_upload_cooling_window_seconds)
 
         except Exception as ex:
             return HandleCommandResult(stderr=str(ex))