]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/dashboard: Replace capacity threshold data with prometheus metrics 64945/head
authorAfreen Misbah <afreen@ibm.com>
Mon, 11 Aug 2025 09:03:32 +0000 (14:33 +0530)
committerAfreen Misbah <afreen@ibm.com>
Wed, 13 Aug 2025 21:02:26 +0000 (02:32 +0530)
- Fixes https://tracker.ceph.com/issues/72519
- The OSD dump metrics are used in /api/osd/settings
- These metrics create a performance bottleneck when there are thousands of OSDs
- Replace them with the equivalent Prometheus metrics
- Minor refactors, including renames and comments

Signed-off-by: Afreen Misbah <afreen@ibm.com>
src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.html
src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.ts
src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.ts
src/pybind/mgr/dashboard/frontend/src/app/shared/api/prometheus.service.ts
src/pybind/mgr/dashboard/frontend/src/app/shared/enum/dashboard-promqls.enum.ts

index f405c1b89f1d58254518dcc2c9a1e8a6b348293f..28d5e18e5b3e752fcfbb43902502097b53b68d2d 100644 (file)
             <ng-container class="ms-4 me-4"
                           *ngIf="capacity">
               <cd-dashboard-pie [data]="{max: capacity.total_bytes, current: capacity.total_used_raw_bytes}"
-                                [lowThreshold]="osdSettings.nearfull_ratio"
-                                [highThreshold]="osdSettings.full_ratio">
+                                [lowThreshold]="capacityCardData.osdNearfull"
+                                [highThreshold]="capacityCardData.osdFull">
               </cd-dashboard-pie>
             </ng-container>
           </cd-card>
index 6d17aacceab6acc6b0371c9899ee9b4dc003e556..c4d74c33f3ecce11a90e84d9b8b2ee1544df6db3 100644 (file)
@@ -2,12 +2,14 @@ import { Component, OnDestroy, OnInit } from '@angular/core';
 
 import _ from 'lodash';
 import { BehaviorSubject, EMPTY, Observable, Subject, Subscription, of } from 'rxjs';
-import { catchError, exhaustMap, switchMap, take, takeUntil } from 'rxjs/operators';
+import { catchError, exhaustMap, switchMap, takeUntil } from 'rxjs/operators';
 
 import { HealthService } from '~/app/shared/api/health.service';
-import { OsdService } from '~/app/shared/api/osd.service';
-import { PrometheusService } from '~/app/shared/api/prometheus.service';
-import { Promqls as queries } from '~/app/shared/enum/dashboard-promqls.enum';
+import { PrometheusService, PromqlGuageMetric } from '~/app/shared/api/prometheus.service';
+import {
+  CapacityCardQueries,
+  UtilizationCardQueries
+} from '~/app/shared/enum/dashboard-promqls.enum';
 import { Icons } from '~/app/shared/enum/icons.enum';
 import { DashboardDetails } from '~/app/shared/models/cd-details';
 import { Permissions } from '~/app/shared/models/permissions';
@@ -26,7 +28,6 @@ import { MgrModuleService } from '~/app/shared/api/mgr-module.service';
 import { AlertClass } from '~/app/shared/enum/health-icon.enum';
 import { HardwareService } from '~/app/shared/api/hardware.service';
 import { SettingsService } from '~/app/shared/api/settings.service';
-import { OsdSettings } from '~/app/shared/models/osd-settings';
 import {
   IscsiMap,
   MdsMap,
@@ -36,6 +37,11 @@ import {
   PgStatus
 } from '~/app/shared/models/health.interface';
 
+// Thresholds shown on the capacity card pie chart.
+// Both fields are initialised to null by the component and filled in later
+// from the Prometheus gauge query issued in getCapacityCardData().
+type CapacityCardData = {
+  osdNearfull: number; // bound to the pie chart's lowThreshold input
+  osdFull: number; // bound to the pie chart's highThreshold input
+};
+
 @Component({
   selector: 'cd-dashboard-v3',
   templateUrl: './dashboard-v3.component.html',
@@ -43,8 +49,11 @@ import {
 })
 export class DashboardV3Component extends PrometheusListHelper implements OnInit, OnDestroy {
   detailsCardData: DashboardDetails = {};
-  osdSettingsService: any;
-  osdSettings = new OsdSettings();
+  capacityCardData: CapacityCardData = {
+    osdNearfull: null,
+    osdFull: null
+  };
+  interval = new Subscription();
   permissions: Permissions;
   enabledFeature$: FeatureTogglesMap$;
   color: string;
@@ -102,7 +111,6 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit
   constructor(
     private summaryService: SummaryService,
     private orchestratorService: OrchestratorService,
-    private osdService: OsdService,
     private authStorageService: AuthStorageService,
     private featureToggles: FeatureTogglesService,
     private healthService: HealthService,
@@ -121,7 +129,6 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit
   ngOnInit() {
     super.ngOnInit();
     if (this.permissions.configOpt.read) {
-      this.getOsdSettings();
       this.isHardwareEnabled$ = this.getHardwareConfig();
       this.hardwareSummary$ = this.hardwareSubject.pipe(
         switchMap(() =>
@@ -148,6 +155,7 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit
     this.getPrometheusData(this.prometheusService.lastHourDateObject);
     this.getDetailsCardData();
     this.getTelemetryReport();
+    this.getCapacityCardData();
     this.prometheusAlertService.getAlerts(true);
   }
 
@@ -185,23 +193,38 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit
     );
   }
 
-  private getOsdSettings() {
-    this.osdSettingsService = this.osdService
-      .getOsdSettings()
-      .pipe(take(1))
-      .subscribe((data: OsdSettings) => {
-        this.osdSettings = data;
-      });
-  }
-
   public getPrometheusData(selectedTime: any) {
-    this.queriesResults = this.prometheusService.getPrometheusQueriesData(
+    // Request the utilization card queries for the selected time window; the previous
+    // results are handed back to the service and replaced by whatever it returns.
+    this.queriesResults = this.prometheusService.getRangeQueriesData(
       selectedTime,
-      queries,
+      UtilizationCardQueries,
       this.queriesResults
     );
   }
 
+  /**
+   * Picks the raw OSD full / nearfull ratio samples out of a gauge query result.
+   *
+   * Each series is matched by its `__name__` label instead of by its position in
+   * the result, so the outcome does not depend on the order in which the series
+   * are returned, nor on unrelated or additional series being present.
+   *
+   * @param data `result` array of the gauge query response; may be missing.
+   * @returns `[osdFull, osdNearfull]` as raw sample strings. An entry is
+   *   `undefined` when no series with the corresponding metric name exists.
+   */
+  getCapacityQueryValues(data: PromqlGuageMetric['result']) {
+    // Tolerate a missing or malformed payload instead of throwing inside subscribe()
+    const series = Array.isArray(data) ? data : [];
+    const sampleFor = (metricName: string) =>
+      series.find((entry) => entry?.metric?.['__name__'] === metricName)?.value?.[1];
+    return [sampleFor(CapacityCardQueries.OSD_FULL), sampleFor(CapacityCardQueries.OSD_NEARFULL)];
+  }
+
+  /**
+   * Loads the OSD full / nearfull ratios with a single gauge query and stores
+   * the parsed numbers on `capacityCardData`.
+   */
+  getCapacityCardData() {
+    // One selector matching both metric names, so only a single request is needed
+    const metricNames = [CapacityCardQueries.OSD_FULL, CapacityCardQueries.OSD_NEARFULL].join('|');
+    const capacityQuery = `{__name__=~"${metricNames}"}`;
+    const gauge$ = this.prometheusService.getGaugeQueryData(capacityQuery);
+    gauge$.subscribe((response: PromqlGuageMetric) => {
+      const ratios = this.getCapacityQueryValues(response?.result);
+      this.capacityCardData.osdFull = this.prometheusService.formatGuageMetric(ratios[0]);
+      this.capacityCardData.osdNearfull = this.prometheusService.formatGuageMetric(ratios[1]);
+    });
+  }
+
   private getTelemetryReport() {
     this.healthService.getTelemetryStatus().subscribe((enabled: boolean) => {
       this.telemetryEnabled = enabled;
index f7658732049a314ae965900a3479e7d8b8a52150..fee52fc13f0d13b48744e5451e55f55a977e07ad 100644 (file)
@@ -149,7 +149,7 @@ export class RgwOverviewDashboardComponent implements OnInit, OnDestroy {
   }
 
   getPrometheusData(selectedTime: any) {
-    this.queriesResults = this.prometheusService.getPrometheusQueriesData(
+    this.queriesResults = this.prometheusService.getRangeQueriesData(
       selectedTime,
       queries,
       this.queriesResults,
index 8e1151da13d0d677cf1ae8ac4f73f4f1bafbb933..cefcedca5c9a03327071dfb9c52a9558f8d92ca3 100644 (file)
@@ -1,8 +1,8 @@
 import { HttpClient } from '@angular/common/http';
 import { Injectable } from '@angular/core';
 
-import { Observable, Subscription, forkJoin, timer } from 'rxjs';
-import { map, switchMap } from 'rxjs/operators';
+import { Observable, Subscription, forkJoin, of, timer } from 'rxjs';
+import { catchError, map, switchMap } from 'rxjs/operators';
 
 import { AlertmanagerSilence } from '../models/alertmanager-silence';
 import {
@@ -12,6 +12,16 @@ import {
 } from '../models/prometheus-alerts';
 import moment from 'moment';
 
+// One series entry inside the `result` array of a gauge query response.
+// NOTE(review): "Promethues" looks like a misspelling of "Prometheus"; the type is
+// exported, so any rename has to be done together with its users.
+export type PromethuesGaugeMetricResult = {
+  metric: Record<string, string>; // metric metadata; the dashboard reads `__name__` from it
+  value: [number, string]; // timestamp, value (the value arrives as a string)
+};
+
+// Shape of the gauge query payload as consumed by the dashboard.
+// NOTE(review): "Guage" looks like a misspelling of "Gauge"; the type is imported by
+// the dashboard-v3 component, so any rename has to be done together with its users.
+export type PromqlGuageMetric = {
+  resultType: 'vector';
+  result: PromethuesGaugeMetricResult[]; // list of returned series entries
+};
+
 @Injectable({
   providedIn: 'root'
 })
@@ -38,10 +48,16 @@ export class PrometheusService {
     }
   }
 
+  // Range queries: GET `${baseURL}/data` with the caller-supplied params
   getPrometheusData(params: any): any {
     return this.http.get<any>(`${this.baseURL}/data`, { params });
   }
 
+  /**
+   * Gauge queries: GET `${baseURL}/prometheus_query_data`.
+   *
+   * @param params Query-string parameters; the PromQL expression is sent as `params`.
+   * @returns Observable of the gauge query payload.
+   */
+  getPrometheusQueryData(params: { params: string }): Observable<PromqlGuageMetric> {
+    // Type the response instead of `any` so the declared return type is actually checked
+    return this.http.get<PromqlGuageMetric>(`${this.baseURL}/prometheus_query_data`, { params });
+  }
+
   ifAlertmanagerConfigured(fn: (value?: string) => void, elseFn?: () => void): void {
     this.ifSettingConfigured(this.settingsKey.alertmanager, fn, elseFn);
   }
@@ -131,12 +147,20 @@ export class PrometheusService {
     return data.value || data.instance || '';
   }
 
-  getPrometheusQueriesData(
-    selectedTime: any,
-    queries: any,
-    queriesResults: any,
-    checkNan?: boolean
-  ) {
+  /**
+   * Runs a gauge query and never errors: a failed request is turned into an
+   * empty result so that subscribers do not need their own error handling.
+   *
+   * @param query PromQL expression to evaluate.
+   * @returns Observable of the query payload, or of an empty vector on failure.
+   */
+  getGaugeQueryData(query: string): Observable<PromqlGuageMetric> {
+    // Fully typed fallback (no `as` cast), so a missing field is a compile error
+    const emptyResult: PromqlGuageMetric = { resultType: 'vector', result: [] };
+    return this.getPrometheusQueryData({ params: query }).pipe(
+      catchError(() => of(emptyResult))
+    );
+  }
+
+  /**
+   * Converts the raw string sample of a gauge metric into a number.
+   *
+   * @param data Sample value as a string; may be null or undefined when the
+   *   series was not returned.
+   * @returns The parsed number, or `null` when the sample is missing or does
+   *   not parse to a finite number.
+   */
+  formatGuageMetric(data: string | null | undefined): number | null {
+    const value = parseFloat(data ?? '');
+    // Gauge samples can be "NaN", "+Inf" or "-Inf"; none of them parses to a finite number
+    return Number.isFinite(value) ? value : null;
+  }
+
+  getRangeQueriesData(selectedTime: any, queries: any, queriesResults: any, checkNan?: boolean) {
     this.ifPrometheusConfigured(() => {
       if (this.timerGetPrometheusDataSub) {
         this.timerGetPrometheusDataSub.unsubscribe();
index f1bbebed51de6e7a1b03cebd0235026e7fb74a57..08e054173952c79f158a5f55abe19d1ba15ffebd 100644 (file)
@@ -1,4 +1,4 @@
-export enum Promqls {
+export enum UtilizationCardQueries {
   USEDCAPACITY = 'ceph_cluster_total_used_bytes',
   WRITEIOPS = 'sum(rate(ceph_pool_wr[1m]))',
   READIOPS = 'sum(rate(ceph_pool_rd[1m]))',
@@ -9,6 +9,11 @@ export enum Promqls {
   RECOVERYBYTES = 'sum(rate(ceph_osd_recovery_bytes[1m]))'
 }
 
+// Prometheus metric names used for the capacity card thresholds.
+// The dashboard component joins them into one `__name__` regex selector and
+// matches the returned series against these values.
+export enum CapacityCardQueries {
+  OSD_NEARFULL = 'ceph_osd_nearfull_ratio',
+  OSD_FULL = 'ceph_osd_full_ratio'
+}
+
 export enum RgwPromqls {
   RGW_REQUEST_PER_SECOND = 'sum(rate(ceph_rgw_req[1m]))',
   AVG_GET_LATENCY = '(sum(rate(ceph_rgw_op_get_obj_lat_sum[1m])) / sum(rate(ceph_rgw_op_get_obj_lat_count[1m]))) * 1000',