From: Afreen Misbah Date: Mon, 11 Aug 2025 09:03:32 +0000 (+0530) Subject: mgr/dashboard: Replace capacity threshold data with prometheus metrics X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=3281ccfe3542e22e141681cba77cc7970ba10e7b;p=ceph.git mgr/dashboard: Replace capacity threshold data with prometheus metrics - Fixes https://tracker.ceph.com/issues/72519 - the OSD dump metric is used in /api/osd/settings - this metric creates a performance bottleneck when there are thousands of OSDs - replace it with equivalent Prometheus metrics - minor refactors - including renaming and comments. Signed-off-by: Afreen Misbah --- diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.html index f405c1b89f1d5..28d5e18e5b3e7 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.html @@ -229,8 +229,8 @@ + [lowThreshold]="capacityCardData.osdNearfull" + [highThreshold]="capacityCardData.osdFull"> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.ts index 6d17aacceab6a..c4d74c33f3ecc 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.ts @@ -2,12 +2,14 @@ import { Component, OnDestroy, OnInit } from '@angular/core'; import _ from 'lodash'; import { BehaviorSubject, EMPTY, Observable, Subject, Subscription, of } from 'rxjs'; -import { catchError, exhaustMap, switchMap, take, takeUntil } from 'rxjs/operators'; +import { catchError, exhaustMap, switchMap, takeUntil } from 
'rxjs/operators'; import { HealthService } from '~/app/shared/api/health.service'; -import { OsdService } from '~/app/shared/api/osd.service'; -import { PrometheusService } from '~/app/shared/api/prometheus.service'; -import { Promqls as queries } from '~/app/shared/enum/dashboard-promqls.enum'; +import { PrometheusService, PromqlGuageMetric } from '~/app/shared/api/prometheus.service'; +import { + CapacityCardQueries, + UtilizationCardQueries +} from '~/app/shared/enum/dashboard-promqls.enum'; import { Icons } from '~/app/shared/enum/icons.enum'; import { DashboardDetails } from '~/app/shared/models/cd-details'; import { Permissions } from '~/app/shared/models/permissions'; @@ -26,7 +28,6 @@ import { MgrModuleService } from '~/app/shared/api/mgr-module.service'; import { AlertClass } from '~/app/shared/enum/health-icon.enum'; import { HardwareService } from '~/app/shared/api/hardware.service'; import { SettingsService } from '~/app/shared/api/settings.service'; -import { OsdSettings } from '~/app/shared/models/osd-settings'; import { IscsiMap, MdsMap, @@ -36,6 +37,11 @@ import { PgStatus } from '~/app/shared/models/health.interface'; +type CapacityCardData = { + osdNearfull: number; + osdFull: number; +}; + @Component({ selector: 'cd-dashboard-v3', templateUrl: './dashboard-v3.component.html', @@ -43,8 +49,11 @@ import { }) export class DashboardV3Component extends PrometheusListHelper implements OnInit, OnDestroy { detailsCardData: DashboardDetails = {}; - osdSettingsService: any; - osdSettings = new OsdSettings(); + capacityCardData: CapacityCardData = { + osdNearfull: null, + osdFull: null + }; + interval = new Subscription(); permissions: Permissions; enabledFeature$: FeatureTogglesMap$; color: string; @@ -102,7 +111,6 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit constructor( private summaryService: SummaryService, private orchestratorService: OrchestratorService, - private osdService: OsdService, private 
authStorageService: AuthStorageService, private featureToggles: FeatureTogglesService, private healthService: HealthService, @@ -121,7 +129,6 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit ngOnInit() { super.ngOnInit(); if (this.permissions.configOpt.read) { - this.getOsdSettings(); this.isHardwareEnabled$ = this.getHardwareConfig(); this.hardwareSummary$ = this.hardwareSubject.pipe( switchMap(() => @@ -148,6 +155,7 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit this.getPrometheusData(this.prometheusService.lastHourDateObject); this.getDetailsCardData(); this.getTelemetryReport(); + this.getCapacityCardData(); this.prometheusAlertService.getAlerts(true); } @@ -185,23 +193,38 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit ); } - private getOsdSettings() { - this.osdSettingsService = this.osdService - .getOsdSettings() - .pipe(take(1)) - .subscribe((data: OsdSettings) => { - this.osdSettings = data; - }); - } - public getPrometheusData(selectedTime: any) { - this.queriesResults = this.prometheusService.getPrometheusQueriesData( + this.queriesResults = this.prometheusService.getRangeQueriesData( selectedTime, - queries, + UtilizationCardQueries, this.queriesResults ); } + getCapacityQueryValues(data: PromqlGuageMetric['result']) { + let osdFull = null; + let osdNearfull = null; + if (data?.[0]?.metric?.['__name__'] === CapacityCardQueries.OSD_FULL) { + osdFull = data[0]?.value?.[1]; + osdNearfull = data[1]?.value?.[1]; + } else { + osdFull = data?.[1]?.value?.[1]; + osdNearfull = data?.[0]?.value?.[1]; + } + return [osdFull, osdNearfull]; + } + + getCapacityCardData() { + const CAPACITY_QUERY = `{__name__=~"${CapacityCardQueries.OSD_FULL}|${CapacityCardQueries.OSD_NEARFULL}"}`; + this.prometheusService + .getGaugeQueryData(CAPACITY_QUERY) + .subscribe((data: PromqlGuageMetric) => { + const [osdFull, osdNearfull] = this.getCapacityQueryValues(data?.result); + 
this.capacityCardData.osdFull = this.prometheusService.formatGuageMetric(osdFull); + this.capacityCardData.osdNearfull = this.prometheusService.formatGuageMetric(osdNearfull); + }); + } + private getTelemetryReport() { this.healthService.getTelemetryStatus().subscribe((enabled: boolean) => { this.telemetryEnabled = enabled; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.ts index f7658732049a3..fee52fc13f0d1 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.ts @@ -149,7 +149,7 @@ export class RgwOverviewDashboardComponent implements OnInit, OnDestroy { } getPrometheusData(selectedTime: any) { - this.queriesResults = this.prometheusService.getPrometheusQueriesData( + this.queriesResults = this.prometheusService.getRangeQueriesData( selectedTime, queries, this.queriesResults, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/prometheus.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/prometheus.service.ts index 8e1151da13d0d..cefcedca5c9a0 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/prometheus.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/prometheus.service.ts @@ -1,8 +1,8 @@ import { HttpClient } from '@angular/common/http'; import { Injectable } from '@angular/core'; -import { Observable, Subscription, forkJoin, timer } from 'rxjs'; -import { map, switchMap } from 'rxjs/operators'; +import { Observable, Subscription, forkJoin, of, timer } from 'rxjs'; +import { catchError, map, switchMap } from 'rxjs/operators'; import { AlertmanagerSilence } from '../models/alertmanager-silence'; import { @@ -12,6 +12,16 @@ import { } from 
'../models/prometheus-alerts'; import moment from 'moment'; +export type PromethuesGaugeMetricResult = { + metric: Record; // metric metadata + value: [number, string]; // timestamp, value +}; + +export type PromqlGuageMetric = { + resultType: 'vector'; + result: PromethuesGaugeMetricResult[]; +}; + @Injectable({ providedIn: 'root' }) @@ -38,10 +48,16 @@ export class PrometheusService { } } + // Range Queries getPrometheusData(params: any): any { return this.http.get(`${this.baseURL}/data`, { params }); } + // Gauge Queries + getPrometheusQueryData(params: { params: string }): Observable { + return this.http.get(`${this.baseURL}/prometheus_query_data`, { params }); + } + ifAlertmanagerConfigured(fn: (value?: string) => void, elseFn?: () => void): void { this.ifSettingConfigured(this.settingsKey.alertmanager, fn, elseFn); } @@ -131,12 +147,20 @@ export class PrometheusService { return data.value || data.instance || ''; } - getPrometheusQueriesData( - selectedTime: any, - queries: any, - queriesResults: any, - checkNan?: boolean - ) { + getGaugeQueryData(query: string): Observable { + return this.getPrometheusQueryData({ params: query }).pipe( + map((result: PromqlGuageMetric) => result), + catchError(() => of({ result: [] } as PromqlGuageMetric)) + ); + } + + formatGuageMetric(data: string): number { + const value: number = parseFloat(data ?? ''); + // Gauge value can be "NaN", "+Inf", "-Inf" in case of errors + return isFinite(value) ? 
value : null; + } + + getRangeQueriesData(selectedTime: any, queries: any, queriesResults: any, checkNan?: boolean) { this.ifPrometheusConfigured(() => { if (this.timerGetPrometheusDataSub) { this.timerGetPrometheusDataSub.unsubscribe(); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/dashboard-promqls.enum.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/dashboard-promqls.enum.ts index f1bbebed51de6..08e054173952c 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/dashboard-promqls.enum.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/dashboard-promqls.enum.ts @@ -1,4 +1,4 @@ -export enum Promqls { +export enum UtilizationCardQueries { USEDCAPACITY = 'ceph_cluster_total_used_bytes', WRITEIOPS = 'sum(rate(ceph_pool_wr[1m]))', READIOPS = 'sum(rate(ceph_pool_rd[1m]))', @@ -9,6 +9,11 @@ export enum Promqls { RECOVERYBYTES = 'sum(rate(ceph_osd_recovery_bytes[1m]))' } +export enum CapacityCardQueries { + OSD_NEARFULL = 'ceph_osd_nearfull_ratio', + OSD_FULL = 'ceph_osd_full_ratio' +} + export enum RgwPromqls { RGW_REQUEST_PER_SECOND = 'sum(rate(ceph_rgw_req[1m]))', AVG_GET_LATENCY = '(sum(rate(ceph_rgw_op_get_obj_lat_sum[1m])) / sum(rate(ceph_rgw_op_get_obj_lat_count[1m]))) * 1000',