From 3378b78bca1dd9b85c48f9f335182e37cb42ae0e Mon Sep 17 00:00:00 2001 From: Prashant D Date: Mon, 8 Aug 2022 10:55:23 -0400 Subject: [PATCH] mon/LogMonitor: Use generic cluster log level config We do not control the verbosity of the LogEntry which is getting logged to stderr, graylog and journald. This causes excessive flooding of logs to /var/log, making a filesystem to fill up quickly. Also we have different config variables namely mon_cluster_log_file_level and mon_cluster_log_to_syslog_level to control verbosity at cluster log file and syslog level respectively. Add a generic cluster log level config variable which controls cluster log verbosity for all external entities. Additionally, this patch addresses the regression of `mon_cluster_log_file_level` option which doesn't take effect because of code refactoring of LogMonitor::update_from_paxos (commit : 7c84e06). Fixes: https://tracker.ceph.com/issues/57061 Fixes: https://tracker.ceph.com/issues/57049 Signed-off-by: Prashant D (cherry picked from commit cb0d4a57bd25d32919f7ed44815b4301f41e9508) --- PendingReleaseNotes | 4 + qa/standalone/ceph-helpers.sh | 16 +++ qa/standalone/mon/mon-cluster-log.sh | 205 +++++++++++++++++++++++++++ src/common/options/mon.yaml.in | 18 +-- src/mon/LogMonitor.cc | 37 +++-- src/mon/LogMonitor.h | 18 +-- 6 files changed, 251 insertions(+), 47 deletions(-) create mode 100755 qa/standalone/mon/mon-cluster-log.sh diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 318968983821b..bd8b8c871d31b 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -69,6 +69,10 @@ affected and to clean them up accordingly. * mgr/snap-schedule: For clusters with multiple CephFS file systems, all the snap-schedule commands now expect the '--fs' argument. +* The `mon_cluster_log_file_level` and `mon_cluster_log_to_syslog_level` options + have been removed. Henceforth, users should use the new generic option + `mon_cluster_log_level` to control the cluster log level verbosity for the cluster + log file as well as for all external entities. * RGW: Fixed a S3 Object Lock bug with PutObjectRetention requests that specify a RetainUntilDate after the year 2106. This date was truncated to 32 bits when stored, so a much earlier date was used for object lock enforcement. This does diff --git a/qa/standalone/ceph-helpers.sh b/qa/standalone/ceph-helpers.sh index bf2c91bc04274..7bba1c04d3e10 100755 --- a/qa/standalone/ceph-helpers.sh +++ b/qa/standalone/ceph-helpers.sh @@ -1744,6 +1744,22 @@ function test_wait_for_peered() { teardown $dir || return 1 } +function wait_for_string() { + local logfile=$1 + local searchstr=$2 + + status=1 + for ((i=0; i < $TIMEOUT; i++)); do + echo $i + if ! grep "$searchstr" $logfile; then + sleep 1 + else + status=0 + break + fi + done + return $status +} ####################################################################### diff --git a/qa/standalone/mon/mon-cluster-log.sh b/qa/standalone/mon/mon-cluster-log.sh new file mode 100755 index 0000000000000..6e969596b2037 --- /dev/null +++ b/qa/standalone/mon/mon-cluster-log.sh @@ -0,0 +1,205 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2022 Red Hat +# +# Author: Prashant D +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7156" # git grep '\<7156\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_cluster_log_level() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + ceph config set mon.a mon_cluster_log_level debug + ceph osd pool create replicated1 8 8 + ceph osd pool set replicated1 size 1 --yes-i-really-mean-it + ceph osd pool set replicated1 min_size 1 + + WAIT_FOR_CLEAN_TIMEOUT=60 wait_for_clean + ERRORS=0 + truncate $dir/log -s 0 + ceph pg deep-scrub 1.0 + search_str="cluster [[]DBG[]] 1.0 deep-scrub" + TIMEOUT=60 wait_for_string $dir/log "$search_str" + grep -q "$search_str" $dir/log + return_code=$? + if [ $return_code -ne 0 ]; then + echo "Failed : Could not find DBG log in the cluster log file" + ERRORS=$(($ERRORS + 1)) + fi + + ceph osd down 0 + TIMEOUT=20 wait_for_osd up 0 || return 1 + grep -q "cluster [[]INF[]] osd.0.*boot" $dir/log + return_code=$? + if [ $return_code -ne 0 ]; then + echo "Failed : Could not find INF log in the cluster log file" + ERRORS=$(($ERRORS + 1)) + fi + + ceph config set mon.a mon_cluster_log_level info + ceph pg deep-scrub 1.1 + search_str="cluster [[]DBG[]] 1.1 deep-scrub" + TIMEOUT=60 wait_for_string $dir/log "$search_str" + grep -q "$search_str" $dir/log + return_code=$? + if [ $return_code -eq 0 ]; then + echo "Failed : Found DBG log in the cluster log file" + ERRORS=$(($ERRORS + 1)) + fi + + ceph config set mon.a mon_cluster_log_level warn + ceph osd set noup + ceph osd down osd.0 + ceph osd unset noup + TIMEOUT=60 wait_for_osd up 0 || return 1 + search_str="cluster [[]WRN[]] Health check failed: noup flag(s) set (OSDMAP_FLAGS)" + grep -q "$search_str" $dir/log + return_code=$? + if [ $return_code -ne 0 ]; then + echo "Failed : No WRN entries found in the cluster log file" + ERRORS=$(($ERRORS + 1)) + fi + + ceph osd out 0 + ceph osd in 0 + WAIT_FOR_CLEAN_TIMEOUT=60 wait_for_clean + search_str="cluster [[]INF[]] Client client.admin marked osd.0 out, while it was still marked up" + ceph log last 1000 | grep -q "$search_str" || return 1 + TIMEOUT=60 wait_for_string $dir/log "$search_str" + grep -q "$search_str" $dir/log + return_code=$? + if [ $return_code -eq 0 ]; then + echo "Failed : Found INF log in the cluster log file" + ERRORS=$(($ERRORS + 1)) + fi + + if [ $ERRORS -gt 0 ]; then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +function TEST_journald_cluster_log_level() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + ceph config set mon.a mon_cluster_log_level debug + ceph osd pool create replicated1 8 8 + ceph osd pool set replicated1 size 1 --yes-i-really-mean-it + ceph osd pool set replicated1 min_size 1 + + WAIT_FOR_CLEAN_TIMEOUT=60 wait_for_clean + ERRORS=0 + ceph config set mon.a mon_cluster_log_to_journald true + + ceph pg deep-scrub 1.0 + search_str="1.0 deep-scrub" + TIMEOUT=60 + sleep $TIMEOUT + journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=7 --output=json-pretty --since "60 seconds ago" |jq '.MESSAGE' > $dir/journal.log + grep -q "$search_str" $dir/journal.log + return_code=$? + if [ $return_code -ne 0 ]; then + echo "Failed : Could not find DBG log in the journalctl log file" + ERRORS=$(($ERRORS + 1)) + fi + + ceph osd down 0 + TIMEOUT=20 wait_for_osd up 0 || return 1 + search_str="osd.0.*boot" + journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=6 --output=json-pretty --since "60 seconds ago" |jq '.MESSAGE' > $dir/journal.log + grep -q "$search_str" $dir/journal.log + return_code=$? + if [ $return_code -ne 0 ]; then + echo "Failed : Could not find INF log in the journalctl log file" + ERRORS=$(($ERRORS + 1)) + fi + + ceph config set mon.a mon_cluster_log_level info + ceph pg deep-scrub 1.1 + TIMEOUT=60 + sleep $TIMEOUT + search_str="1.1 deep-scrub" + journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=7 --output=json-pretty --since "60 seconds ago" |jq '.MESSAGE' > $dir/journal.log + grep -q "$search_str" $dir/journal.log + return_code=$? + if [ $return_code -eq 0 ]; then + echo "Failed : Found $clog_entries DBG log entries in the journalctl log file" + ERRORS=$(($ERRORS + 1)) + fi + + ceph config set mon.a mon_cluster_log_level warn + ceph osd set noup + ceph osd down osd.0 + ceph osd unset noup + TIMEOUT=60 wait_for_osd up 0 || return 1 + search_str="Health check failed: noup flag(s) set (OSDMAP_FLAGS)" + journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=4 --output=json-pretty --since "60 seconds ago" |jq '.MESSAGE' > $dir/journal.log + grep -q "$search_str" $dir/journal.log + return_code=$? + if [ $return_code -ne 0 ]; then + echo "Failed : No WRN entries found in the journalctl log file" + ERRORS=$(($ERRORS + 1)) + fi + + ceph osd out 0 + ceph osd in 0 + WAIT_FOR_CLEAN_TIMEOUT=60 wait_for_clean + search_str="Client client.admin marked osd.0 out, while it was still marked up" + ceph log last | grep -q "$search_str" || return 1 + journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=6 --output=json-pretty --since "60 seconds ago" |jq '.MESSAGE' > $dir/journal.log + grep -q "$search_str" $dir/journal.log + return_code=$? + if [ $return_code -eq 0 ]; then + echo "Failed : Found $clog_entries INF log entries in the journalctl log file" + ERRORS=$(($ERRORS + 1)) + fi + + if [ $ERRORS -gt 0 ]; then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +main mon-cluster-log "$@" diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in index 1cd655ad48258..6e315d78037ef 100644 --- a/src/common/options/mon.yaml.in +++ b/src/common/options/mon.yaml.in @@ -112,18 +112,6 @@ options: flags: - runtime with_legacy: true -- name: mon_cluster_log_to_syslog_level - type: str - level: advanced - desc: Syslog level for cluster log messages - default: info - services: - - mon - see_also: - - mon_cluster_log_to_syslog - flags: - - runtime - with_legacy: true - name: mon_cluster_log_to_syslog_facility type: str level: advanced @@ -172,10 +160,12 @@ options: flags: - runtime with_legacy: true -- name: mon_cluster_log_file_level +- name: mon_cluster_log_level type: str level: advanced - desc: Lowest level to include is cluster log file + desc: Lowest level to include in cluster log file and/or in external log server + long_desc: Log level to control the cluster log message verbosity for the cluster + log file as well as for all external entities. default: debug services: - mon diff --git a/src/mon/LogMonitor.cc b/src/mon/LogMonitor.cc index 38e5971762b19..e2e26c35845bf 100644 --- a/src/mon/LogMonitor.cc +++ b/src/mon/LogMonitor.cc @@ -208,11 +208,10 @@ ceph::logging::JournaldClusterLogger &LogMonitor::log_channel_info::get_journald void LogMonitor::log_channel_info::clear() { log_to_syslog.clear(); - syslog_level.clear(); syslog_facility.clear(); log_file.clear(); expanded_log_file.clear(); - log_file_level.clear(); + log_level.clear(); log_to_graylog.clear(); log_to_graylog_host.clear(); log_to_graylog_port.clear(); @@ -356,16 +355,25 @@ void LogMonitor::log_external(const LogEntry& le) channel = CLOG_CHANNEL_CLUSTER; } + string level = channels.get_log_level(channel); + if (int log_level = LogEntry::str_to_level(level);log_level > le.prio) { + // Do not log LogEntry to any external entity if le.prio is + // less than channel log level. + return; + } + + if (g_conf().get_val("mon_cluster_log_to_stderr")) { + cerr << channel << " " << le << std::endl; + } + if (channels.do_log_to_syslog(channel)) { - string level = channels.get_level(channel); string facility = channels.get_facility(channel); if (level.empty() || facility.empty()) { derr << __func__ << " unable to log to syslog -- level or facility" << " not defined (level: " << level << ", facility: " << facility << ")" << dendl; } else { - le.log_to_syslog(channels.get_level(channel), - channels.get_facility(channel)); + le.log_to_syslog(level, facility); } } @@ -1191,16 +1199,6 @@ void LogMonitor::update_log_channels() return; } - r = get_conf_str_map_helper( - g_conf().get_val("mon_cluster_log_to_syslog_level"), - oss, &channels.syslog_level, - CLOG_CONFIG_DEFAULT_KEY); - if (r < 0) { - derr << __func__ << " error parsing 'mon_cluster_log_to_syslog_level'" - << dendl; - return; - } - r = get_conf_str_map_helper( g_conf().get_val("mon_cluster_log_to_syslog_facility"), oss, &channels.syslog_facility, @@ -1221,11 +1219,11 @@ void LogMonitor::update_log_channels() } r = get_conf_str_map_helper( - g_conf().get_val("mon_cluster_log_file_level"), oss, - &channels.log_file_level, + g_conf().get_val("mon_cluster_log_level"), oss, + &channels.log_level, CLOG_CONFIG_DEFAULT_KEY); if (r < 0) { - derr << __func__ << " error parsing 'mon_cluster_log_file_level'" + derr << __func__ << " error parsing 'mon_cluster_log_level'" << dendl; return; } @@ -1279,10 +1277,9 @@ void LogMonitor::handle_conf_change(const ConfigProxy& conf, const std::set &changed) { if (changed.count("mon_cluster_log_to_syslog") || - changed.count("mon_cluster_log_to_syslog_level") || changed.count("mon_cluster_log_to_syslog_facility") || changed.count("mon_cluster_log_file") || - changed.count("mon_cluster_log_file_level") || + changed.count("mon_cluster_log_level") || changed.count("mon_cluster_log_to_graylog") || changed.count("mon_cluster_log_to_graylog_host") || changed.count("mon_cluster_log_to_graylog_port") || diff --git a/src/mon/LogMonitor.h b/src/mon/LogMonitor.h index 1eccaa5c20379..e9858523a73a3 100644 --- a/src/mon/LogMonitor.h +++ b/src/mon/LogMonitor.h @@ -57,11 +57,10 @@ private: struct log_channel_info { std::map log_to_syslog; - std::map syslog_level; std::map syslog_facility; std::map log_file; std::map expanded_log_file; - std::map log_file_level; + std::map log_level; std::map log_to_graylog; std::map log_to_graylog_host; std::map log_to_graylog_port; @@ -84,9 +83,8 @@ private: */ void expand_channel_meta() { expand_channel_meta(log_to_syslog); - expand_channel_meta(syslog_level); expand_channel_meta(syslog_facility); - expand_channel_meta(log_file_level); + expand_channel_meta(log_level); } void expand_channel_meta(std::map &m); std::string expand_channel_meta(const std::string &input, @@ -99,15 +97,10 @@ private: &CLOG_CONFIG_DEFAULT_KEY); } - std::string get_level(const std::string &channel) { - return get_str_map_key(syslog_level, channel, - &CLOG_CONFIG_DEFAULT_KEY); - } - std::string get_log_file(const std::string &channel); - std::string get_log_file_level(const std::string &channel) { - return get_str_map_key(log_file_level, channel, + std::string get_log_level(const std::string &channel) { + return get_str_map_key(log_level, channel, &CLOG_CONFIG_DEFAULT_KEY); } @@ -192,10 +185,9 @@ private: const char **get_tracked_conf_keys() const override { static const char* KEYS[] = { "mon_cluster_log_to_syslog", - "mon_cluster_log_to_syslog_level", "mon_cluster_log_to_syslog_facility", "mon_cluster_log_file", - "mon_cluster_log_file_level", + "mon_cluster_log_level", "mon_cluster_log_to_graylog", "mon_cluster_log_to_graylog_host", "mon_cluster_log_to_graylog_port", -- 2.39.5