osd: Log audit

author Brad Hubbard <bhubbard@redhat.com>

Thu, 15 Jun 2017 01:40:09 +0000 (11:40 +1000)

committer Brad Hubbard <bhubbard@redhat.com>

Wed, 26 Jul 2017 07:34:28 +0000 (17:34 +1000)
author Brad Hubbard <bhubbard@redhat.com>
Thu, 15 Jun 2017 01:40:09 +0000 (11:40 +1000)
committer Brad Hubbard <bhubbard@redhat.com>
Wed, 26 Jul 2017 07:34:28 +0000 (17:34 +1000)
diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh

index 14b933233fac9859c63aace6688819d77e42e767..28db9476b1b83e08437f53a09d6b67a98c5b5358 100755 (executable)
--- a/qa/standalone/scrub/osd-scrub-repair.sh
+++ b/qa/standalone/scrub/osd-scrub-repair.sh
@@ -2615,7 +2615,7 @@ function TEST_periodic_scrub_replicated() {
      pg_scrub $pg
      sleep 1
      set_config osd ${primary} osd_scrub_backoff_ratio $scrub_backoff_ratio
-    grep -q "Regular scrub request, losing deep-scrub details" $dir/osd.${primary}.log || return 1
+    grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1
  
      # deep-scrub error is no longer present
      rados list-inconsistent-obj $pg | jq '.' | grep -qv $objname || return 1
diff --git a/qa/suites/rados/basic/tasks/repair_test.yaml b/qa/suites/rados/basic/tasks/repair_test.yaml

index 8401c1a303f0cd2a8644bc54a7fd243b5a9ff6b4..9afbc0417b008ac85b200bef250f0d7668cf98ea 100644 (file)
--- a/qa/suites/rados/basic/tasks/repair_test.yaml
+++ b/qa/suites/rados/basic/tasks/repair_test.yaml
@@ -16,7 +16,7 @@ overrides:
        - scrub [0-9]+ errors
        - 'size 1 != size'
        - attr name mismatch
-      - Regular scrub request, losing deep-scrub details
+      - Regular scrub request, deep-scrub details will be lost
        - overall HEALTH_
        - (OSDMAP_FLAGS)
        - (OSD_
diff --git a/qa/tasks/repair_test.py b/qa/tasks/repair_test.py

index 26851e25e3de80d4b578ab7be0c80fe5d3162326..473af088b830593ff585dbb876b3fc69def92119 100644 (file)
--- a/qa/tasks/repair_test.py
+++ b/qa/tasks/repair_test.py
@@ -275,7 +275,7 @@ def task(ctx, config):
            - 'scrub [0-9]+ errors'
            - 'size 1 != size'
            - 'attr name mismatch'
-          - 'Regular scrub request, losing deep-scrub details'
+          - 'Regular scrub request, deep-scrub details will be lost'
          conf:
            osd:
              filestore debug inject read err: true
diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc

index e03768183d4510ca897df19f3100e36a95eb4e5a..fccb834ef54fd9591eb069a3d2f5185e7dd4bb50 100644 (file)
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -996,7 +996,8 @@ void ECBackend::handle_sub_read(
        hinfo = get_hash_info(i->first);
        if (!hinfo) {
         r = -EIO;
-       get_parent()->clog_error() << __func__ << ": No hinfo for " << i->first;
+       get_parent()->clog_error() << "Corruption detected: object " << i->first
+                                   << " is missing hash_info";
         dout(5) << __func__ << ": No hinfo for " << i->first << dendl;
         goto error;
        }
@@ -1010,9 +1011,8 @@ void ECBackend::handle_sub_read(
         j->get<1>(),
         bl, j->get<2>());
        if (r < 0) {
-       get_parent()->clog_error() << __func__
-                                  << ": Error " << r
-                                  << " reading "
+       get_parent()->clog_error() << "Error " << r
+                                  << " reading object "
                                    << i->first;
         dout(5) << __func__ << ": Error " << r
                 << " reading " << i->first << dendl;
@@ -1038,7 +1038,7 @@ void ECBackend::handle_sub_read(
           bufferhash h(-1);
           h << bl;
           if (h.digest() != hinfo->get_chunk_hash(shard)) {
-           get_parent()->clog_error() << __func__ << ": Bad hash for " << i->first << " digest 0x"
+           get_parent()->clog_error() << "Bad hash for " << i->first << " digest 0x"
                                        << hex << h.digest() << " expected 0x" << hinfo->get_chunk_hash(shard) << dec;
             dout(5) << __func__ << ": Bad hash for " << i->first << " digest 0x"
                     << hex << h.digest() << " expected 0x" << hinfo->get_chunk_hash(shard) << dec << dendl;
@@ -1235,7 +1235,7 @@ void ECBackend::handle_sub_read_reply(
             err = rop.complete[iter->first].errors.begin()->second;
              rop.complete[iter->first].r = err;
           } else {
-           get_parent()->clog_error() << __func__ << ": Error(s) ignored for "
+           get_parent()->clog_warn() << "Error(s) ignored for "
                                        << iter->first << " enough copies available";
             dout(10) << __func__ << " Error(s) ignored for " << iter->first
                      << " enough copies available" << dendl;
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc

index b29f9b87a4c49150dfa926abce4140b05678467b..af7914749ff9c9d5c2fce6c0ec6d08763deb3cc7 100644 (file)
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -820,11 +820,11 @@ void OSDService::check_full_status(float ratio)
      dout(10) << __func__ << " " << get_full_state_name(cur_state)
              << " -> " << get_full_state_name(new_state) << dendl;
      if (new_state == FAILSAFE) {
-      clog->error() << "failsafe engaged, dropping updates, now "
+      clog->error() << "full status failsafe engaged, dropping updates, now "
                     << (int)roundf(ratio * 100) << "% full";
      } else if (cur_state == FAILSAFE) {
-      clog->error() << "failsafe disengaged, no longer dropping updates, now "
-                   << (int)roundf(ratio * 100) << "% full";
+      clog->error() << "full status failsafe disengaged, no longer dropping "
+                    << "updates, now " << (int)roundf(ratio * 100) << "% full";
      }
      cur_state = new_state;
    }
@@ -7819,7 +7819,7 @@ void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m)
        } else if (!osdmap->get_hb_back_addr(whoami).probably_equals(
                    hb_back_server_messenger->get_myaddr())) {
         clog->error() << "map e" << osdmap->get_epoch()
-                     << " had wrong hb back addr ("
+                     << " had wrong heartbeat back addr ("
                       << osdmap->get_hb_back_addr(whoami)
                       << " != my " << hb_back_server_messenger->get_myaddr()
                       << ")";
@@ -7827,7 +7827,7 @@ void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m)
                  !osdmap->get_hb_front_addr(whoami).probably_equals(
                    hb_front_server_messenger->get_myaddr())) {
         clog->error() << "map e" << osdmap->get_epoch()
-                     << " had wrong hb front addr ("
+                     << " had wrong heartbeat front addr ("
                       << osdmap->get_hb_front_addr(whoami)
                       << " != my " << hb_front_server_messenger->get_myaddr()
                       << ")";
diff --git a/src/osd/PG.cc b/src/osd/PG.cc

index 5153d5d8f32d45fb9bb09287d31132d9500b0803..5b62076212255e5b8b9e73722be2afc598269b33 100644 (file)
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -3340,8 +3340,8 @@ void PG::log_weirdness()
      // sloppy check
      if ((pg_log.get_log().log.begin()->version <= pg_log.get_tail()))
        osd->clog->error() << info.pgid
-                       << " log bound mismatch, info (" << pg_log.get_tail() << ","
-                       << pg_log.get_head() << "]"
+                       << " log bound mismatch, info (tail,head] ("
+                       << pg_log.get_tail() << "," << pg_log.get_head() << "]"
                         << " actual ["
                         << pg_log.get_log().log.begin()->version << ","
                          << pg_log.get_log().log.rbegin()->version << "]";
@@ -3623,7 +3623,7 @@ bool PG::sched_scrub()
         } else {
           osd->clog->error() << "osd." << osd->whoami
                              << " pg " << info.pgid
-                            << " Regular scrub request, losing deep-scrub details";
+                            << " Regular scrub request, deep-scrub details will be lost";
         }
        }
        queue_scrub();
@@ -7129,9 +7129,12 @@ boost::statechart::result PG::RecoveryState::Active::react(const ActMap&)
        pg->all_unfound_are_queried_or_lost(pg->get_osdmap())) {
      if (pg->cct->_conf->osd_auto_mark_unfound_lost) {
        pg->osd->clog->error() << pg->info.pgid.pgid << " has " << unfound
-                           << " objects unfound and apparently lost, would automatically marking lost but NOT IMPLEMENTED";
+                           << " objects unfound and apparently lost, would automatically "
+                           << "mark these objects lost but this feature is not yet implemented "
+                           << "(osd_auto_mark_unfound_lost)";
      } else
-      pg->osd->clog->error() << pg->info.pgid.pgid << " has " << unfound << " objects unfound and apparently lost";
+      pg->osd->clog->error() << pg->info.pgid.pgid << " has "
+                             << unfound << " objects unfound and apparently lost";
    }
  
    if (pg->is_active()) {
diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h

index 90275c66425541f0adcf3fcd5c5bdc8673cd2065..f244f4c2f703dcfc7cbf96e73166b40a2c2c60cd 100644 (file)
--- a/src/osd/PGBackend.h
+++ b/src/osd/PGBackend.h
@@ -276,6 +276,7 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
       virtual ceph_tid_t get_tid() = 0;
  
       virtual LogClientTemp clog_error() = 0;
+     virtual LogClientTemp clog_warn() = 0;
  
       virtual bool check_failsafe_full(ostream &ss) = 0;
  
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc

index 7c22976ac369d9bd2742482fcadeb08dd431ff60..0d21cb91a542d514f10491089b042f71f75ec940 100644 (file)
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -3712,22 +3712,22 @@ int PrimaryLogPG::trim_object(
    } else {
      auto p = snapset.clone_snaps.find(coid.snap);
      if (p == snapset.clone_snaps.end()) {
-      osd->clog->error() << __func__ << " No clone_snaps in snapset " << snapset
-                        << " for " << coid << "\n";
+      osd->clog->error() << "No clone_snaps in snapset " << snapset
+                        << " for object " << coid << "\n";
        return -ENOENT;
      }
      old_snaps.insert(snapset.clone_snaps[coid.snap].begin(),
                      snapset.clone_snaps[coid.snap].end());
    }
    if (old_snaps.empty()) {
-    osd->clog->error() << __func__ << " No object info snaps for " << coid;
+    osd->clog->error() << "No object info snaps for object " << coid;
      return -ENOENT;
    }
  
    dout(10) << coid << " old_snaps " << old_snaps
            << " old snapset " << snapset << dendl;
    if (snapset.seq == 0) {
-    osd->clog->error() << __func__ << " No snapset.seq for " << coid;
+    osd->clog->error() << "No snapset.seq for object " << coid;
      return -ENOENT;
    }
  
@@ -3744,7 +3744,7 @@ int PrimaryLogPG::trim_object(
    if (new_snaps.empty()) {
      p = std::find(snapset.clones.begin(), snapset.clones.end(), coid.snap);
      if (p == snapset.clones.end()) {
-      osd->clog->error() << __func__ << " Snap " << coid.snap << " not in clones";
+      osd->clog->error() << "Snap " << coid.snap << " not in clones";
        return -ENOENT;
      }
    }
@@ -11379,15 +11379,16 @@ bool PrimaryLogPG::start_recovery_ops(
  
    if (missing.num_missing() > 0) {
      // this shouldn't happen!
-    osd->clog->error() << info.pgid << " recovery ending with " << missing.num_missing()
-                      << ": " << missing.get_items();
+    osd->clog->error() << info.pgid << " Unexpected Error: recovery ending with "
+                      << missing.num_missing() << ": " << missing.get_items();
      return work_in_progress;
    }
  
    if (needs_recovery()) {
      // this shouldn't happen!
      // We already checked num_missing() so we must have missing replicas
-    osd->clog->error() << info.pgid << " recovery ending with missing replicas";
+    osd->clog->error() << info.pgid 
+                       << " Unexpected Error: recovery ending with missing replicas";
      return work_in_progress;
    }
  
@@ -13623,7 +13624,8 @@ unsigned PrimaryLogPG::process_clones_to(const boost::optional<hobject_t> &head,
      if (!allow_incomplete_clones) {
        next_clone.snap = **curclone;
        clog->error() << mode << " " << pgid << " " << head.get()
-                        << " expected clone " << next_clone;
+                        << " expected clone " << next_clone << " " << missing
+                         << " missing";
        ++scrubber.shallow_errors;
        e.set_clone_missing(next_clone.snap);
      }
@@ -14002,7 +14004,7 @@ void PrimaryLogPG::scrub_snapshot_metadata(
      ObjectContextRef obc = get_object_context(p->first, false);
      if (!obc) {
        osd->clog->error() << info.pgid << " " << mode
-                        << " cannot get object context for "
+                        << " cannot get object context for object "
                          << p->first;
        continue;
      } else if (obc->obs.oi.soid != p->first) {
@@ -14059,7 +14061,7 @@ void PrimaryLogPG::scrub_snapshot_metadata(
      ObjectContextRef obc = get_object_context(p.first, true);
      if (!obc) {
        osd->clog->error() << info.pgid << " " << mode
-                        << " cannot get object context for "
+                        << " cannot get object context for object "
                          << p.first;
        continue;
      } else if (obc->obs.oi.soid != p.first) {
diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h

index c914ba2c76c1361d54b791e2580d389382972d30..328fb1ba236ce5d1fb824996fbbf89854f50ab13 100644 (file)
--- a/src/osd/PrimaryLogPG.h
+++ b/src/osd/PrimaryLogPG.h
@@ -460,6 +460,7 @@ public:
    ceph_tid_t get_tid() override { return osd->get_tid(); }
  
    LogClientTemp clog_error() override { return osd->clog->error(); }
+  LogClientTemp clog_warn() override { return osd->clog->warn(); }
  
    struct watch_disconnect_t {
      uint64_t cookie;
author	Brad Hubbard <bhubbard@redhat.com>
	Thu, 15 Jun 2017 01:40:09 +0000 (11:40 +1000)
committer	Brad Hubbard <bhubbard@redhat.com>
	Wed, 26 Jul 2017 07:34:28 +0000 (17:34 +1000)
qa/standalone/scrub/osd-scrub-repair.sh		patch \| blob \| history
qa/suites/rados/basic/tasks/repair_test.yaml		patch \| blob \| history
qa/tasks/repair_test.py		patch \| blob \| history
src/osd/ECBackend.cc		patch \| blob \| history
src/osd/OSD.cc		patch \| blob \| history
src/osd/PG.cc		patch \| blob \| history
src/osd/PGBackend.h		patch \| blob \| history
src/osd/PrimaryLogPG.cc		patch \| blob \| history
src/osd/PrimaryLogPG.h		patch \| blob \| history