]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Log audit 16281/head
authorBrad Hubbard <bhubbard@redhat.com>
Thu, 15 Jun 2017 01:40:09 +0000 (11:40 +1000)
committerBrad Hubbard <bhubbard@redhat.com>
Wed, 26 Jul 2017 07:34:28 +0000 (17:34 +1000)
Review current log messages for consistency, accuracy and necessesity as
part of usability initiative. First in a series.

Signed-off-by: Brad Hubbard <bhubbard@redhat.com>
qa/standalone/scrub/osd-scrub-repair.sh
qa/suites/rados/basic/tasks/repair_test.yaml
qa/tasks/repair_test.py
src/osd/ECBackend.cc
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PGBackend.h
src/osd/PrimaryLogPG.cc
src/osd/PrimaryLogPG.h

index 14b933233fac9859c63aace6688819d77e42e767..28db9476b1b83e08437f53a09d6b67a98c5b5358 100755 (executable)
@@ -2615,7 +2615,7 @@ function TEST_periodic_scrub_replicated() {
     pg_scrub $pg
     sleep 1
     set_config osd ${primary} osd_scrub_backoff_ratio $scrub_backoff_ratio
-    grep -q "Regular scrub request, losing deep-scrub details" $dir/osd.${primary}.log || return 1
+    grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1
 
     # deep-scrub error is no longer present
     rados list-inconsistent-obj $pg | jq '.' | grep -qv $objname || return 1
index 8401c1a303f0cd2a8644bc54a7fd243b5a9ff6b4..9afbc0417b008ac85b200bef250f0d7668cf98ea 100644 (file)
@@ -16,7 +16,7 @@ overrides:
       - scrub [0-9]+ errors
       - 'size 1 != size'
       - attr name mismatch
-      - Regular scrub request, losing deep-scrub details
+      - Regular scrub request, deep-scrub details will be lost
       - overall HEALTH_
       - (OSDMAP_FLAGS)
       - (OSD_
index 26851e25e3de80d4b578ab7be0c80fe5d3162326..473af088b830593ff585dbb876b3fc69def92119 100644 (file)
@@ -275,7 +275,7 @@ def task(ctx, config):
           - 'scrub [0-9]+ errors'
           - 'size 1 != size'
           - 'attr name mismatch'
-          - 'Regular scrub request, losing deep-scrub details'
+          - 'Regular scrub request, deep-scrub details will be lost'
         conf:
           osd:
             filestore debug inject read err: true
index e03768183d4510ca897df19f3100e36a95eb4e5a..fccb834ef54fd9591eb069a3d2f5185e7dd4bb50 100644 (file)
@@ -996,7 +996,8 @@ void ECBackend::handle_sub_read(
       hinfo = get_hash_info(i->first);
       if (!hinfo) {
        r = -EIO;
-       get_parent()->clog_error() << __func__ << ": No hinfo for " << i->first;
+       get_parent()->clog_error() << "Corruption detected: object " << i->first
+                                   << " is missing hash_info";
        dout(5) << __func__ << ": No hinfo for " << i->first << dendl;
        goto error;
       }
@@ -1010,9 +1011,8 @@ void ECBackend::handle_sub_read(
        j->get<1>(),
        bl, j->get<2>());
       if (r < 0) {
-       get_parent()->clog_error() << __func__
-                                  << ": Error " << r
-                                  << " reading "
+       get_parent()->clog_error() << "Error " << r
+                                  << " reading object "
                                   << i->first;
        dout(5) << __func__ << ": Error " << r
                << " reading " << i->first << dendl;
@@ -1038,7 +1038,7 @@ void ECBackend::handle_sub_read(
          bufferhash h(-1);
          h << bl;
          if (h.digest() != hinfo->get_chunk_hash(shard)) {
-           get_parent()->clog_error() << __func__ << ": Bad hash for " << i->first << " digest 0x"
+           get_parent()->clog_error() << "Bad hash for " << i->first << " digest 0x"
                                       << hex << h.digest() << " expected 0x" << hinfo->get_chunk_hash(shard) << dec;
            dout(5) << __func__ << ": Bad hash for " << i->first << " digest 0x"
                    << hex << h.digest() << " expected 0x" << hinfo->get_chunk_hash(shard) << dec << dendl;
@@ -1235,7 +1235,7 @@ void ECBackend::handle_sub_read_reply(
            err = rop.complete[iter->first].errors.begin()->second;
             rop.complete[iter->first].r = err;
          } else {
-           get_parent()->clog_error() << __func__ << ": Error(s) ignored for "
+           get_parent()->clog_warn() << "Error(s) ignored for "
                                       << iter->first << " enough copies available";
            dout(10) << __func__ << " Error(s) ignored for " << iter->first
                     << " enough copies available" << dendl;
index b29f9b87a4c49150dfa926abce4140b05678467b..af7914749ff9c9d5c2fce6c0ec6d08763deb3cc7 100644 (file)
@@ -820,11 +820,11 @@ void OSDService::check_full_status(float ratio)
     dout(10) << __func__ << " " << get_full_state_name(cur_state)
             << " -> " << get_full_state_name(new_state) << dendl;
     if (new_state == FAILSAFE) {
-      clog->error() << "failsafe engaged, dropping updates, now "
+      clog->error() << "full status failsafe engaged, dropping updates, now "
                    << (int)roundf(ratio * 100) << "% full";
     } else if (cur_state == FAILSAFE) {
-      clog->error() << "failsafe disengaged, no longer dropping updates, now "
-                   << (int)roundf(ratio * 100) << "% full";
+      clog->error() << "full status failsafe disengaged, no longer dropping "
+                    << "updates, now " << (int)roundf(ratio * 100) << "% full";
     }
     cur_state = new_state;
   }
@@ -7819,7 +7819,7 @@ void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m)
       } else if (!osdmap->get_hb_back_addr(whoami).probably_equals(
                   hb_back_server_messenger->get_myaddr())) {
        clog->error() << "map e" << osdmap->get_epoch()
-                     << " had wrong hb back addr ("
+                     << " had wrong heartbeat back addr ("
                      << osdmap->get_hb_back_addr(whoami)
                      << " != my " << hb_back_server_messenger->get_myaddr()
                      << ")";
@@ -7827,7 +7827,7 @@ void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m)
                 !osdmap->get_hb_front_addr(whoami).probably_equals(
                   hb_front_server_messenger->get_myaddr())) {
        clog->error() << "map e" << osdmap->get_epoch()
-                     << " had wrong hb front addr ("
+                     << " had wrong heartbeat front addr ("
                      << osdmap->get_hb_front_addr(whoami)
                      << " != my " << hb_front_server_messenger->get_myaddr()
                      << ")";
index 5153d5d8f32d45fb9bb09287d31132d9500b0803..5b62076212255e5b8b9e73722be2afc598269b33 100644 (file)
@@ -3340,8 +3340,8 @@ void PG::log_weirdness()
     // sloppy check
     if ((pg_log.get_log().log.begin()->version <= pg_log.get_tail()))
       osd->clog->error() << info.pgid
-                       << " log bound mismatch, info (" << pg_log.get_tail() << ","
-                       << pg_log.get_head() << "]"
+                       << " log bound mismatch, info (tail,head] ("
+                       << pg_log.get_tail() << "," << pg_log.get_head() << "]"
                        << " actual ["
                        << pg_log.get_log().log.begin()->version << ","
                         << pg_log.get_log().log.rbegin()->version << "]";
@@ -3623,7 +3623,7 @@ bool PG::sched_scrub()
        } else {
          osd->clog->error() << "osd." << osd->whoami
                             << " pg " << info.pgid
-                            << " Regular scrub request, losing deep-scrub details";
+                            << " Regular scrub request, deep-scrub details will be lost";
        }
       }
       queue_scrub();
@@ -7129,9 +7129,12 @@ boost::statechart::result PG::RecoveryState::Active::react(const ActMap&)
       pg->all_unfound_are_queried_or_lost(pg->get_osdmap())) {
     if (pg->cct->_conf->osd_auto_mark_unfound_lost) {
       pg->osd->clog->error() << pg->info.pgid.pgid << " has " << unfound
-                           << " objects unfound and apparently lost, would automatically marking lost but NOT IMPLEMENTED";
+                           << " objects unfound and apparently lost, would automatically "
+                           << "mark these objects lost but this feature is not yet implemented "
+                           << "(osd_auto_mark_unfound_lost)";
     } else
-      pg->osd->clog->error() << pg->info.pgid.pgid << " has " << unfound << " objects unfound and apparently lost";
+      pg->osd->clog->error() << pg->info.pgid.pgid << " has "
+                             << unfound << " objects unfound and apparently lost";
   }
 
   if (pg->is_active()) {
index 90275c66425541f0adcf3fcd5c5bdc8673cd2065..f244f4c2f703dcfc7cbf96e73166b40a2c2c60cd 100644 (file)
@@ -276,6 +276,7 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
      virtual ceph_tid_t get_tid() = 0;
 
      virtual LogClientTemp clog_error() = 0;
+     virtual LogClientTemp clog_warn() = 0;
 
      virtual bool check_failsafe_full(ostream &ss) = 0;
 
index 7c22976ac369d9bd2742482fcadeb08dd431ff60..0d21cb91a542d514f10491089b042f71f75ec940 100644 (file)
@@ -3712,22 +3712,22 @@ int PrimaryLogPG::trim_object(
   } else {
     auto p = snapset.clone_snaps.find(coid.snap);
     if (p == snapset.clone_snaps.end()) {
-      osd->clog->error() << __func__ << " No clone_snaps in snapset " << snapset
-                        << " for " << coid << "\n";
+      osd->clog->error() << "No clone_snaps in snapset " << snapset
+                        << " for object " << coid << "\n";
       return -ENOENT;
     }
     old_snaps.insert(snapset.clone_snaps[coid.snap].begin(),
                     snapset.clone_snaps[coid.snap].end());
   }
   if (old_snaps.empty()) {
-    osd->clog->error() << __func__ << " No object info snaps for " << coid;
+    osd->clog->error() << "No object info snaps for object " << coid;
     return -ENOENT;
   }
 
   dout(10) << coid << " old_snaps " << old_snaps
           << " old snapset " << snapset << dendl;
   if (snapset.seq == 0) {
-    osd->clog->error() << __func__ << " No snapset.seq for " << coid;
+    osd->clog->error() << "No snapset.seq for object " << coid;
     return -ENOENT;
   }
 
@@ -3744,7 +3744,7 @@ int PrimaryLogPG::trim_object(
   if (new_snaps.empty()) {
     p = std::find(snapset.clones.begin(), snapset.clones.end(), coid.snap);
     if (p == snapset.clones.end()) {
-      osd->clog->error() << __func__ << " Snap " << coid.snap << " not in clones";
+      osd->clog->error() << "Snap " << coid.snap << " not in clones";
       return -ENOENT;
     }
   }
@@ -11379,15 +11379,16 @@ bool PrimaryLogPG::start_recovery_ops(
 
   if (missing.num_missing() > 0) {
     // this shouldn't happen!
-    osd->clog->error() << info.pgid << " recovery ending with " << missing.num_missing()
-                      << ": " << missing.get_items();
+    osd->clog->error() << info.pgid << " Unexpected Error: recovery ending with "
+                      << missing.num_missing() << ": " << missing.get_items();
     return work_in_progress;
   }
 
   if (needs_recovery()) {
     // this shouldn't happen!
     // We already checked num_missing() so we must have missing replicas
-    osd->clog->error() << info.pgid << " recovery ending with missing replicas";
+    osd->clog->error() << info.pgid 
+                       << " Unexpected Error: recovery ending with missing replicas";
     return work_in_progress;
   }
 
@@ -13623,7 +13624,8 @@ unsigned PrimaryLogPG::process_clones_to(const boost::optional<hobject_t> &head,
     if (!allow_incomplete_clones) {
       next_clone.snap = **curclone;
       clog->error() << mode << " " << pgid << " " << head.get()
-                        << " expected clone " << next_clone;
+                        << " expected clone " << next_clone << " " << missing
+                         << " missing";
       ++scrubber.shallow_errors;
       e.set_clone_missing(next_clone.snap);
     }
@@ -14002,7 +14004,7 @@ void PrimaryLogPG::scrub_snapshot_metadata(
     ObjectContextRef obc = get_object_context(p->first, false);
     if (!obc) {
       osd->clog->error() << info.pgid << " " << mode
-                        << " cannot get object context for "
+                        << " cannot get object context for object "
                         << p->first;
       continue;
     } else if (obc->obs.oi.soid != p->first) {
@@ -14059,7 +14061,7 @@ void PrimaryLogPG::scrub_snapshot_metadata(
     ObjectContextRef obc = get_object_context(p.first, true);
     if (!obc) {
       osd->clog->error() << info.pgid << " " << mode
-                        << " cannot get object context for "
+                        << " cannot get object context for object "
                         << p.first;
       continue;
     } else if (obc->obs.oi.soid != p.first) {
index c914ba2c76c1361d54b791e2580d389382972d30..328fb1ba236ce5d1fb824996fbbf89854f50ab13 100644 (file)
@@ -460,6 +460,7 @@ public:
   ceph_tid_t get_tid() override { return osd->get_tid(); }
 
   LogClientTemp clog_error() override { return osd->clog->error(); }
+  LogClientTemp clog_warn() override { return osd->clog->warn(); }
 
   struct watch_disconnect_t {
     uint64_t cookie;