]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: init local_connection for fast_dispatch in _send_boot()
authorMa, Jianpeng <jianpeng.ma@intel.com>
Mon, 14 Jul 2014 03:17:14 +0000 (03:17 +0000)
committerGreg Farnum <greg@inktank.com>
Mon, 21 Jul 2014 20:13:44 +0000 (13:13 -0700)
We were not properly setting up Sessions on the local_connection for
fast_dispatch'ed Messages if the cluster_addr was set explicitly: the OSD
was not in the dispatch list at bind() time (in ceph_osd.cc), and nothing
called it later on. This issue was missed in testing because Inktank only
uses unified NICs.

That led to errors like the following:

When do ec-read, i met a bug which was occured 100%. The messages are:
2014-07-14 10:03:07.318681 7f7654f6e700 -1 osd/OSD.cc: In function
'virtual void OSD::ms_fast_dispatch(Message*)' thread 7f7654f6e700 time
2014-07-14 10:03:07.316782 osd/OSD.cc: 5019: FAILED assert(session)

 ceph version 0.82-585-g79f3f67 (79f3f6749122ce2944baa70541949d7ca75525e6)
 1: (OSD::ms_fast_dispatch(Message*)+0x286) [0x6544b6]
 2: (DispatchQueue::fast_dispatch(Message*)+0x56) [0xb059d6]
 3: (DispatchQueue::run_local_delivery()+0x6b) [0xb08e0b]
 4: (DispatchQueue::LocalDeliveryThread::entry()+0xd) [0xa4a5fd]
 5: (()+0x8182) [0x7f7665670182]
 6: (clone()+0x6d) [0x7f7663a1130d]
 NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this.

To resolve this, we have the OSD invoke ms_handle_fast_connect() explicitly
in send_boot(). It's not really an appropriate location, but we're already
doing a bunch of messenger twiddling there, so it's acceptable for now.

Signed-off-by: Ma Jianpeng <jianpeng.ma@intel.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
src/osd/OSD.cc

index f211557cab4ca069818b244a686a0de7dcf5269e..1a5a395cc9d907c485042661506fa8e48502fdf1 100644 (file)
@@ -3852,29 +3852,37 @@ void OSD::_send_boot()
 {
   dout(10) << "_send_boot" << dendl;
   entity_addr_t cluster_addr = cluster_messenger->get_myaddr();
+  Connection *local_connection = cluster_messenger->get_loopback_connection().get();
   if (cluster_addr.is_blank_ip()) {
     int port = cluster_addr.get_port();
     cluster_addr = client_messenger->get_myaddr();
     cluster_addr.set_port(port);
     cluster_messenger->set_addr_unknowns(cluster_addr);
     dout(10) << " assuming cluster_addr ip matches client_addr" << dendl;
-  }
+  } else if (local_connection->get_priv() == NULL)
+      cluster_messenger->ms_deliver_handle_fast_connect(local_connection);
+
   entity_addr_t hb_back_addr = hb_back_server_messenger->get_myaddr();
+  local_connection = hb_back_server_messenger->get_loopback_connection().get();
   if (hb_back_addr.is_blank_ip()) {
     int port = hb_back_addr.get_port();
     hb_back_addr = cluster_addr;
     hb_back_addr.set_port(port);
     hb_back_server_messenger->set_addr_unknowns(hb_back_addr);
     dout(10) << " assuming hb_back_addr ip matches cluster_addr" << dendl;
-  }
+  } else if (local_connection->get_priv() == NULL)
+      hb_back_server_messenger->ms_deliver_handle_fast_connect(local_connection);
+
   entity_addr_t hb_front_addr = hb_front_server_messenger->get_myaddr();
+  local_connection = hb_front_server_messenger->get_loopback_connection().get();
   if (hb_front_addr.is_blank_ip()) {
     int port = hb_front_addr.get_port();
     hb_front_addr = client_messenger->get_myaddr();
     hb_front_addr.set_port(port);
     hb_front_server_messenger->set_addr_unknowns(hb_front_addr);
     dout(10) << " assuming hb_front_addr ip matches client_addr" << dendl;
-  }
+  } else if (local_connection->get_priv() == NULL)
+      hb_front_server_messenger->ms_deliver_handle_fast_connect(local_connection);
 
   MOSDBoot *mboot = new MOSDBoot(superblock, service.get_boot_epoch(),
                                  hb_back_addr, hb_front_addr, cluster_addr);