Too much pain in the containerized world. The autodetection,
based on `getpid() == 1`, turned out to be problematic.
This patch removes it and switches the behavior to always
use a randomly generated nonce.
Fixes: https://tracker.ceph.com/issues/57977
Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
(cherry picked from commit
e27f6c6a856d6ba7cad08bb933ec84e226aae6ad)
Messenger *mgr_msgr = Messenger::create(g_ceph_context, public_msgr_type,
entity_name_t::MON(rank), "mon-mgrc",
- Messenger::get_pid_nonce());
+ Messenger::get_random_nonce());
if (!mgr_msgr) {
derr << "unable to create mgr_msgr" << dendl;
prefork.exit(1);
public_msg_type = public_msg_type.empty() ? msg_type : public_msg_type;
cluster_msg_type = cluster_msg_type.empty() ? msg_type : cluster_msg_type;
- uint64_t nonce = Messenger::get_pid_nonce();
+ uint64_t nonce = Messenger::get_random_nonce();
Messenger *ms_public = Messenger::create(g_ceph_context, public_msg_type,
entity_name_t::OSD(whoami), "client", nonce);
Messenger *ms_cluster = Messenger::create(g_ceph_context, cluster_msg_type,
msgr = Messenger::create(g_ceph_context, public_msgr_type,
entity_name_t::MGR(gid),
"mgr",
- Messenger::get_pid_nonce());
+ Messenger::get_random_nonce());
msgr->set_default_policy(Messenger::Policy::stateless_server(0));
msgr->set_auth_client(monc);
cct->_conf.get_val<std::string>("ms_type") : cct->_conf.get_val<std::string>("ms_public_type"),
entity_name_t::MGR(),
"mgr",
- Messenger::get_pid_nonce())),
+ Messenger::get_random_nonce())),
objecter{g_ceph_context, client_messenger.get(), &monc, poolctx},
client{client_messenger.get(), &monc, &objecter},
mgrc(g_ceph_context, client_messenger.get(), &monc.monmap),
std::move(lname), nonce);
}
-uint64_t Messenger::get_pid_nonce()
-{
- uint64_t nonce = getpid();
- if (nonce == 1 || getenv("CEPH_USE_RANDOM_NONCE")) {
- // we're running in a container; use a random number instead!
- nonce = ceph::util::generate_random_number<uint64_t>();
- }
- return nonce;
-}
-
uint64_t Messenger::get_random_nonce()
{
+ // Always return a randomly generated nonce. In the past the logic
+ // was more complex: we tried to use the PID, but in the containerized
+ // world that turned out to be unreliable. We then started guessing
+ // whether we were running in a container, and added a manual lever
+ // (CEPH_USE_RANDOM_NONCE) to intervene when the guess was wrong.
return ceph::util::generate_random_number<uint64_t>();
}
uint64_t nonce);
static uint64_t get_random_nonce();
- static uint64_t get_pid_nonce();
/**
* create a new messenger