From: caleb miles Date: Fri, 29 Jun 2012 23:38:16 +0000 (-0700) Subject: CrushTester: add Monte Carlo generator to more accurately simulate a X-Git-Tag: v0.49~35^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d15385b0fe80bad581097a06b81213757de846fe;p=ceph.git CrushTester: add Monte Carlo generator to more accurately simulate a CRUSH mapping using a RNG Signed-off-by: caleb miles --- diff --git a/src/crush/CrushTester.cc b/src/crush/CrushTester.cc index 0317d460b2eb..57b3e01f0870 100644 --- a/src/crush/CrushTester.cc +++ b/src/crush/CrushTester.cc @@ -45,8 +45,78 @@ void CrushTester::set_device_weight(int dev, float f) device_weight[dev] = w; } +int CrushTester::get_maximum_affected_by_rule(int ruleno) +{ + // get the number of steps in RULENO + int rule_size = crush.get_rule_len(ruleno); + vector affected_types; + map replications_by_type; + + for (int i = 0; i < rule_size; i++){ + // get what operation is done by the current step + int rule_operation = crush.get_rule_op(ruleno, i); + + // if the operation specifies choosing a device type, store it + if (rule_operation >= 2 && rule_operation != 4){ + int desired_replication = crush.get_rule_arg1(ruleno,i); + int affected_type = crush.get_rule_arg2(ruleno,i); + affected_types.push_back(affected_type); + replications_by_type[affected_type] = desired_replication; + } + } + + /* + * now for each of the affected bucket types, see what is the + * maximum we are (a) requesting or (b) have + */ + + map max_devices_of_type; + // loop through the vector of affected types + for (vector::iterator it = affected_types.begin(); it != affected_types.end(); ++it){ + // loop through the number of buckets looking for affected types + for (map::iterator p = crush.name_map.begin(); p != crush.name_map.end(); p++){ + int bucket_type = crush.get_bucket_type(p->first); + if ( bucket_type == *it) + max_devices_of_type[*it]++; + } + } + for(std::vector::iterator it = affected_types.begin(); it != 
affected_types.end(); ++it){ + if ( replications_by_type[*it] > 0 && replications_by_type[*it] < max_devices_of_type[*it] ) + max_devices_of_type[*it] = replications_by_type[*it]; + } + + /* + * get the smallest number of buckets available of any type as this is our upper bound on + * the number of replicas we can place + */ + int max_affected = max( crush.get_max_buckets(), crush.get_max_devices() ); + + for(std::vector::iterator it = affected_types.begin(); it != affected_types.end(); ++it){ + if (max_devices_of_type[*it] > 0 && max_devices_of_type[*it] < max_affected ) + max_affected = max_devices_of_type[*it]; + } + + return max_affected; +} + + +map CrushTester::get_collapsed_mapping() +{ + int num_to_check = crush.get_max_devices(); + int next_id = 0; + map collapse_mask; + + for (int i = 0; i < num_to_check; i++){ + if (crush.check_item_present(i)){ + collapse_mask[i] = next_id; + next_id++; + } + } + + return collapse_mask; +} void CrushTester::adjust_weights(vector<__u32>& weight) { @@ -113,6 +183,147 @@ void CrushTester::adjust_weights(vector<__u32>& weight) #endif } +bool CrushTester::check_valid_placement(int ruleno, vector out, const vector<__u32>& weight){ + + bool valid_placement = true; + vector included_devices; + map seen_devices; + + // first do the easy check that all devices are "up" + for (vector::iterator it = out.begin(); it != out.end(); it++){ + if (weight[(*it)] == 0){ + valid_placement = false; + break; + } else if (weight[(*it)] > 0) { + included_devices.push_back( (*it) ); + } + } + + /* + * now do the harder test of checking that the CRUSH rule r is not violated + * we could test that none of the devices mentioned in out are unique, + * but this is a special case of this test + */ + + // get the number of steps in RULENO + int rule_size = crush.get_rule_len(ruleno); + vector affected_types; + + // get the smallest type id, and name + int min_map_type = crush.get_num_type_names(); + for (map::iterator it = crush.type_map.begin(); it != 
crush.type_map.end(); it++ ){ + if ( (*it).first < min_map_type ){ + min_map_type = (*it).first; + } + } + + string min_map_type_name = crush.type_map[min_map_type]; + + // get the types of devices affected by RULENO + for (int i = 0; i < rule_size; i++){ + // get what operation is done by the current step + int rule_operation = crush.get_rule_op(ruleno, i); + + // if the operation specifies choosing a device type, store it + if (rule_operation >= 2 && rule_operation != 4){ + int affected_type = crush.get_rule_arg2(ruleno,i); + affected_types.push_back( crush.get_type_name(affected_type)); + } + } + + // find out if we are only dealing with osd's + bool only_osd_affected; + if (affected_types.size() == 1){ + if ( (affected_types.back() == min_map_type_name) && (min_map_type_name == "osd") ){ + only_osd_affected = true; + } + } + + // check that we don't have any duplicate id's + for (vector::iterator it = included_devices.begin(); it != included_devices.end(); it++){ + int num_copies = count(included_devices.begin(), included_devices.end(), (*it) ); + if (num_copies > 1){ + valid_placement = false; + } + } + + // if we have more than just osd's affected we need to do a lot more work + if (!only_osd_affected){ + // loop through the devices that are "in/up" + for (vector::iterator it = included_devices.begin(); it != included_devices.end(); it++){ + if (valid_placement == false) + break; + + // create a temporary map of the form (device type, device name in map) + map device_location_hierarchy = crush.get_full_location(*it); + + // loop over the types affected by RULENO looking for duplicate bucket assignments + for (vector::iterator t = affected_types.begin(); t != affected_types.end(); t++){ + if (seen_devices.count( device_location_hierarchy[*t] ) ){ + valid_placement = false; + break; + } else { + // store the devices we have seen in the form of (device name, device type) + seen_devices[ device_location_hierarchy[*t] ] = *t ; + } + } + } + } + + return 
valid_placement; +} + +int CrushTester::random_placement(int ruleno, vector& out, int maxout, vector<__u32>& weight) +{ + // get the total weight of the system + int total_weight = 0; + for (unsigned i = 0; i < weight.size(); i++) + total_weight += weight[i]; + + // compute each device's proportional weight + vector proportional_weights( weight.size() ); + for (unsigned i = 0; i < weight.size(); i++) + proportional_weights[i] = (float) weight[i] / (float) total_weight; + + +#ifdef HAVE_BOOST_RANDOM_DISCRETE_DISTRIBUTION + // create a random number generator with the device weights to use for simulating placements + boost::random::discrete_distribution<> dist(proportional_weights); +#endif + + // determine the real maximum number of devices to return + int devices_requested = min(maxout, get_maximum_affected_by_rule(ruleno)); + bool accept_placement = false; + + vector trial_placement(devices_requested); + int attempted_tries = 0; + int max_tries = 100; + do { + // create a vector to hold our trial mappings + int temp_array[devices_requested]; + for (int i = 0; i < devices_requested; i++){ + temp_array[i] = dist(gen); + } + + trial_placement.assign(temp_array, temp_array + devices_requested); + accept_placement = check_valid_placement(ruleno, trial_placement, weight); + attempted_tries++; + } while (accept_placement == false && attempted_tries < max_tries); + + // save our random placement to the out vector + if (accept_placement) + out.assign(trial_placement.begin(), trial_placement.end()); + + // or don't.... 
+ else if (attempted_tries == max_tries) + return -EINVAL; + + return 0; +} + + + + int CrushTester::test() { if (min_rule < 0 || max_rule < 0) { @@ -139,18 +350,14 @@ int CrushTester::test() if (output_utilization_all) err << "devices weights (hex): " << hex << weight << dec << std::endl; - // test ability to retrieve item parent information - if (output_utilization_all) - for (unsigned j = 0; j < weight.size(); j++) - err << "device " << j << " is located at " << crush.get_immediate_parent(j) << endl; - // make adjustments adjust_weights(weight); int num_devices_active = 0; for (vector<__u32>::iterator p = weight.begin(); p != weight.end(); ++p) - num_devices_active++; + if (*p > 0) + num_devices_active++; if (output_choose_tries) crush.start_choose_profile(); @@ -176,13 +383,13 @@ int CrushTester::test() for (int nr = minr; nr <= maxr; nr++) { vector per(crush.get_max_devices()); map sizes; - + int num_objects = ((max_x - min_x) + 1); float num_devices = (float) per.size(); // get the total number of devices, better to cast as a float here #ifdef HAVE_BOOST_RANDOM_DISCRETE_DISTRIBUTION float test_chi_statistic = 0.0; // our observed chi squared statistic - + // look up the maximum expected chi squared statistic for the 5% and 1% confidence levels float chi_statistic_five_percent = quantile(complement(chi_squared(num_devices_active-1), 0.05)); float chi_statistic_one_percent = quantile(complement(chi_squared(num_devices_active-1), 0.01)); @@ -214,7 +421,7 @@ int CrushTester::test() total_weight += weight[i]; // compute the expected number of objects stored per device in the absence of weighting - float expected_objects = min(nr, num_devices_active) * num_objects; + float expected_objects = min(nr, get_maximum_affected_by_rule(r)) * num_objects; // compute each device's proportional weight vector proportional_weights( per.size() ); @@ -240,7 +447,7 @@ int CrushTester::test() objects_per_batch = (batch_max - batch_min + 1); } - float batch_expected_objects = 
min(nr, num_devices_active) * objects_per_batch; + float batch_expected_objects = min(nr, get_maximum_affected_by_rule(r)) * objects_per_batch; vector batch_num_objects_expected( per.size() ); for (unsigned i = 0; i < per.size() ; i++) @@ -259,14 +466,13 @@ int CrushTester::test() err << "CRUSH"; // prepend CRUSH to placement output crush.do_rule(r, x, out, nr, weight); } else { + +#ifdef HAVE_BOOST_RANDOM_DISCRETE_DISTRIBUTION if (output_statistics) err << "RNG"; // prepend RNG to placement output to denote simulation -#ifdef HAVE_BOOST_RANDOM_DISCRETE_DISTRIBUTION - // fill our vector with random numbers representing an OSD ID - // one day we'll worry about duplicate entries, probably - for (int j = 0; j < nr; j++) - out.push_back( dist(gen) ); + // test our new monte carlo placement generator + random_placement(r, out, nr, weight); #endif } diff --git a/src/crush/CrushTester.h b/src/crush/CrushTester.h index 8c46b5e53381..7c3ca4cf4529 100644 --- a/src/crush/CrushTester.h +++ b/src/crush/CrushTester.h @@ -30,7 +30,10 @@ class CrushTester { void adjust_weights(vector<__u32>& weight); - + int get_maximum_affected_by_rule(int ruleno); + map get_collapsed_mapping(); + bool check_valid_placement(int ruleno, vector out, const vector<__u32>& weight); + int random_placement(int ruleno, vector& out, int maxout, vector<__u32>& weight); public: CrushTester(CrushWrapper& c, ostream& eo, int verbosity=0) diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index 9c6deba32a18..341304a7b7bc 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -113,9 +113,81 @@ bool CrushWrapper::check_item_loc(CephContext *cct, int item, map return false; } +map CrushWrapper::get_full_location(int id){ + + map full_location; + pair parent_coord; + parent_coord = get_immediate_parent(id); + int parent_id; + + // read the type map and get the name of the type with the largest ID + int high_type = 0; + for (map::iterator it = type_map.begin(); it != 
type_map.end(); it++){ + if ( (*it).first > high_type ) + high_type = (*it).first; + } + + string high_type_name = type_map[high_type]; + + full_location[ parent_coord.first ] = parent_coord.second; + parent_id = get_item_id( (parent_coord.second).c_str() ); + + + while (parent_coord.first != high_type_name) { + parent_coord = get_immediate_parent(parent_id); + full_location[ parent_coord.first ] = parent_coord.second; + if ( parent_coord.first != high_type_name ){ + parent_id = get_item_id( (parent_coord.second).c_str() ); + } + } + + return full_location; +} + + +map CrushWrapper::get_parent_hierarchy(int id) +{ + map parent_hierarchy; + pair parent_coord = get_immediate_parent(id); + int parent_id; + + // get the integer type for id and create a counter from there + int type_counter = get_bucket_type(id); + + // if we get a negative type then we can assume that we have an OSD + // change behavior in get_item_type FIXME + if (type_counter < 0) + type_counter = 0; + + // read the type map and get the name of the type with the largest ID + int high_type = 0; + for (map::iterator it = type_map.begin(); it != type_map.end(); it++){ + if ( (*it).first > high_type ) + high_type = (*it).first; + } + + parent_id = get_item_id((parent_coord.second).c_str()); + + while (type_counter < high_type) { + type_counter++; + parent_hierarchy[ type_counter ] = parent_coord.first; + + if (type_counter < high_type){ + // get the coordinate information for the next parent + parent_coord = get_immediate_parent(parent_id); + parent_id = get_item_id(parent_coord.second.c_str()); + } + } + + return parent_hierarchy; +} + + + int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string name, map& loc) // typename -> bucketname { + ldout(cct, 5) << "insert_item item " << item << " weight " << weight << " name " << name << " loc " << loc << dendl; @@ -152,7 +224,7 @@ int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string n // add to an existing 
bucket int id = get_item_id(loc[p->second].c_str()); if (!bucket_exists(id)) { - ldout(cct, 1) << "insert_item don't have bucket " << id << dendl; + ldout(cct, 1) << "insert_item doesn't have bucket " << id << dendl; return -EINVAL; } diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 847574165630..3a6acdde9c41 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -217,8 +217,28 @@ public: } + /** + * returns the (type, name) of the parent bucket of id + */ pair get_immediate_parent(int id); + /** + * get the fully qualified location of a device by successively finding + * parents beginning at ID and ending at highest type number specified in + * the CRUSH map which assumes that if device foo is under device bar, the + * type_id of foo < bar where type_id is the integer specified in the CRUSH map + * + * returns the location in the form of (type=foo) where type is a type of bucket + * specified in the CRUSH map and foo is a name specified in the CRUSH map + */ + map get_full_location(int id); + + /** + * returns (type_id, type) of all parent buckets between id and + * default, can be used to check for anomalous CRUSH maps + */ + map get_parent_hierarchy(int id); + /** * insert an item into the map at a specific position