// Turn the argument's tokens into a string literal exactly as written
// (the argument is not macro-expanded before being stringified).
#define STRINGIFY(x) #x
-typedef enum {
- NONE, INT, LONGLONG, STR, DOUBLE, FLOAT, BOOL
-} opt_type_t;
-
-
-
struct config_option {
const char *section;
const char *conf_name;
{ STRINGIFY(section), NULL, STRINGIFY(name), \
&g_conf.name, STRINGIFY(def_val), type, schar }
// Initializer for a string-valued option: def_val is stored as written (not
// stringified), so it can be a string literal or 0.
// NOTE(review): OPTION() passes the literal "global" as `section`, so
// STRINGIFY(section) produces a string that includes the quote characters;
// confirm that this is what the section lookup expects.
-#define OPTION_STR(section, name, schar, type, def_val) \
+#define OPTION_OPT_STR(section, name, schar, type, def_val) \
{ STRINGIFY(section), NULL, STRINGIFY(name), \
&g_conf.name, def_val, type, schar }
// Every non-string option type shares the OPTION_DEF initializer.
-#define OPTION_BOOL OPTION_DEF
-#define OPTION_INT OPTION_DEF
-#define OPTION_LONGLONG OPTION_DEF
-#define OPTION_FLOAT OPTION_DEF
-#define OPTION_DOUBLE OPTION_DEF
+#define OPTION_OPT_BOOL OPTION_DEF
+#define OPTION_OPT_INT OPTION_DEF
+#define OPTION_OPT_LONGLONG OPTION_DEF
+#define OPTION_OPT_FLOAT OPTION_DEF
+#define OPTION_OPT_DOUBLE OPTION_DEF
// Table-entry helper: pastes the type token onto OPTION_ to pick the
// initializer macro, and always supplies "global" as the section argument.
#define OPTION(name, schar, type, def_val) OPTION_##type("global", name, schar, type, def_val)
&g_conf.name, STRINGIFY(def_val), type, schar }
// Table of known options: one OPTION(name, short option char, type, default)
// entry per setting. parse_config_file() walks this table to load each entry
// from the config file.
static struct config_option config_optionsp[] = {
- OPTION(num_mon, 0, INT, 1),
- OPTION(num_mds, 0, INT, 1),
- OPTION(num_osd, 0, INT, 4),
- OPTION(num_client, 0, INT, 1),
- OPTION(monmap, 'M', STR, 0),
- OPTION(mon_host, 'm', STR, 0),
- OPTION(daemonize, 'd', BOOL, false),
- OPTION(logger, 0, BOOL, true),
- OPTION(logger_interval, 0, INT, 1),
- OPTION(logger_calc_variance, 0, BOOL, true),
- OPTION(logger_subdir, 0, STR, 0),
- OPTION(logger_dir, 0, STR, INSTALL_PREFIX "/var/log/ceph/stat"),
- OPTION(log_dir, 0, STR, INSTALL_PREFIX "/var/log/ceph"), // if daemonize == true
- OPTION(log_sym_dir, 0, STR, INSTALL_PREFIX "/var/log/ceph"), // if daemonize == true
- OPTION(log_to_stdout, 0, BOOL, true),
- OPTION(pid_file, 'p', STR, 0),
- OPTION(conf, 'c', STR, INSTALL_PREFIX "/etc/ceph/ceph.conf"),
- OPTION(chdir, 0, STR, "/"),
- OPTION(fake_clock, 0, BOOL, false),
- OPTION(fakemessenger_serialize, 0, BOOL, true),
- OPTION(kill_after, 0, INT, 0),
- OPTION(debug, 0, INT, 0),
- OPTION(debug_lockdep, 0, INT, 0),
- OPTION(debug_mds, 0, INT, 1),
- OPTION(debug_mds_balancer, 0, INT, 1),
- OPTION(debug_mds_log, 0, INT, 1),
- OPTION(debug_mds_log_expire, 0, INT, 1),
- OPTION(debug_mds_migrator, 0, INT, 1),
- OPTION(debug_buffer, 0, INT, 0),
- OPTION(debug_timer, 0, INT, 0),
- OPTION(debug_filer, 0, INT, 0),
- OPTION(debug_objecter, 0, INT, 0),
- OPTION(debug_journaler, 0, INT, 0),
- OPTION(debug_objectcacher, 0, INT, 0),
- OPTION(debug_client, 0, INT, 0),
- OPTION(debug_osd, 0, INT, 0),
- OPTION(debug_ebofs, 0, INT, 1),
- OPTION(debug_filestore, 0, INT, 1),
- OPTION(debug_journal, 0, INT, 1),
- OPTION(debug_bdev, 0, INT, 1), // block device
- OPTION(debug_ns, 0, INT, 0),
- OPTION(debug_ms, 0, INT, 0),
- OPTION(debug_mon, 0, INT, 1),
- OPTION(debug_paxos, 0, INT, 0),
- OPTION(debug_tp, 0, INT, 0),
- OPTION(clock_lock, 0, BOOL, false),
- OPTION(clock_tare, 0, BOOL, false),
- OPTION(ms_tcp_nodelay, 0, BOOL, true),
- OPTION(ms_retry_interval, 0, DOUBLE, 2.0), // how often to attempt reconnect
- OPTION(ms_fail_interval, 0, DOUBLE, 15.0), // fail after this long
- OPTION(ms_die_on_failure, 0, BOOL, false),
- OPTION(ms_nocrc, 0, BOOL, false),
- OPTION(mon_data, 0, STR, ""),
- OPTION(mon_tick_interval, 0, INT, 5),
- OPTION(mon_osd_down_out_interval, 0, INT, 5), // seconds
- OPTION(mon_lease, 0, FLOAT, 5), // lease interval
- OPTION(mon_lease_renew_interval, 0, FLOAT, 3), // on leader, to renew the lease
- OPTION(mon_lease_ack_timeout, 0, FLOAT, 10.0), // on leader, if lease isn't acked by all peons
- OPTION(mon_lease_timeout, 0, FLOAT, 10.0), // on peon, if lease isn't extended
- OPTION(mon_accept_timeout, 0, FLOAT, 10.0), // on leader, if paxos update isn't accepted
- OPTION(mon_stop_on_last_unmount, 0, BOOL, false),
- OPTION(mon_stop_with_last_mds, 0, BOOL, false),
- OPTION(mon_allow_mds_bully, 0, BOOL, false), // allow a booting mds to (forcibly) claim an mds # .. FIXME
- OPTION(mon_pg_create_interval, 0, FLOAT, 30.0), // no more than every 30s
- OPTION(paxos_propose_interval, 0, DOUBLE, 1.0), // gather updates for this long before proposing a map update
- OPTION(paxos_observer_timeout, 0, DOUBLE, 5*60), // gather updates for this long before proposing a map update
- OPTION(client_cache_size, 0, INT, 1000),
- OPTION(client_cache_mid, 0, FLOAT, .5),
- OPTION(client_cache_stat_ttl, 0, INT, 0), // seconds until cached stat results become invalid
- OPTION(client_cache_readdir_ttl, 0, INT, 1), // 1 second only
- OPTION(client_use_random_mds, 0, BOOL, false),
- OPTION(client_mount_timeout, 0, DOUBLE, 10.0), // retry every N seconds
- OPTION(client_tick_interval, 0, DOUBLE, 1.0),
- OPTION(client_hack_balance_reads, 0, BOOL, false),
- OPTION(client_trace, 0, STR, 0),
- OPTION(client_readahead_min, 0, LONGLONG, 128*1024), // readahead at _least_ this much.
- OPTION(client_readahead_max_bytes, 0, LONGLONG, 0), //8 * 1024*1024,
- OPTION(client_readahead_max_periods, 0, LONGLONG, 4), // as multiple of file layout period (object size * num stripes)
- OPTION(client_snapdir, 0, STR, ".snap"),
- OPTION(fuse_direct_io, 0, INT, 0),
- OPTION(fuse_ll, 0, BOOL, true),
- OPTION(client_oc, 0, BOOL, true),
- OPTION(client_oc_size, 0, INT, 1024*1024* 64), // MB * n
- OPTION(client_oc_max_dirty, 0, INT, 1024*1024* 48), // MB * n (dirty OR tx.. bigish)
- OPTION(client_oc_target_dirty, 0, INT, 1024*1024* 8), // target dirty (keep this smallish)
+ OPTION(num_mon, 0, OPT_INT, 1),
+ OPTION(num_mds, 0, OPT_INT, 1),
+ OPTION(num_osd, 0, OPT_INT, 4),
+ OPTION(num_client, 0, OPT_INT, 1),
+ OPTION(monmap, 'M', OPT_STR, 0),
+ OPTION(mon_host, 'm', OPT_STR, 0),
+ OPTION(daemonize, 'd', OPT_BOOL, false),
+ OPTION(logger, 0, OPT_BOOL, true),
+ OPTION(logger_interval, 0, OPT_INT, 1),
+ OPTION(logger_calc_variance, 0, OPT_BOOL, true),
+ OPTION(logger_subdir, 0, OPT_STR, 0),
+ OPTION(logger_dir, 0, OPT_STR, INSTALL_PREFIX "/var/log/ceph/stat"),
+ OPTION(log_dir, 0, OPT_STR, INSTALL_PREFIX "/var/log/ceph"), // if daemonize == true
+ OPTION(log_sym_dir, 0, OPT_STR, INSTALL_PREFIX "/var/log/ceph"), // if daemonize == true
+ OPTION(log_to_stdout, 0, OPT_BOOL, true),
+ OPTION(pid_file, 'p', OPT_STR, 0),
+ OPTION(conf, 'c', OPT_STR, INSTALL_PREFIX "/etc/ceph/ceph.conf"),
+ OPTION(chdir, 0, OPT_STR, "/"),
+ OPTION(fake_clock, 0, OPT_BOOL, false),
+ OPTION(fakemessenger_serialize, 0, OPT_BOOL, true),
+ OPTION(kill_after, 0, OPT_INT, 0),
+ OPTION(debug, 0, OPT_INT, 0),
+ OPTION(debug_lockdep, 0, OPT_INT, 0),
+ OPTION(debug_mds, 0, OPT_INT, 1),
+ OPTION(debug_mds_balancer, 0, OPT_INT, 1),
+ OPTION(debug_mds_log, 0, OPT_INT, 1),
+ OPTION(debug_mds_log_expire, 0, OPT_INT, 1),
+ OPTION(debug_mds_migrator, 0, OPT_INT, 1),
+ OPTION(debug_buffer, 0, OPT_INT, 0),
+ OPTION(debug_timer, 0, OPT_INT, 0),
+ OPTION(debug_filer, 0, OPT_INT, 0),
+ OPTION(debug_objecter, 0, OPT_INT, 0),
+ OPTION(debug_journaler, 0, OPT_INT, 0),
+ OPTION(debug_objectcacher, 0, OPT_INT, 0),
+ OPTION(debug_client, 0, OPT_INT, 0),
+ OPTION(debug_osd, 0, OPT_INT, 0),
+ OPTION(debug_ebofs, 0, OPT_INT, 1),
+ OPTION(debug_filestore, 0, OPT_INT, 1),
+ OPTION(debug_journal, 0, OPT_INT, 1),
+ OPTION(debug_bdev, 0, OPT_INT, 1), // block device
+ OPTION(debug_ns, 0, OPT_INT, 0),
+ OPTION(debug_ms, 0, OPT_INT, 0),
+ OPTION(debug_mon, 0, OPT_INT, 1),
+ OPTION(debug_paxos, 0, OPT_INT, 0),
+ OPTION(debug_tp, 0, OPT_INT, 0),
+ OPTION(clock_lock, 0, OPT_BOOL, false),
+ OPTION(clock_tare, 0, OPT_BOOL, false),
+ OPTION(ms_tcp_nodelay, 0, OPT_BOOL, true),
+ OPTION(ms_retry_interval, 0, OPT_DOUBLE, 2.0), // how often to attempt reconnect
+ OPTION(ms_fail_interval, 0, OPT_DOUBLE, 15.0), // fail after this long
+ OPTION(ms_die_on_failure, 0, OPT_BOOL, false),
+ OPTION(ms_nocrc, 0, OPT_BOOL, false),
+ OPTION(mon_data, 0, OPT_STR, ""),
+ OPTION(mon_tick_interval, 0, OPT_INT, 5),
+ OPTION(mon_osd_down_out_interval, 0, OPT_INT, 5), // seconds
+ OPTION(mon_lease, 0, OPT_FLOAT, 5), // lease interval
+ OPTION(mon_lease_renew_interval, 0, OPT_FLOAT, 3), // on leader, to renew the lease
+ OPTION(mon_lease_ack_timeout, 0, OPT_FLOAT, 10.0), // on leader, if lease isn't acked by all peons
+ OPTION(mon_lease_timeout, 0, OPT_FLOAT, 10.0), // on peon, if lease isn't extended
+ OPTION(mon_accept_timeout, 0, OPT_FLOAT, 10.0), // on leader, if paxos update isn't accepted
+ OPTION(mon_stop_on_last_unmount, 0, OPT_BOOL, false),
+ OPTION(mon_stop_with_last_mds, 0, OPT_BOOL, false),
+ OPTION(mon_allow_mds_bully, 0, OPT_BOOL, false), // allow a booting mds to (forcibly) claim an mds # .. FIXME
+ OPTION(mon_pg_create_interval, 0, OPT_FLOAT, 30.0), // no more than every 30s
+ OPTION(paxos_propose_interval, 0, OPT_DOUBLE, 1.0), // gather updates for this long before proposing a map update
+ OPTION(paxos_observer_timeout, 0, OPT_DOUBLE, 5*60), // gather updates for this long before proposing a map update
+ OPTION(client_cache_size, 0, OPT_INT, 1000),
+ OPTION(client_cache_mid, 0, OPT_FLOAT, .5),
+ OPTION(client_cache_stat_ttl, 0, OPT_INT, 0), // seconds until cached stat results become invalid
+ OPTION(client_cache_readdir_ttl, 0, OPT_INT, 1), // 1 second only
+ OPTION(client_use_random_mds, 0, OPT_BOOL, false),
+ OPTION(client_mount_timeout, 0, OPT_DOUBLE, 10.0), // retry every N seconds
+ OPTION(client_tick_interval, 0, OPT_DOUBLE, 1.0),
+ OPTION(client_hack_balance_reads, 0, OPT_BOOL, false),
+ OPTION(client_trace, 0, OPT_STR, 0),
+ OPTION(client_readahead_min, 0, OPT_LONGLONG, 128*1024), // readahead at _least_ this much.
+ OPTION(client_readahead_max_bytes, 0, OPT_LONGLONG, 0), //8 * 1024*1024,
+ OPTION(client_readahead_max_periods, 0, OPT_LONGLONG, 4), // as multiple of file layout period (object size * num stripes)
+ OPTION(client_snapdir, 0, OPT_STR, ".snap"),
+ OPTION(fuse_direct_io, 0, OPT_INT, 0),
+ OPTION(fuse_ll, 0, OPT_BOOL, true),
+ OPTION(client_oc, 0, OPT_BOOL, true),
+ OPTION(client_oc_size, 0, OPT_INT, 1024*1024* 64), // MB * n
+ OPTION(client_oc_max_dirty, 0, OPT_INT, 1024*1024* 48), // MB * n (dirty OR tx.. bigish)
+ OPTION(client_oc_target_dirty, 0, OPT_INT, 1024*1024* 8), // target dirty (keep this smallish)
// note: the max amount of "in flight" dirty data is roughly (max - target)
- OPTION(client_oc_max_sync_write, 0, LONGLONG, 128*1024), // sync writes >= this use wrlock
- OPTION(objecter_buffer_uncommitted, 0, BOOL, true), // this must be true for proper failure handling
- OPTION(objecter_map_request_interval, 0, DOUBLE, 15.0), // request a new map every N seconds, if we have pending io
- OPTION(objecter_tick_interval, 0, DOUBLE, 5.0),
- OPTION(objecter_timeout, 0, DOUBLE, 10.0), // before we ask for a map
- OPTION(journaler_allow_split_entries, 0, BOOL, true),
- OPTION(journaler_safe, 0, BOOL, true), // wait for COMMIT on journal writes
- OPTION(journaler_write_head_interval, 0, INT, 15),
- OPTION(journaler_cache, 0, BOOL, false), // cache writes for later readback
- OPTION(journaler_prefetch_periods, 0, INT, 50), // * journal object size (1~MB? see above)
- OPTION(journaler_batch_interval, 0, DOUBLE, .001), // seconds.. max add'l latency we artificially incur
- OPTION(journaler_batch_max, 0, LONGLONG, 0), // max bytes we'll delay flushing; disable, for now....
- OPTION(mds_cache_size, 0, INT, 300000),
- OPTION(mds_cache_mid, 0, FLOAT, .7),
- OPTION(mds_decay_halflife, 0, FLOAT, 5),
- OPTION(mds_beacon_interval, 0, FLOAT, 4),
- OPTION(mds_beacon_grace, 0, FLOAT, 15),
- OPTION(mds_blacklist_interval, 0, FLOAT, 24.0*60.0), // how long to blacklist failed nodes
- OPTION(mds_session_timeout, 0, FLOAT, 60), // cap bits and leases time out if client idle
- OPTION(mds_session_autoclose, 0, FLOAT, 300), // autoclose idle session
- OPTION(mds_client_lease, 0, FLOAT, 120), // (assuming session stays alive)
- OPTION(mds_reconnect_timeout, 0, FLOAT, 30), // seconds to wait for clients during mds restart
+ OPTION(client_oc_max_sync_write, 0, OPT_LONGLONG, 128*1024), // sync writes >= this use wrlock
+ OPTION(objecter_buffer_uncommitted, 0, OPT_BOOL, true), // this must be true for proper failure handling
+ OPTION(objecter_map_request_interval, 0, OPT_DOUBLE, 15.0), // request a new map every N seconds, if we have pending io
+ OPTION(objecter_tick_interval, 0, OPT_DOUBLE, 5.0),
+ OPTION(objecter_timeout, 0, OPT_DOUBLE, 10.0), // before we ask for a map
+ OPTION(journaler_allow_split_entries, 0, OPT_BOOL, true),
+ OPTION(journaler_safe, 0, OPT_BOOL, true), // wait for COMMIT on journal writes
+ OPTION(journaler_write_head_interval, 0, OPT_INT, 15),
+ OPTION(journaler_cache, 0, OPT_BOOL, false), // cache writes for later readback
+ OPTION(journaler_prefetch_periods, 0, OPT_INT, 50), // * journal object size (1~MB? see above)
+ OPTION(journaler_batch_interval, 0, OPT_DOUBLE, .001), // seconds.. max add'l latency we artificially incur
+ OPTION(journaler_batch_max, 0, OPT_LONGLONG, 0), // max bytes we'll delay flushing; disable, for now....
+ OPTION(mds_cache_size, 0, OPT_INT, 300000),
+ OPTION(mds_cache_mid, 0, OPT_FLOAT, .7),
+ OPTION(mds_decay_halflife, 0, OPT_FLOAT, 5),
+ OPTION(mds_beacon_interval, 0, OPT_FLOAT, 4),
+ OPTION(mds_beacon_grace, 0, OPT_FLOAT, 15),
+ OPTION(mds_blacklist_interval, 0, OPT_FLOAT, 24.0*60.0), // how long to blacklist failed nodes
+ OPTION(mds_session_timeout, 0, OPT_FLOAT, 60), // cap bits and leases time out if client idle
+ OPTION(mds_session_autoclose, 0, OPT_FLOAT, 300), // autoclose idle session
+ OPTION(mds_client_lease, 0, OPT_FLOAT, 120), // (assuming session stays alive)
+ OPTION(mds_reconnect_timeout, 0, OPT_FLOAT, 30), // seconds to wait for clients during mds restart
// make it (mds_session_timeout - mds_beacon_grace)
- OPTION(mds_tick_interval, 0, FLOAT, 5),
- OPTION(mds_scatter_nudge_interval, 0, FLOAT, 5), // how quickly dirstat changes propagate up the hierarchy
- OPTION(mds_client_prealloc_inos, 0, INT, 1000),
- OPTION(mds_early_reply, 0, BOOL, true),
- OPTION(mds_rdcap_ttl_ms, 0, INT, 60*1000),
- OPTION(mds_log, 0, BOOL, true),
- OPTION(mds_log_unsafe, 0, BOOL, false), // only wait for log sync, when it's mostly safe to do so
- OPTION(mds_log_max_events, 0, INT, -1),
- OPTION(mds_log_max_segments, 0, INT, 100), // segment size defined by FileLayout, above
- OPTION(mds_log_max_expiring, 0, INT, 20),
- OPTION(mds_log_pad_entry, 0, INT, 128),
- OPTION(mds_log_eopen_size, 0, INT, 100), // # open inodes per log entry
- OPTION(mds_bal_sample_interval, 0, FLOAT, 3.0), // every 5 seconds
- OPTION(mds_bal_replicate_threshold, 0, FLOAT, 8000),
- OPTION(mds_bal_unreplicate_threshold, 0, FLOAT, 0),
- OPTION(mds_bal_frag, 0, BOOL, true),
- OPTION(mds_bal_split_size, 0, INT, 10000),
- OPTION(mds_bal_split_rd, 0, FLOAT, 25000),
- OPTION(mds_bal_split_wr, 0, FLOAT, 10000),
- OPTION(mds_bal_merge_size, 0, INT, 50),
- OPTION(mds_bal_merge_rd, 0, FLOAT, 1000),
- OPTION(mds_bal_merge_wr, 0, FLOAT, 1000),
- OPTION(mds_bal_interval, 0, INT, 10), // seconds
- OPTION(mds_bal_fragment_interval, 0, INT, -1), // seconds
- OPTION(mds_bal_idle_threshold, 0, FLOAT, 0),
- OPTION(mds_bal_max, 0, INT, -1),
- OPTION(mds_bal_max_until, 0, INT, -1),
- OPTION(mds_bal_mode, 0, INT, 0),
- OPTION(mds_bal_min_rebalance, 0, FLOAT, .1), // must be this much above average before we export anything
- OPTION(mds_bal_min_start, 0, FLOAT, .2), // if we need less than this, we don't do anything
- OPTION(mds_bal_need_min, 0, FLOAT, .8), // take within this range of what we need
- OPTION(mds_bal_need_max, 0, FLOAT, 1.2),
- OPTION(mds_bal_midchunk, 0, FLOAT, .3), // any sub bigger than this taken in full
- OPTION(mds_bal_minchunk, 0, FLOAT, .001), // never take anything smaller than this
- OPTION(mds_trim_on_rejoin, 0, BOOL, true),
- OPTION(mds_shutdown_check, 0, INT, 0),
- OPTION(mds_verify_export_dirauth, 0, BOOL, true),
- OPTION(mds_local_osd, 0, BOOL, false),
- OPTION(mds_thrash_exports, 0, INT, 0),
- OPTION(mds_thrash_fragments, 0, INT, 0),
- OPTION(mds_dump_cache_on_map, 0, BOOL, false),
- OPTION(mds_dump_cache_after_rejoin, 0, BOOL, true),
- OPTION(mds_hack_log_expire_for_better_stats, 0, BOOL, false),
- OPTION(osd_data, 0, STR, ""),
- OPTION(osd_journal, 0, STR, ""),
- OPTION(osd_balance_reads, 0, BOOL, false),
- OPTION(osd_flash_crowd_iat_threshold, 0, INT, 0),
- OPTION(osd_flash_crowd_iat_alpha, 0, DOUBLE, 0.125),
- OPTION(osd_balance_reads_temp, 0, DOUBLE, 100), // send from client to replica
- OPTION(osd_shed_reads, 0, INT, false), // forward from primary to replica
- OPTION(osd_shed_reads_min_latency, 0, DOUBLE, .01), // min local latency
- OPTION(osd_shed_reads_min_latency_diff, 0, DOUBLE, .01), // min latency difference
- OPTION(osd_shed_reads_min_latency_ratio, 0, DOUBLE, 1.5), // 1.2 == 20% higher than peer
- OPTION(osd_immediate_read_from_cache, 0, BOOL, false), // osds to read from the cache immediately?
- OPTION(osd_exclusive_caching, 0, BOOL, true), // replicas evict replicated writes
- OPTION(osd_stat_refresh_interval, 0, DOUBLE, .5),
- OPTION(osd_min_pg_size_without_alive, 0, INT, 2), // smallest pg we allow to activate without telling the monitor
- OPTION(osd_pg_bits, 0, INT, 6), // bits per osd
- OPTION(osd_lpg_bits, 0, INT, 1), // bits per osd
- OPTION(osd_object_layout, 0, INT, CEPH_OBJECT_LAYOUT_HASHINO),
- OPTION(osd_pg_layout, 0, INT, CEPH_PG_LAYOUT_CRUSH),
- OPTION(osd_min_rep, 0, INT, 2),
- OPTION(osd_max_rep, 0, INT, 3),
- OPTION(osd_min_raid_width, 0, INT, 3),
- OPTION(osd_max_raid_width, 0, INT, 2),
- OPTION(osd_maxthreads, 0, INT, 2), // 0 == no threading
- OPTION(osd_max_opq, 0, INT, 10),
- OPTION(osd_age, 0, FLOAT, .8),
- OPTION(osd_age_time, 0, INT, 0),
- OPTION(osd_heartbeat_interval, 0, INT, 1),
- OPTION(osd_mon_heartbeat_interval, 0, INT, 30), // if no peers, ping monitor
- OPTION(osd_heartbeat_grace, 0, INT, 20),
- OPTION(osd_mon_report_interval, 0, INT, 5), // pg stats, failures, up_thru, boot.
- OPTION(osd_replay_window, 0, INT, 45),
- OPTION(osd_max_pull, 0, INT, 2),
- OPTION(osd_preserve_trimmed_log, 0, BOOL, true),
- OPTION(osd_recovery_delay_start, 0, FLOAT, 15),
- OPTION(osd_recovery_max_active, 0, INT, 5),
- OPTION(osd_auto_weight, 0, BOOL, false),
- OPTION(filestore, 0, BOOL, false),
- OPTION(filestore_max_sync_interval, 0, DOUBLE, .2), // seconds
- OPTION(filestore_min_sync_interval, 0, DOUBLE, .001), // seconds
- OPTION(filestore_fake_attrs, 0, BOOL, false),
- OPTION(filestore_fake_collections, 0, BOOL, false),
- OPTION(filestore_dev, 0, STR, 0),
- OPTION(filestore_btrfs_trans, 0, BOOL, true),
- OPTION(ebofs, 0, BOOL, false),
- OPTION(ebofs_cloneable, 0, BOOL, true),
- OPTION(ebofs_verify, 0, BOOL, false),
- OPTION(ebofs_commit_ms, 0, INT, 200), // 0 = no forced commit timeout (for debugging/tracing)
- OPTION(ebofs_oc_size, 0, INT, 10000), // onode cache
- OPTION(ebofs_cc_size, 0, INT, 10000), // cnode cache
- OPTION(ebofs_bc_size, 0, LONGLONG, 50*256), // 4k blocks, *256 for MB
- OPTION(ebofs_bc_max_dirty, 0, LONGLONG, 30*256), // before write() will block
- OPTION(ebofs_max_prefetch, 0, INT, 1000), // 4k blocks
- OPTION(ebofs_realloc, 0, BOOL, false), // hrm, this can cause bad fragmentation, don't use!
- OPTION(ebofs_verify_csum_on_read, 0, BOOL, true),
- OPTION(journal_dio, 0, BOOL, false),
- OPTION(journal_max_write_bytes, 0, INT, 0),
- OPTION(journal_max_write_entries, 0, INT, 100),
- OPTION(bdev_lock, 0, BOOL, true),
- OPTION(bdev_iothreads, 0, INT, 1), // number of ios to queue with kernel
- OPTION(bdev_idle_kick_after_ms, 0, INT, 100), // ms
- OPTION(bdev_el_fw_max_ms, 0, INT, 10000), // restart elevator at least once every 1000 ms
- OPTION(bdev_el_bw_max_ms, 0, INT, 3000), // restart elevator at least once every 300 ms
- OPTION(bdev_el_bidir, 0, BOOL, false), // bidirectional elevator?
- OPTION(bdev_iov_max, 0, INT, 512), // max # iov's to collect into a single readv()/writev() call
- OPTION(bdev_debug_check_io_overlap, 0, BOOL, true), // [DEBUG] check for any pending io overlaps
- OPTION(bdev_fake_mb, 0, INT, 0),
- OPTION(bdev_fake_max_mb, 0, INT, 0),
+ OPTION(mds_tick_interval, 0, OPT_FLOAT, 5),
+ OPTION(mds_scatter_nudge_interval, 0, OPT_FLOAT, 5), // how quickly dirstat changes propagate up the hierarchy
+ OPTION(mds_client_prealloc_inos, 0, OPT_INT, 1000),
+ OPTION(mds_early_reply, 0, OPT_BOOL, true),
+ OPTION(mds_rdcap_ttl_ms, 0, OPT_INT, 60*1000),
+ OPTION(mds_log, 0, OPT_BOOL, true),
+ OPTION(mds_log_unsafe, 0, OPT_BOOL, false), // only wait for log sync, when it's mostly safe to do so
+ OPTION(mds_log_max_events, 0, OPT_INT, -1),
+ OPTION(mds_log_max_segments, 0, OPT_INT, 100), // segment size defined by FileLayout, above
+ OPTION(mds_log_max_expiring, 0, OPT_INT, 20),
+ OPTION(mds_log_pad_entry, 0, OPT_INT, 128),
+ OPTION(mds_log_eopen_size, 0, OPT_INT, 100), // # open inodes per log entry
+ OPTION(mds_bal_sample_interval, 0, OPT_FLOAT, 3.0), // every 3 seconds
+ OPTION(mds_bal_replicate_threshold, 0, OPT_FLOAT, 8000),
+ OPTION(mds_bal_unreplicate_threshold, 0, OPT_FLOAT, 0),
+ OPTION(mds_bal_frag, 0, OPT_BOOL, true),
+ OPTION(mds_bal_split_size, 0, OPT_INT, 10000),
+ OPTION(mds_bal_split_rd, 0, OPT_FLOAT, 25000),
+ OPTION(mds_bal_split_wr, 0, OPT_FLOAT, 10000),
+ OPTION(mds_bal_merge_size, 0, OPT_INT, 50),
+ OPTION(mds_bal_merge_rd, 0, OPT_FLOAT, 1000),
+ OPTION(mds_bal_merge_wr, 0, OPT_FLOAT, 1000),
+ OPTION(mds_bal_interval, 0, OPT_INT, 10), // seconds
+ OPTION(mds_bal_fragment_interval, 0, OPT_INT, -1), // seconds
+ OPTION(mds_bal_idle_threshold, 0, OPT_FLOAT, 0),
+ OPTION(mds_bal_max, 0, OPT_INT, -1),
+ OPTION(mds_bal_max_until, 0, OPT_INT, -1),
+ OPTION(mds_bal_mode, 0, OPT_INT, 0),
+ OPTION(mds_bal_min_rebalance, 0, OPT_FLOAT, .1), // must be this much above average before we export anything
+ OPTION(mds_bal_min_start, 0, OPT_FLOAT, .2), // if we need less than this, we don't do anything
+ OPTION(mds_bal_need_min, 0, OPT_FLOAT, .8), // take within this range of what we need
+ OPTION(mds_bal_need_max, 0, OPT_FLOAT, 1.2),
+ OPTION(mds_bal_midchunk, 0, OPT_FLOAT, .3), // any sub bigger than this taken in full
+ OPTION(mds_bal_minchunk, 0, OPT_FLOAT, .001), // never take anything smaller than this
+ OPTION(mds_trim_on_rejoin, 0, OPT_BOOL, true),
+ OPTION(mds_shutdown_check, 0, OPT_INT, 0),
+ OPTION(mds_verify_export_dirauth, 0, OPT_BOOL, true),
+ OPTION(mds_local_osd, 0, OPT_BOOL, false),
+ OPTION(mds_thrash_exports, 0, OPT_INT, 0),
+ OPTION(mds_thrash_fragments, 0, OPT_INT, 0),
+ OPTION(mds_dump_cache_on_map, 0, OPT_BOOL, false),
+ OPTION(mds_dump_cache_after_rejoin, 0, OPT_BOOL, true),
+ OPTION(mds_hack_log_expire_for_better_stats, 0, OPT_BOOL, false),
+ OPTION(osd_data, 0, OPT_STR, ""),
+ OPTION(osd_journal, 0, OPT_STR, ""),
+ OPTION(osd_balance_reads, 0, OPT_BOOL, false),
+ OPTION(osd_flash_crowd_iat_threshold, 0, OPT_INT, 0),
+ OPTION(osd_flash_crowd_iat_alpha, 0, OPT_DOUBLE, 0.125),
+ OPTION(osd_balance_reads_temp, 0, OPT_DOUBLE, 100), // send from client to replica
+ OPTION(osd_shed_reads, 0, OPT_INT, false), // forward from primary to replica
+ OPTION(osd_shed_reads_min_latency, 0, OPT_DOUBLE, .01), // min local latency
+ OPTION(osd_shed_reads_min_latency_diff, 0, OPT_DOUBLE, .01), // min latency difference
+ OPTION(osd_shed_reads_min_latency_ratio, 0, OPT_DOUBLE, 1.5), // 1.2 == 20% higher than peer
+ OPTION(osd_immediate_read_from_cache, 0, OPT_BOOL, false), // osds to read from the cache immediately?
+ OPTION(osd_exclusive_caching, 0, OPT_BOOL, true), // replicas evict replicated writes
+ OPTION(osd_stat_refresh_interval, 0, OPT_DOUBLE, .5),
+ OPTION(osd_min_pg_size_without_alive, 0, OPT_INT, 2), // smallest pg we allow to activate without telling the monitor
+ OPTION(osd_pg_bits, 0, OPT_INT, 6), // bits per osd
+ OPTION(osd_lpg_bits, 0, OPT_INT, 1), // bits per osd
+ OPTION(osd_object_layout, 0, OPT_INT, CEPH_OBJECT_LAYOUT_HASHINO),
+ OPTION(osd_pg_layout, 0, OPT_INT, CEPH_PG_LAYOUT_CRUSH),
+ OPTION(osd_min_rep, 0, OPT_INT, 2),
+ OPTION(osd_max_rep, 0, OPT_INT, 3),
+ OPTION(osd_min_raid_width, 0, OPT_INT, 3),
+ OPTION(osd_max_raid_width, 0, OPT_INT, 2),
+ OPTION(osd_maxthreads, 0, OPT_INT, 2), // 0 == no threading
+ OPTION(osd_max_opq, 0, OPT_INT, 10),
+ OPTION(osd_age, 0, OPT_FLOAT, .8),
+ OPTION(osd_age_time, 0, OPT_INT, 0),
+ OPTION(osd_heartbeat_interval, 0, OPT_INT, 1),
+ OPTION(osd_mon_heartbeat_interval, 0, OPT_INT, 30), // if no peers, ping monitor
+ OPTION(osd_heartbeat_grace, 0, OPT_INT, 20),
+ OPTION(osd_mon_report_interval, 0, OPT_INT, 5), // pg stats, failures, up_thru, boot.
+ OPTION(osd_replay_window, 0, OPT_INT, 45),
+ OPTION(osd_max_pull, 0, OPT_INT, 2),
+ OPTION(osd_preserve_trimmed_log, 0, OPT_BOOL, true),
+ OPTION(osd_recovery_delay_start, 0, OPT_FLOAT, 15),
+ OPTION(osd_recovery_max_active, 0, OPT_INT, 5),
+ OPTION(osd_auto_weight, 0, OPT_BOOL, false),
+ OPTION(filestore, 0, OPT_BOOL, false),
+ OPTION(filestore_max_sync_interval, 0, OPT_DOUBLE, .2), // seconds
+ OPTION(filestore_min_sync_interval, 0, OPT_DOUBLE, .001), // seconds
+ OPTION(filestore_fake_attrs, 0, OPT_BOOL, false),
+ OPTION(filestore_fake_collections, 0, OPT_BOOL, false),
+ OPTION(filestore_dev, 0, OPT_STR, 0),
+ OPTION(filestore_btrfs_trans, 0, OPT_BOOL, true),
+ OPTION(ebofs, 0, OPT_BOOL, false),
+ OPTION(ebofs_cloneable, 0, OPT_BOOL, true),
+ OPTION(ebofs_verify, 0, OPT_BOOL, false),
+ OPTION(ebofs_commit_ms, 0, OPT_INT, 200), // 0 = no forced commit timeout (for debugging/tracing)
+ OPTION(ebofs_oc_size, 0, OPT_INT, 10000), // onode cache
+ OPTION(ebofs_cc_size, 0, OPT_INT, 10000), // cnode cache
+ OPTION(ebofs_bc_size, 0, OPT_LONGLONG, 50*256), // 4k blocks, *256 for MB
+ OPTION(ebofs_bc_max_dirty, 0, OPT_LONGLONG, 30*256), // before write() will block
+ OPTION(ebofs_max_prefetch, 0, OPT_INT, 1000), // 4k blocks
+ OPTION(ebofs_realloc, 0, OPT_BOOL, false), // hrm, this can cause bad fragmentation, don't use!
+ OPTION(ebofs_verify_csum_on_read, 0, OPT_BOOL, true),
+ OPTION(journal_dio, 0, OPT_BOOL, false),
+ OPTION(journal_max_write_bytes, 0, OPT_INT, 0),
+ OPTION(journal_max_write_entries, 0, OPT_INT, 100),
+ OPTION(bdev_lock, 0, OPT_BOOL, true),
+ OPTION(bdev_iothreads, 0, OPT_INT, 1), // number of ios to queue with kernel
+ OPTION(bdev_idle_kick_after_ms, 0, OPT_INT, 100), // ms
+ OPTION(bdev_el_fw_max_ms, 0, OPT_INT, 10000), // restart elevator at least once every 10000 ms
+ OPTION(bdev_el_bw_max_ms, 0, OPT_INT, 3000), // restart elevator at least once every 3000 ms
+ OPTION(bdev_el_bidir, 0, OPT_BOOL, false), // bidirectional elevator?
+ OPTION(bdev_iov_max, 0, OPT_INT, 512), // max # iov's to collect into a single readv()/writev() call
+ OPTION(bdev_debug_check_io_overlap, 0, OPT_BOOL, true), // [DEBUG] check for any pending io overlaps
+ OPTION(bdev_fake_mb, 0, OPT_INT, 0),
+ OPTION(bdev_fake_max_mb, 0, OPT_INT, 0),
};
static bool set_conf_val(void *field, opt_type_t type, const char *val)
{
switch (type) {
- case BOOL:
+ case OPT_BOOL:
if (strcasecmp(val, "false") == 0)
*(bool *)field = false;
else if (strcasecmp(val, "true") == 0)
else
*(bool *)field = (bool)atoi(val);
break;
- case INT:
+ case OPT_INT:
*(int *)field = atoi(val);
break;
- case LONGLONG:
+ case OPT_LONGLONG:
*(long long *)field = atoll(val);
break;
- case STR:
+ case OPT_STR:
if (val)
*(char **)field = strdup(val);
else
*(char **)field = NULL;
break;
- case FLOAT:
+ case OPT_FLOAT:
*(float *)field = atof(val);
break;
- case DOUBLE:
+ case OPT_DOUBLE:
*(double *)field = strtod(val, NULL);
break;
default:
}
// Read `var` from `section` into the storage behind `inout`, viewed as `type *`.
// Expands to a call on `cf`, so a `cf` must be visible wherever this is used.
// NOTE(review): the last argument is presumably the fallback value; the removed
// line passed the current contents of `inout`, the added line passes NULL.
// Confirm against ConfFile::read.
#define OPT_READ_TYPE(section, var, type, inout) \
- cf->read(section, var, (type *)inout, *(type *)inout)
+ cf->read(section, var, (type *)inout, NULL)
-void parse_config_file(ConfFile *cf, bool auto_update, const char *module_type, const char *module_id)
// conf_read_key: look up `key` in the config file and store the value through
// `out`, which is treated as a pointer to the C type matching `type`.
// Sections are tried in this order, skipping NULL ones: g_conf.name,
// g_conf.alt_name, g_conf.type, alt_section, then "global". The search stops
// at the first read that returns nonzero.
// Returns the result of the last read attempted (0 for types with no case in
// the type switch below).
// `cf` is reached through OPT_READ_TYPE and is not a parameter here, so it has
// to be visible at file scope.
+int conf_read_key(const char *alt_section, const char *key, opt_type_t type, void *out)
{
- int opt_len = sizeof(config_optionsp)/sizeof(config_option);
- int s = 0;
+ int s;
int ret;
- char *module_name = NULL, *module_alt_name = NULL;
-
- cf->set_auto_update(false);
- cf->set_post_process_func(conf_post_process_val);
- cf->parse();
-
- if (module_id) {
- module_name = (char *)malloc(strlen(module_type) + strlen(module_id) + 2);
- sprintf(module_name, "%s.%s", module_type, module_id);
- module_alt_name = (char *)malloc(strlen(module_type) + strlen(module_id) + 1);
- sprintf(module_alt_name, "%s%s", module_type, module_id);
- } else {
- if (module_type) {
- module_name = strdup(module_type);
- }
- }
-
- for (int i=0; i<opt_len; i++) {
- for (s=0; s<5; s++) {
- config_option *opt = &config_optionsp[i];
- const char *section;
-
- switch (s) {
- case 0:
- section = module_name;
- if (section)
- break;
- case 1:
- section = module_alt_name;
- if (section)
- break;
- case 2:
// Each pass picks one candidate section. The cases fall through on purpose:
// a NULL candidate moves straight on to the next one. Cases 2, 3 and default
// also bump `s` so the loop does not revisit candidates that were skipped.
+ for (s=0; s<5; s++) {
+ const char *section;
+
+ switch (s) {
+ case 0:
+ section = g_conf.name;
+ if (section)
+ break;
+ case 1:
+ section = g_conf.alt_name;
+ if (section)
+ break;
+ case 2:
s = 2;
- section = module_type;
+ section = g_conf.type;
if (section)
break;
- case 3:
+ case 3:
s = 3;
- section = opt->section;
+ section = alt_section;
if (section)
break;
- default:
- cf->set_auto_update(true);
+ default:
s = 4;
section = "global";
- }
+ }
- switch (opt->type) {
- case STR:
- ret = OPT_READ_TYPE(section, opt->conf_name, char *, opt->val_ptr);
- break;
- case BOOL:
- ret = OPT_READ_TYPE(section, opt->conf_name, bool, opt->val_ptr);
- break;
- case INT:
- ret = OPT_READ_TYPE(section, opt->conf_name, int, opt->val_ptr);
- break;
- case FLOAT:
- ret = OPT_READ_TYPE(section, opt->conf_name, float, opt->val_ptr);
- break;
- case DOUBLE:
- ret = OPT_READ_TYPE(section, opt->conf_name, double, opt->val_ptr);
- break;
- default:
// NOTE(review): there is no OPT_LONGLONG case, so long long options take the
// default branch (ret = 0) and are never read from the config file here.
// Confirm whether ConfFile::read has a long long overload that should be used.
+ switch (type) {
+ case OPT_STR:
+ ret = OPT_READ_TYPE(section, key, char *, out);
+ break;
+ case OPT_BOOL:
+ ret = OPT_READ_TYPE(section, key, bool, out);
+ break;
+ case OPT_INT:
+ ret = OPT_READ_TYPE(section, key, int, out);
+ break;
+ case OPT_FLOAT:
+ ret = OPT_READ_TYPE(section, key, float, out);
+ break;
+ case OPT_DOUBLE:
+ ret = OPT_READ_TYPE(section, key, double, out);
+ break;
+ default:
ret = 0;
break;
- }
+ }
// A nonzero result ends the search; later sections are not consulted.
- if (ret)
+ if (ret)
break;
- }
}
-
+
+ return ret;
}
// Parse the config file behind `cf`, then load every entry of config_optionsp
// through conf_read_key() into the variable that entry points at.
// NOTE(review): `auto_update` is not referenced in this body; auto-update is
// switched off unconditionally before parsing. Confirm that this is intended.
+void parse_config_file(ConfFile *cf, bool auto_update)
+{
+ int opt_len = sizeof(config_optionsp)/sizeof(config_option);
+
+ cf->set_auto_update(false);
+ cf->set_post_process_func(conf_post_process_val);
+ cf->parse();
+
// No per-option section is passed (alt_section is NULL), and the result of
// conf_read_key() is ignored for every entry.
+ for (int i=0; i<opt_len; i++) {
+ config_option *opt = &config_optionsp[i];
+ conf_read_key(NULL, opt->conf_name, opt->type, opt->val_ptr);
+ }
+}
+
+
+
+
void parse_startup_config_options(std::vector<const char*>& args, const char *module_type)
{
unsigned int val_pos;
SET_ARG_VAL(dest, type); \
} while (0)
#define SET_BOOL_ARG_VAL(dest) \
- set_conf_val(dest, BOOL, (val_pos ? &args[i][val_pos] : "true"))
+ set_conf_val(dest, OPT_BOOL, (val_pos ? &args[i][val_pos] : "true"))
#define CMD_EQ(str_cmd, char_cmd) \
cmd_equals(args[i], str_cmd, char_cmd, &val_pos)
if (CMD_EQ("conf", 'c')) {
- SAFE_SET_ARG_VAL(&g_conf.conf, STR);
+ SAFE_SET_ARG_VAL(&g_conf.conf, OPT_STR);
} else if (CMD_EQ("monmap", 'M')) {
- SAFE_SET_ARG_VAL(&g_conf.monmap, STR);
+ SAFE_SET_ARG_VAL(&g_conf.monmap, OPT_STR);
} else if (CMD_EQ("bind", 0)) {
assert_warn(parse_ip_port(args[++i], g_my_addr));
} else if (CMD_EQ("nodaemon", 'D')) {
} else if (CMD_EQ("show_conf", 'S')) {
show_config = true;
} else if (CMD_EQ("id", 'i')) {
- SAFE_SET_ARG_VAL(&g_conf.id, STR);
+ SAFE_SET_ARG_VAL(&g_conf.id, OPT_STR);
} else {
nargs.push_back(args[i]);
}
if (g_conf.id) {
g_conf.name = (char *)malloc(strlen(module_type) + strlen(g_conf.id) + 2);
sprintf(g_conf.name, "%s.%s", g_conf.type, g_conf.id);
+ g_conf.alt_name = (char *)malloc(strlen(module_type) + strlen(g_conf.id) + 1);
+ sprintf(g_conf.alt_name, "%s%s", module_type, g_conf.id);
} else {
g_conf.name = g_conf.type;
}
cf = new ConfFile(g_conf.conf);
- parse_config_file(cf, true, g_conf.type, g_conf.id);
+ parse_config_file(cf, true);
if (show_config) {
cf->dump();
for (optn = 0; optn < opt_len; optn++) {
if (CMD_EQ("lockdep", '\0')) {
- SAFE_SET_ARG_VAL(&g_lockdep, INT);
+ SAFE_SET_ARG_VAL(&g_lockdep, OPT_INT);
} else if (cmd_equals(args[i],
config_optionsp[optn].name,
config_optionsp[optn].char_option,
&val_pos)) {
- if (isarg || val_pos || config_optionsp[optn].type == BOOL)
+ if (isarg || val_pos || config_optionsp[optn].type == OPT_BOOL)
SET_ARG_VAL(config_optionsp[optn].val_ptr, config_optionsp[optn].type);
else
continue;