From: John Spray Date: Fri, 17 Apr 2015 20:23:23 +0000 (+0100) Subject: tools: refactor objectstore tool X-Git-Tag: v9.0.3~203^2~11 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d26086d35aa869807b70539d2871753cd3be18b2;p=ceph.git tools: refactor objectstore tool ...to expose the definitions of the object dump format (RadosDump) for use in rados import/export. The serialized->RADOS code is now RadosImport, and the serialized->ObjectStore code is ObjectStoreTool. This is a step toward #9964, which should use the same serialization format as the existing objectstore-tool dumps. Signed-off-by: John Spray --- diff --git a/src/tools/Makefile-server.am b/src/tools/Makefile-server.am index 12d22f011da4..2dc27e176cc9 100644 --- a/src/tools/Makefile-server.am +++ b/src/tools/Makefile-server.am @@ -14,7 +14,7 @@ bin_DEBUGPROGRAMS += ceph-kvstore-tool if WITH_OSD -ceph_objectstore_tool_SOURCES = tools/ceph_objectstore_tool.cc +ceph_objectstore_tool_SOURCES = tools/ceph_objectstore_tool.cc tools/RadosImport.cc tools/RadosDump.cc ceph_objectstore_tool_LDADD = $(LIBOSD) $(LIBOS) $(CEPH_GLOBAL) $(BOOST_PROGRAM_OPTIONS_LIBS) $(LIBRADOS) if LINUX ceph_objectstore_tool_LDADD += -ldl diff --git a/src/tools/Makefile.am b/src/tools/Makefile.am index 6633a271dd07..df3ed892aac3 100644 --- a/src/tools/Makefile.am +++ b/src/tools/Makefile.am @@ -54,4 +54,7 @@ noinst_HEADERS += \ tools/cephfs/Dumper.h \ tools/cephfs/TableTool.h \ tools/cephfs/MDSUtility.h \ - tools/rados/rados_sync.h + tools/rados/rados_sync.h \ + tools/RadosDump.h \ + tools/RadosImport.h\ + tools/ceph_objectstore_tool.h diff --git a/src/tools/RadosDump.cc b/src/tools/RadosDump.cc new file mode 100644 index 000000000000..695f40826425 --- /dev/null +++ b/src/tools/RadosDump.cc @@ -0,0 +1,167 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2015 Red Hat + * + * This 
is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "RadosDump.h"
+
+/**
+ * Read the fixed-length super_header from file_fd into the member `sh`.
+ *
+ * @return 0 on success, -EFAULT when fewer than
+ *         super_header::FIXED_LENGTH bytes could be read
+ */
+int RadosDump::read_super()
+{
+  bufferlist ebl;
+  ssize_t bytes = ebl.read_fd(file_fd, super_header::FIXED_LENGTH);
+  // A negative (error) result also fails this comparison after the cast.
+  if ((size_t)bytes != super_header::FIXED_LENGTH) {
+    cerr << "Unexpected EOF" << std::endl;
+    return -EFAULT;
+  }
+
+  // Take the iterator only once the list actually holds the data.
+  bufferlist::iterator ebliter = ebl.begin();
+  sh.decode(ebliter);
+
+  return 0;
+}
+
+
+/**
+ * Read one section header of sh.header_size bytes from file_fd.
+ *
+ * @param h  receives the decoded header; must not be NULL
+ * @return 0 on success, -EFAULT on a short read
+ */
+int RadosDump::get_header(header *h)
+{
+  assert (h != NULL);
+
+  bufferlist ebl;
+  ssize_t bytes = ebl.read_fd(file_fd, sh.header_size);
+  if ((size_t)bytes != sh.header_size) {
+    cerr << "Unexpected EOF" << std::endl;
+    return -EFAULT;
+  }
+
+  bufferlist::iterator ebliter = ebl.begin();
+  h->decode(ebliter);
+
+  return 0;
+}
+
+/**
+ * Read one section footer of sh.footer_size bytes from file_fd and
+ * check its magic against endmagic.
+ *
+ * @param f  receives the decoded footer; must not be NULL
+ * @return 0 on success, -EFAULT on a short read or a magic mismatch
+ */
+int RadosDump::get_footer(footer *f)
+{
+  assert(f != NULL);
+
+  bufferlist ebl;
+  ssize_t bytes = ebl.read_fd(file_fd, sh.footer_size);
+  if ((size_t)bytes != sh.footer_size) {
+    cerr << "Unexpected EOF" << std::endl;
+    return -EFAULT;
+  }
+
+  bufferlist::iterator ebliter = ebl.begin();
+  f->decode(ebliter);
+
+  if (f->magic != endmagic) {
+    cerr << "Bad footer magic" << std::endl;
+    return -EFAULT;
+  }
+
+  return 0;
+}
+
+/**
+ * Read the next section: header, hdr.size payload bytes, and (for
+ * non-empty sections only) the trailing footer.
+ *
+ * @param type  receives the section type taken from the header
+ * @param bl    cleared, then filled with the section payload
+ * @return 0 on success, non-zero (negative errno) on failure
+ */
+int RadosDump::read_section(sectiontype_t *type, bufferlist *bl)
+{
+  header hdr;
+
+  int ret = get_header(&hdr);
+  if (ret)
+    return ret;
+
+  *type = hdr.type;
+
+  // hdr.size is a signed value read from the file; a negative size would
+  // otherwise be handed to read_fd() as an enormous length.
+  if (hdr.size < 0) {
+    cerr << "Invalid section size " << hdr.size << std::endl;
+    return -EFAULT;
+  }
+
+  bl->clear();
+  ssize_t bytes = bl->read_fd(file_fd, hdr.size);
+  if (bytes != hdr.size) {
+    cerr << "Unexpected EOF" << std::endl;
+    return -EFAULT;
+  }
+
+  // Zero-length sections carry no footer, so there is nothing to read.
+  if (hdr.size > 0) {
+    footer ft;
+    ret = get_footer(&ft);
+    if (ret)
+      return ret;
+  }
+
+  return 0;
+}
+
+
+/**
+ * Consume sections from the stream up to and including TYPE_OBJECT_END
+ * without applying them.
+ *
+ * @param bl  not used by this function; kept for interface compatibility
+ * @return 0 once TYPE_OBJECT_END was seen, a read_section() error, or
+ *         -EFAULT when a section type that cannot belong to an object
+ *         is encountered
+ */
+int RadosDump::skip_object(bufferlist &bl)
+{
+  bufferlist ebl;
+  bool done = false;
+  while(!done) {
+    sectiontype_t type;
+    int ret = read_section(&type, &ebl);
+    if (ret)
+      return ret;
+
+    if (type >= END_OF_TYPES) {
+      cout << "Skipping unknown object section type" << std::endl;
+      continue;
+    }
+    switch(type) {
+    case TYPE_DATA:
+    case TYPE_ATTRS:
+    case TYPE_OMAP_HDR:
+    case TYPE_OMAP:
+#ifdef DIAGNOSTIC
+      cerr << "Skip type " << (int)type << std::endl;
+#endif
+      break;
+    case TYPE_OBJECT_END:
+      done = true;
+      break;
+    default:
+      return -EFAULT;
+    }
+  }
+  return 0;
+}
+
+//Write super_header with its fixed 16 byte length
+//
+//The header and footer sizes stored in it are measured by encoding an
+//empty header and a default footer. Nothing is written in dry_run mode.
+void RadosDump::write_super()
+{
+  if (dry_run) {
+    return;
+  }
+
+  bufferlist superbl;
+  // Local copy on purpose: the member `sh` is left untouched.
+  super_header super;
+  footer ft;
+
+  header hdr(TYPE_NONE, 0);
+  hdr.encode(superbl);
+
+  super.magic = super_header::super_magic;
+  super.version = super_header::super_ver;
+  super.header_size = superbl.length();
+  superbl.clear();
+  ft.encode(superbl);
+  super.footer_size = superbl.length();
+  superbl.clear();
+
+  super.encode(superbl);
+  assert(super_header::FIXED_LENGTH == superbl.length());
+  // The function is void, so the best we can do is report a failed write.
+  int r = superbl.write_fd(file_fd);
+  if (r) {
+    cerr << "write_super failed with " << r << std::endl;
+  }
+}
diff --git a/src/tools/RadosDump.h b/src/tools/RadosDump.h
new file mode 100644
index 000000000000..60086ffd4f71
--- /dev/null
+++ b/src/tools/RadosDump.h
@@ -0,0 +1,397 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RADOS_DUMP_H_
+#define RADOS_DUMP_H_
+
+#include <stdint.h>
+
+#include "include/buffer.h"
+#include "include/encoding.h"
+
+#include "osd/osd_types.h"
+#include "osd/OSDMap.h"
+
+typedef uint8_t sectiontype_t;
+typedef uint32_t mymagic_t;
+typedef int64_t mysize_t;
+
+// Section types that can appear in a dump stream.
+enum {
+  TYPE_NONE = 0,
+  TYPE_PG_BEGIN,
+  TYPE_PG_END,
+  TYPE_OBJECT_BEGIN,
+  TYPE_OBJECT_END,
+  TYPE_DATA,
+  TYPE_ATTRS,
+  TYPE_OMAP_HDR,
+  TYPE_OMAP,
+  TYPE_PG_METADATA,
+  TYPE_POOL_BEGIN,
+  TYPE_POOL_END,
+  END_OF_TYPES,	//Keep at the end
+};
+
+const uint16_t shortmagic = 0xffce;	//goes into stream as "ceff"
+//endmagic goes into stream as "ceff ffec"
+//(unsigned literal: the shift must not go through a signed int)
+const mymagic_t endmagic = (0xecffu << 16) | shortmagic;
+
+//The first FIXED_LENGTH bytes are a fixed
+//portion of the export output. This includes the overall
+//version number, and size of header and footer.
+//THIS STRUCTURE CAN ONLY BE APPENDED TO. If it needs to expand,
+//the version can be bumped and then anything
+//can be added to the export format.
+struct super_header {
+  // Widen before shifting so the value never passes through a signed int.
+  static const uint32_t super_magic =
+    (static_cast<uint32_t>(shortmagic) << 16) | shortmagic;
+  // ver = 1, Initial version
+  // ver = 2, Add OSDSuperblock to pg_begin
+  static const uint32_t super_ver = 2;
+  static const uint32_t FIXED_LENGTH = 16;
+  uint32_t magic;
+  uint32_t version;
+  uint32_t header_size;
+  uint32_t footer_size;
+
+  super_header() : magic(0), version(0), header_size(0), footer_size(0) { }
+
+  // Four raw 32-bit fields, deliberately without ENCODE_START framing.
+  void encode(bufferlist& bl) const {
+    ::encode(magic, bl);
+    ::encode(version, bl);
+    ::encode(header_size, bl);
+    ::encode(footer_size, bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    ::decode(magic, bl);
+    ::decode(version, bl);
+    ::decode(header_size, bl);
+    ::decode(footer_size, bl);
+  }
+};
+
+// Per-section header: section type plus payload size in bytes.
+struct header {
+  sectiontype_t type;
+  mysize_t size;
+  header(sectiontype_t type, mysize_t size) :
+    type(type), size(size) { }
+  header(): type(0), size(0) { }
+
+  void encode(bufferlist& bl) const {
+    // The type is stored twice in the upper bytes next to shortmagic;
+    // decode() only reads the top byte back.
+    const uint32_t t = type;
+    uint32_t debug_type = (t << 24) | (t << 16) | shortmagic;
+    ENCODE_START(1, 1, bl);
+    ::encode(debug_type, bl);
+    ::encode(size, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    uint32_t debug_type;
+    DECODE_START(1, bl);
+    ::decode(debug_type, bl);
+    type = debug_type >> 24;
+    ::decode(size, bl);
+    DECODE_FINISH(bl);
+  }
+};
+
+// Per-section footer: carries endmagic so readers can detect corruption.
+struct footer {
+  mymagic_t magic;
+  footer() : magic(endmagic) { }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(magic, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(magic, bl);
+    DECODE_FINISH(bl);
+  }
+};
+
+// Payload of TYPE_PG_BEGIN: the exported PG id and the OSD superblock.
+struct pg_begin {
+  spg_t pgid;
+  OSDSuperblock superblock;
+
+  pg_begin(spg_t pg, const OSDSuperblock& sb):
+    pgid(pg), superblock(sb) { }
+  pg_begin() { }
+
+  void encode(bufferlist& bl) const {
+    // If superblock doesn't include CEPH_FS_FEATURE_INCOMPAT_SHARDS then
+    // shard will be NO_SHARD for a replicated pool. This means
+    // that we allow the decode by struct_v 2.
+    ENCODE_START(3, 2, bl);
+    ::encode(pgid.pgid, bl);
+    ::encode(superblock, bl);
+    ::encode(pgid.shard, bl);
+    ENCODE_FINISH(bl);
+  }
+  // NOTE: New super_ver prevents decode from ver 1
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(3, bl);
+    ::decode(pgid.pgid, bl);
+    if (struct_v > 1) {
+      ::decode(superblock, bl);
+    }
+    if (struct_v > 2) {
+      ::decode(pgid.shard, bl);
+    } else {
+      pgid.shard = shard_id_t::NO_SHARD;
+    }
+    DECODE_FINISH(bl);
+  }
+};
+
+// Payload of TYPE_OBJECT_BEGIN: object id plus a copy of its object_info_t.
+struct object_begin {
+  ghobject_t hoid;
+
+  // Duplicate what is in the OI_ATTR so we have it at the start
+  // of object processing.
+  object_info_t oi;
+
+  object_begin(const ghobject_t &hoid): hoid(hoid) { }
+  object_begin() { }
+
+  // If superblock doesn't include CEPH_FS_FEATURE_INCOMPAT_SHARDS then
+  // generation will be NO_GEN, shard_id will be NO_SHARD for a replicated
+  // pool. This means we will allow the decode by struct_v 1.
+  void encode(bufferlist& bl) const {
+    ENCODE_START(3, 1, bl);
+    ::encode(hoid.hobj, bl);
+    ::encode(hoid.generation, bl);
+    ::encode(hoid.shard_id, bl);
+    ::encode(oi, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(3, bl);
+    ::decode(hoid.hobj, bl);
+    if (struct_v > 1) {
+      ::decode(hoid.generation, bl);
+      ::decode(hoid.shard_id, bl);
+    } else {
+      hoid.generation = ghobject_t::NO_GEN;
+      hoid.shard_id = shard_id_t::NO_SHARD;
+    }
+    if (struct_v > 2) {
+      ::decode(oi, bl);
+    }
+    DECODE_FINISH(bl);
+  }
+};
+
+// Payload of TYPE_DATA: one extent (offset, len) of object data.
+struct data_section {
+  uint64_t offset;
+  uint64_t len;
+  bufferlist databl;
+  data_section(uint64_t offset, uint64_t len, bufferlist bl):
+     offset(offset), len(len), databl(bl) { }
+  data_section(): offset(0), len(0) { }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(offset, bl);
+    ::encode(len, bl);
+    ::encode(databl, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(offset, bl);
+    ::decode(len, bl);
+    ::decode(databl, bl);
+    DECODE_FINISH(bl);
+  }
+};
+
+// Payload of TYPE_ATTRS: attribute name -> value.
+struct attr_section {
+  map<string,bufferlist> data;
+  attr_section(const map<string,bufferlist> &data) : data(data) { }
+  // Convenience constructor: wraps each bufferptr value in a bufferlist.
+  attr_section(map<string, bufferptr> &data_)
+  {
+    for (std::map<std::string, bufferptr>::iterator i = data_.begin();
+         i != data_.end(); ++i) {
+      bufferlist bl;
+      bl.push_front(i->second);
+      data[i->first] = bl;
+    }
+  }
+
+  attr_section() { }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(data, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(data, bl);
+    DECODE_FINISH(bl);
+  }
+};
+
+// Payload of TYPE_OMAP_HDR: the object's omap header blob.
+struct omap_hdr_section {
+  bufferlist hdr;
+  omap_hdr_section(bufferlist hdr) : hdr(hdr) { }
+  omap_hdr_section() { }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(hdr, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(hdr, bl);
+    DECODE_FINISH(bl);
+  }
+};
+
+// Payload of TYPE_OMAP: a batch of omap key -> value pairs.
+struct omap_section {
+  map<string, bufferlist> omap;
+  omap_section(const map<string, bufferlist> &omap) :
+    omap(omap) { }
+  omap_section() { }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(omap, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(omap, bl);
+    DECODE_FINISH(bl);
+  }
+};
+
+// Payload of TYPE_PG_METADATA.
+// NOTE(review): the key/value types of past_intervals and divergent_priors
+// were missing from this copy of the patch and are restored from the
+// upstream Ceph tree -- confirm against osd/osd_types.h.
+struct metadata_section {
+  // struct_ver is the on-disk version of original pg
+  __u8 struct_ver;  // for reference
+  epoch_t map_epoch;
+  pg_info_t info;
+  pg_log_t log;
+  map<epoch_t,pg_interval_t> past_intervals;
+  OSDMap osdmap;
+  bufferlist osdmap_bl;  // Used in lieu of encoding osdmap due to crc checking
+  map<eversion_t, hobject_t> divergent_priors;
+
+  metadata_section(__u8 struct_ver, epoch_t map_epoch, const pg_info_t &info,
+		   const pg_log_t &log, map<epoch_t,pg_interval_t> &past_intervals,
+		   map<eversion_t, hobject_t> &divergent_priors)
+    : struct_ver(struct_ver),
+      map_epoch(map_epoch),
+      info(info),
+      log(log),
+      past_intervals(past_intervals),
+      divergent_priors(divergent_priors) { }
+  metadata_section()
+    : struct_ver(0),
+      map_epoch(0) { }
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(4, 1, bl);
+    ::encode(struct_ver, bl);
+    ::encode(map_epoch, bl);
+    ::encode(info, bl);
+    ::encode(log, bl);
+    ::encode(past_intervals, bl);
+    // Equivalent to osdmap.encode(bl, features); but
+    // preserving exact layout for CRC checking.
+    bl.append(osdmap_bl);
+    ::encode(divergent_priors, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(4, bl);
+    ::decode(struct_ver, bl);
+    ::decode(map_epoch, bl);
+    ::decode(info, bl);
+    ::decode(log, bl);
+    if (struct_v > 1) {
+      ::decode(past_intervals, bl);
+    } else {
+      cout << "NOTICE: Older export without past_intervals" << std::endl;
+    }
+    if (struct_v > 2) {
+      osdmap.decode(bl);
+    } else {
+      cout << "WARNING: Older export without OSDMap information" << std::endl;
+    }
+    if (struct_v > 3) {
+      ::decode(divergent_priors, bl);
+    }
+    DECODE_FINISH(bl);
+  }
+};
+
+/**
+ * Superclass for classes that will need to handle a serialized RADOS
+ * dump. Requires that the serialized dump be opened with a known FD.
+ */
+class RadosDump
+{
+  protected:
+    int file_fd;        // descriptor the read_*/get_* helpers and write_super use
+    super_header sh;    // filled in by read_super()
+    bool debug;  // FIXME set this somewhere
+    bool dry_run;       // when set, the write_* helpers do nothing
+
+  public:
+    RadosDump(int file_fd_, bool dry_run_)
+      : file_fd(file_fd_), debug(true), dry_run(dry_run_)
+    {}
+
+    int read_super();
+    int get_header(header *h);
+    int get_footer(footer *f);
+    int read_section(sectiontype_t *type, bufferlist *bl);
+    int skip_object(bufferlist &bl);
+    void write_super();
+
+    // Define this in .h because it's templated
+    //
+    // Encode obj and write it to fd as header + payload + footer.
+    // Returns 0 in dry_run mode, otherwise the first non-zero write_fd()
+    // result, or 0 when all three writes succeed.
+    template <typename T>
+      int write_section(sectiontype_t type, const T& obj, int fd) {
+        if (dry_run)
+          return 0;
+        bufferlist blhdr, bl, blftr;
+        obj.encode(bl);
+        header hdr(type, bl.length());
+        hdr.encode(blhdr);
+        footer ft;
+        ft.encode(blftr);
+
+        int ret = blhdr.write_fd(fd);
+        if (ret) return ret;
+        ret = bl.write_fd(fd);
+        if (ret) return ret;
+        ret = blftr.write_fd(fd);
+        return ret;
+      }
+
+    // Write a payload-less section: just a header with size 0 and no
+    // footer. Returns 0 in dry_run mode, else the write_fd() result.
+    int write_simple(sectiontype_t type, int fd)
+    {
+      if (dry_run)
+        return 0;
+      bufferlist hbl;
+
+      header hdr(type, 0);
+      hdr.encode(hbl);
+      return hbl.write_fd(fd);
+    }
+};
+
+#endif
diff --git a/src/tools/RadosImport.cc b/src/tools/RadosImport.cc
new file mode 100644
index 000000000000..475822bbfed3
--- /dev/null
+++ b/src/tools/RadosImport.cc
@@ -0,0 +1,356 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#include "common/errno.h"
+
+#include "osd/PGLog.h"
+#include "RadosImport.h"
+
+/**
+ * Import the dump open on file_fd into the named pool.
+ *
+ * Validates the super header, requires the first section to be
+ * TYPE_PG_BEGIN for a non-sharded PG, connects to the cluster and then
+ * feeds every TYPE_OBJECT_BEGIN section to get_object_rados() until
+ * TYPE_PG_END is seen.
+ *
+ * @param pool          name of the destination pool
+ * @param no_overwrite  when true, objects that already exist are skipped
+ * @return 0 on success, non-zero (negative errno) on failure
+ */
+int RadosImport::import(std::string pool, bool no_overwrite)
+{
+  bufferlist ebl;
+
+  int ret = read_super();
+  if (ret)
+    return ret;
+
+  if (sh.magic != super_header::super_magic) {
+    cerr << "Invalid magic number: 0x"
+      << std::hex << sh.magic << " vs. 0x" << super_header::super_magic
+      << std::dec << std::endl;
+    return -EFAULT;
+  }
+
+  if (sh.version > super_header::super_ver) {
+    cerr << "Can't handle export format version=" << sh.version << std::endl;
+    return -EINVAL;
+  }
+
+  //First section must be TYPE_PG_BEGIN
+  sectiontype_t type;
+  ret = read_section(&type, &ebl);
+  if (ret)
+    return ret;
+  if (type != TYPE_PG_BEGIN) {
+    // sectiontype_t is a uint8_t: print its numeric value, not a raw byte.
+    cerr << "Invalid first section type " << static_cast<int>(type)
+      << std::endl;
+    return -EFAULT;
+  }
+
+  bufferlist::iterator ebliter = ebl.begin();
+  pg_begin pgb;
+  pgb.decode(ebliter);
+  spg_t pgid = pgb.pgid;
+
+  if (!pgid.is_no_shard()) {
+    cerr << "Importing Erasure Coded shard is not supported" << std::endl;
+    return -EOPNOTSUPP;
+  }
+
+  if (debug) {
+    cerr << "Exported features: " << pgb.superblock.compat_features << std::endl;
+  }
+
+  // XXX: How to check export features?
+#if 0
+  if (sb.compat_features.compare(pgb.superblock.compat_features) == -1) {
+    cerr << "Export has incompatible features set "
+      << pgb.superblock.compat_features << std::endl;
+    return -EINVAL;
+  }
+#endif
+
+  librados::IoCtx ioctx;
+  librados::Rados cluster;
+
+  char *id = getenv("CEPH_CLIENT_ID");
+  if (id) cerr << "Client id is: " << id << std::endl;
+  ret = cluster.init(id);
+  if (ret) {
+    cerr << "Error " << ret << " in cluster.init" << std::endl;
+    return ret;
+  }
+  ret = cluster.conf_read_file(NULL);
+  if (ret) {
+    cerr << "Error " << ret << " in cluster.conf_read_file" << std::endl;
+    return ret;
+  }
+  ret = cluster.conf_parse_env(NULL);
+  if (ret) {
+    cerr << "Error " << ret << " in cluster.conf_parse_env" << std::endl;
+    return ret;
+  }
+  // Without a successful connect every later call would fail obscurely.
+  ret = cluster.connect();
+  if (ret) {
+    cerr << "Error " << ret << " in cluster.connect" << std::endl;
+    return ret;
+  }
+
+  ret = cluster.ioctx_create(pool.c_str(), ioctx);
+  if (ret < 0) {
+    cerr << "ioctx_create " << pool << " failed with " << ret << std::endl;
+    return ret;
+  }
+
+  cout << "Importing from pgid " << pgid << std::endl;
+
+  bool done = false;
+  bool found_metadata = false;
+  while(!done) {
+    ret = read_section(&type, &ebl);
+    if (ret)
+      return ret;
+
+    //cout << "do_import: Section type " << hex << type << dec << std::endl;
+    if (type >= END_OF_TYPES) {
+      cout << "Skipping unknown section type" << std::endl;
+      continue;
+    }
+    switch(type) {
+    case TYPE_OBJECT_BEGIN:
+      ret = get_object_rados(ioctx, ebl, no_overwrite);
+      if (ret) return ret;
+      break;
+    case TYPE_PG_METADATA:
+      // The PG metadata is not applied here; only its presence is noted.
+      if (debug)
+        cout << "Don't care about the old metadata" << std::endl;
+      found_metadata = true;
+      break;
+    case TYPE_PG_END:
+      done = true;
+      break;
+    default:
+      return -EFAULT;
+    }
+  }
+
+  if (!found_metadata) {
+    cerr << "Missing metadata section, ignored" << std::endl;
+  }
+
+  return 0;
+}
+
+/**
+ * Import one object whose TYPE_OBJECT_BEGIN payload is in bl, then
+ * consume its sections up to TYPE_OBJECT_END.
+ *
+ * Objects in the hit-set namespace and non-head objects are skipped.
+ * With dry_run set nothing is written; existing objects are only
+ * reported.
+ *
+ * @param ioctx         destination pool context; its namespace is set
+ *                      to the object's namespace
+ * @param bl            encoded object_begin section
+ * @param no_overwrite  when true, an existing object is left untouched
+ * @return 0 on success, non-zero (negative errno) on failure
+ */
+int RadosImport::get_object_rados(librados::IoCtx &ioctx, bufferlist &bl, bool no_overwrite)
+{
+  bufferlist::iterator ebliter = bl.begin();
+  object_begin ob;
+  ob.decode(ebliter);
+  map<string,bufferlist>::iterator i;
+  bool skipping;
+
+  data_section ds;
+  attr_section as;
+  omap_hdr_section oh;
+  omap_section os;
+
+  assert(g_ceph_context);
+  if (ob.hoid.hobj.nspace == g_ceph_context->_conf->osd_hit_set_namespace) {
+    cout << "Skipping internal object " << ob.hoid << std::endl;
+    // Propagate a failed skip: the stream position is unknown afterwards.
+    return skip_object(bl);
+  }
+
+  if (!ob.hoid.hobj.is_head()) {
+    cout << "Skipping non-head for " << ob.hoid << std::endl;
+    return skip_object(bl);
+  }
+
+  ioctx.set_namespace(ob.hoid.hobj.get_namespace());
+
+  string msg("Write");
+  skipping = false;
+  if (dry_run) {
+    uint64_t psize;
+    time_t pmtime;
+    int ret = ioctx.stat(ob.hoid.hobj.oid.name, &psize, &pmtime);
+    if (ret == 0) {
+      if (no_overwrite)
+        // Could set skipping, but dry-run doesn't change anything either
+        msg = "Skipping existing";
+      else
+        msg = "***Overwrite***";
+    }
+  } else {
+    // Exclusive create tells us whether the object already exists.
+    int ret = ioctx.create(ob.hoid.hobj.oid.name, true);
+    if (ret && ret != -EEXIST) {
+      cerr << "create failed: " << cpp_strerror(ret) << std::endl;
+      return ret;
+    }
+    if (ret == -EEXIST) {
+      if (no_overwrite) {
+        msg = "Skipping existing";
+        skipping = true;
+      } else {
+        msg = "***Overwrite***";
+        ret = ioctx.remove(ob.hoid.hobj.oid.name);
+        if (ret < 0) {
+          cerr << "remove failed: " << cpp_strerror(ret) << std::endl;
+          return ret;
+        }
+        ret = ioctx.create(ob.hoid.hobj.oid.name, true);
+        // If object re-appeared after removal, let's just skip it
+        if (ret == -EEXIST) {
+          skipping = true;
+          msg = "Skipping in-use object";
+          ret = 0;
+        }
+        if (ret < 0) {
+          cerr << "create failed: " << cpp_strerror(ret) << std::endl;
+          return ret;
+        }
+      }
+    }
+  }
+
+  cout << msg << " " << ob.hoid << std::endl;
+
+  // An explicit alignment from the constructor wins over the pool's own.
+  bool need_align = false;
+  uint64_t alignment = 0;
+  if (align) {
+    need_align = true;
+    alignment = align;
+  } else {
+    if ((need_align = ioctx.pool_requires_alignment()))
+      alignment = ioctx.pool_required_alignment();
+  }
+
+  if (debug && need_align)
+    cerr << "alignment = " << alignment << std::endl;
+
+  bufferlist ebl, databl;
+  uint64_t in_offset = 0, out_offset = 0;
+  bool done = false;
+  while(!done) {
+    sectiontype_t type;
+    int ret = read_section(&type, &ebl);
+    if (ret)
+      return ret;
+
+    ebliter = ebl.begin();
+    //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
+    //cout << "\t\tsection size " << ebl.length() << std::endl;
+    if (type >= END_OF_TYPES) {
+      cout << "Skipping unknown object section type" << std::endl;
+      continue;
+    }
+    switch(type) {
+    case TYPE_DATA:
+      ds.decode(ebliter);
+      if (debug)
+        cerr << "\tdata: offset " << ds.offset << " len " << ds.len << std::endl;
+      if (need_align) {
+        // Aligned mode: accumulate data and flush whole multiples of
+        // `alignment`; the remainder stays in databl for the next round.
+        if (ds.offset != in_offset) {
+          cerr << "Discontiguous object data in export" << std::endl;
+          return -EFAULT;
+        }
+        assert(ds.databl.length() == ds.len);
+        databl.claim_append(ds.databl);
+        in_offset += ds.len;
+        if (databl.length() >= alignment) {
+          uint64_t rndlen = uint64_t(databl.length() / alignment) * alignment;
+          if (debug) cerr << "write offset=" << out_offset << " len=" << rndlen << std::endl;
+          if (!dry_run && !skipping) {
+            ret = ioctx.write(ob.hoid.hobj.oid.name, databl, rndlen, out_offset);
+            if (ret) {
+              cerr << "write failed: " << cpp_strerror(ret) << std::endl;
+              return ret;
+            }
+          }
+          out_offset += rndlen;
+          bufferlist n;
+          if (databl.length() > rndlen) {
+            assert(databl.length() - rndlen < alignment);
+            n.substr_of(databl, rndlen, databl.length() - rndlen);
+          }
+          databl = n;
+        }
+        break;
+      }
+      if (!dry_run && !skipping) {
+        ret = ioctx.write(ob.hoid.hobj.oid.name, ds.databl, ds.len, ds.offset);
+        if (ret) {
+          cerr << "write failed: " << cpp_strerror(ret) << std::endl;
+          return ret;
+        }
+      }
+      break;
+    case TYPE_ATTRS:
+      as.decode(ebliter);
+
+      if (debug)
+        cerr << "\tattrs: len " << as.data.size() << std::endl;
+      if (dry_run || skipping)
+        break;
+      for (i = as.data.begin(); i != as.data.end(); ++i) {
+        // "_" and "snapset" are not replayed as user xattrs.
+        if (i->first == "_" || i->first == "snapset")
+          continue;
+        // The first character of the stored key is dropped.
+        ret = ioctx.setxattr(ob.hoid.hobj.oid.name, i->first.substr(1).c_str(), i->second);
+        if (ret) {
+          cerr << "setxattr failed: " << cpp_strerror(ret) << std::endl;
+          if (ret != -EOPNOTSUPP)
+            return ret;
+        }
+      }
+      break;
+    case TYPE_OMAP_HDR:
+      oh.decode(ebliter);
+
+      if (debug)
+        cerr << "\tomap header: " << string(oh.hdr.c_str(), oh.hdr.length())
+          << std::endl;
+      if (dry_run || skipping)
+        break;
+      ret = ioctx.omap_set_header(ob.hoid.hobj.oid.name, oh.hdr);
+      if (ret) {
+        cerr << "omap_set_header failed: " << cpp_strerror(ret) << std::endl;
+        if (ret != -EOPNOTSUPP)
+          return ret;
+      }
+      break;
+    case TYPE_OMAP:
+      os.decode(ebliter);
+
+      if (debug)
+        cerr << "\tomap: size " << os.omap.size() << std::endl;
+      if (dry_run || skipping)
+        break;
+      ret = ioctx.omap_set(ob.hoid.hobj.oid.name, os.omap);
+      if (ret) {
+        cerr << "omap_set failed: " << cpp_strerror(ret) << std::endl;
+        if (ret != -EOPNOTSUPP)
+          return ret;
+      }
+      break;
+    case TYPE_OBJECT_END:
+      done = true;
+      // Flush whatever is left over from the aligned writes above.
+      if (need_align && databl.length() > 0) {
+        assert(databl.length() < alignment);
+        if (debug) cerr << "END write offset=" << out_offset << " len=" << databl.length() << std::endl;
+        if (dry_run || skipping)
+          break;
+        ret = ioctx.write(ob.hoid.hobj.oid.name, databl, databl.length(), out_offset);
+        if (ret) {
+          cerr << "write failed: " << cpp_strerror(ret) << std::endl;
+          return ret;
+        }
+      }
+      break;
+    default:
+      return -EFAULT;
+    }
+  }
+  return 0;
+}
diff --git a/src/tools/RadosImport.h b/src/tools/RadosImport.h
new file mode 100644
index 000000000000..4a2696a51b38
--- /dev/null
+++ b/src/tools/RadosImport.h
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef RADOS_IMPORT_H_
+#define RADOS_IMPORT_H_
+
+#include <string>
+
+#include "include/rados/librados.hpp"
+#include "include/buffer.h"
+
+#include "RadosDump.h"
+
+/**
+ * Specialization of RadosDump that adds
+ * methods for importing objects from a stream
+ * to a live cluster.
+ */
+class RadosImport : public RadosDump
+{
+  protected:
+    // Write alignment to enforce; 0 means "ask the pool".
+    uint64_t align;
+    // Import a single object whose object_begin section is in bl.
+    int get_object_rados(librados::IoCtx &ioctx, bufferlist &bl, bool no_overwrite);
+
+  public:
+    RadosImport(int file_fd_, uint64_t align_, bool dry_run_)
+      : RadosDump(file_fd_, dry_run_), align(align_)
+    {}
+
+    // Import the dump on file_fd into the pool named `pool`.
+    int import(std::string pool, bool no_overwrite);
+    // NOTE(review): no definition of this overload is visible in
+    // RadosImport.cc in this patch -- confirm it is provided elsewhere.
+    int import(librados::IoCtx &io_ctx, bool no_overwrite);
+};
+
+#endif // RADOS_IMPORT_H_
diff --git a/src/tools/ceph_objectstore_tool.cc b/src/tools/ceph_objectstore_tool.cc index 32ac466a5b15..cd7329493d2b 100644 --- a/src/tools/ceph_objectstore_tool.cc +++ b/src/tools/ceph_objectstore_tool.cc @@ -37,29 +37,14 @@ #include "include/rados/librados.hpp" +#include "ceph_objectstore_tool.h" +#include "RadosImport.h" + namespace po = boost::program_options; using namespace std; static coll_t META_COLL("meta"); -enum { - TYPE_NONE = 0, - TYPE_PG_BEGIN, - TYPE_PG_END, - TYPE_OBJECT_BEGIN, - TYPE_OBJECT_END, - TYPE_DATA, - TYPE_ATTRS, - TYPE_OMAP_HDR, - TYPE_OMAP, - TYPE_PG_METADATA, - END_OF_TYPES, //Keep at the end -}; - -//#define INTERNAL_TEST -//#define INTERNAL_TEST2 -//#define INTERNAL_TEST3 - #ifdef INTERNAL_TEST CompatSet get_test_compat_set() { CompatSet::FeatureSet ceph_osd_feature_compat; @@ -83,303 +68,11 @@ CompatSet get_test_compat_set() { } #endif -typedef uint8_t sectiontype_t; -typedef uint32_t mymagic_t; -typedef int64_t mysize_t; const ssize_t max_read = 1024 * 1024; -const uint16_t shortmagic = 0xffce; //goes into stream as "ceff" -//endmagic goes into stream as "ceff ffec" -const mymagic_t endmagic = (0xecff << 16) | shortmagic; const int fd_none = INT_MIN; bool outistty; bool dry_run = false; -//The first FIXED_LENGTH bytes are a 
fixed -//portion of the export output. This includes the overall -//version number, and size of header and footer. -//THIS STRUCTURE CAN ONLY BE APPENDED TO. If it needs to expand, -//the version can be bumped and then anything -//can be added to the export format. -struct super_header { - static const uint32_t super_magic = (shortmagic << 16) | shortmagic; - // ver = 1, Initial version - // ver = 2, Add OSDSuperblock to pg_begin - static const uint32_t super_ver = 2; - static const uint32_t FIXED_LENGTH = 16; - uint32_t magic; - uint32_t version; - uint32_t header_size; - uint32_t footer_size; - - super_header() : magic(0), version(0), header_size(0), footer_size(0) { } - int read_super(); - - void encode(bufferlist& bl) const { - ::encode(magic, bl); - ::encode(version, bl); - ::encode(header_size, bl); - ::encode(footer_size, bl); - } - void decode(bufferlist::iterator& bl) { - ::decode(magic, bl); - ::decode(version, bl); - ::decode(header_size, bl); - ::decode(footer_size, bl); - } -}; - -struct header { - sectiontype_t type; - mysize_t size; - header(sectiontype_t type, mysize_t size) : - type(type), size(size) { } - header(): type(0), size(0) { } - - int get_header(); - - void encode(bufferlist& bl) const { - uint32_t debug_type = (type << 24) | (type << 16) | shortmagic; - ENCODE_START(1, 1, bl); - ::encode(debug_type, bl); - ::encode(size, bl); - ENCODE_FINISH(bl); - } - void decode(bufferlist::iterator& bl) { - uint32_t debug_type; - DECODE_START(1, bl); - ::decode(debug_type, bl); - type = debug_type >> 24; - ::decode(size, bl); - DECODE_FINISH(bl); - } -}; - -struct footer { - mymagic_t magic; - footer() : magic(endmagic) { } - - int get_footer(); - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - ::encode(magic, bl); - ENCODE_FINISH(bl); - } - void decode(bufferlist::iterator& bl) { - DECODE_START(1, bl); - ::decode(magic, bl); - DECODE_FINISH(bl); - } -}; - -struct pg_begin { - spg_t pgid; - OSDSuperblock superblock; - - 
pg_begin(spg_t pg, const OSDSuperblock& sb): - pgid(pg), superblock(sb) { } - pg_begin() { } - - void encode(bufferlist& bl) const { - // If superblock doesn't include CEPH_FS_FEATURE_INCOMPAT_SHARDS then - // shard will be NO_SHARD for a replicated pool. This means - // that we allow the decode by struct_v 2. - ENCODE_START(3, 2, bl); - ::encode(pgid.pgid, bl); - ::encode(superblock, bl); - ::encode(pgid.shard, bl); - ENCODE_FINISH(bl); - } - // NOTE: New super_ver prevents decode from ver 1 - void decode(bufferlist::iterator& bl) { - DECODE_START(3, bl); - ::decode(pgid.pgid, bl); - if (struct_v > 1) { - ::decode(superblock, bl); - } - if (struct_v > 2) { - ::decode(pgid.shard, bl); - } else { - pgid.shard = shard_id_t::NO_SHARD; - } - DECODE_FINISH(bl); - } -}; - -struct object_begin { - ghobject_t hoid; - - // Duplicate what is in the OI_ATTR so we have it at the start - // of object processing. - object_info_t oi; - - object_begin(const ghobject_t &hoid): hoid(hoid) { } - object_begin() { } - - // If superblock doesn't include CEPH_FS_FEATURE_INCOMPAT_SHARDS then - // generation will be NO_GEN, shard_id will be NO_SHARD for a replicated - // pool. This means we will allow the decode by struct_v 1. 
- void encode(bufferlist& bl) const { - ENCODE_START(3, 1, bl); - ::encode(hoid.hobj, bl); - ::encode(hoid.generation, bl); - ::encode(hoid.shard_id, bl); - ::encode(oi, bl); - ENCODE_FINISH(bl); - } - void decode(bufferlist::iterator& bl) { - DECODE_START(3, bl); - ::decode(hoid.hobj, bl); - if (struct_v > 1) { - ::decode(hoid.generation, bl); - ::decode(hoid.shard_id, bl); - } else { - hoid.generation = ghobject_t::NO_GEN; - hoid.shard_id = shard_id_t::NO_SHARD; - } - if (struct_v > 2) { - ::decode(oi, bl); - } - DECODE_FINISH(bl); - } -}; - -struct data_section { - uint64_t offset; - uint64_t len; - bufferlist databl; - data_section(uint64_t offset, uint64_t len, bufferlist bl): - offset(offset), len(len), databl(bl) { } - data_section(): offset(0), len(0) { } - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - ::encode(offset, bl); - ::encode(len, bl); - ::encode(databl, bl); - ENCODE_FINISH(bl); - } - void decode(bufferlist::iterator& bl) { - DECODE_START(1, bl); - ::decode(offset, bl); - ::decode(len, bl); - ::decode(databl, bl); - DECODE_FINISH(bl); - } -}; - -struct attr_section { - map data; - attr_section(const map &data) : data(data) { } - attr_section() { } - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - ::encode(data, bl); - ENCODE_FINISH(bl); - } - void decode(bufferlist::iterator& bl) { - DECODE_START(1, bl); - ::decode(data, bl); - DECODE_FINISH(bl); - } -}; - -struct omap_hdr_section { - bufferlist hdr; - omap_hdr_section(bufferlist hdr) : hdr(hdr) { } - omap_hdr_section() { } - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - ::encode(hdr, bl); - ENCODE_FINISH(bl); - } - void decode(bufferlist::iterator& bl) { - DECODE_START(1, bl); - ::decode(hdr, bl); - DECODE_FINISH(bl); - } -}; - -struct omap_section { - map omap; - omap_section(const map &omap) : - omap(omap) { } - omap_section() { } - - void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); - ::encode(omap, bl); - ENCODE_FINISH(bl); 
- } - void decode(bufferlist::iterator& bl) { - DECODE_START(1, bl); - ::decode(omap, bl); - DECODE_FINISH(bl); - } -}; - -struct metadata_section { - // struct_ver is the on-disk version of original pg - __u8 struct_ver; // for reference - epoch_t map_epoch; - pg_info_t info; - pg_log_t log; - map past_intervals; - OSDMap osdmap; - bufferlist osdmap_bl; // Used in lieu of encoding osdmap due to crc checking - map divergent_priors; - - metadata_section(__u8 struct_ver, epoch_t map_epoch, const pg_info_t &info, - const pg_log_t &log, map &past_intervals, - map &divergent_priors) - : struct_ver(struct_ver), - map_epoch(map_epoch), - info(info), - log(log), - past_intervals(past_intervals), - divergent_priors(divergent_priors) { } - metadata_section() - : struct_ver(0), - map_epoch(0) { } - - void encode(bufferlist& bl) const { - ENCODE_START(4, 1, bl); - ::encode(struct_ver, bl); - ::encode(map_epoch, bl); - ::encode(info, bl); - ::encode(log, bl); - ::encode(past_intervals, bl); - // Equivalent to osdmap.encode(bl, features); but - // preserving exact layout for CRC checking. 
- bl.append(osdmap_bl); - ::encode(divergent_priors, bl); - ENCODE_FINISH(bl); - } - void decode(bufferlist::iterator& bl) { - DECODE_START(4, bl); - ::decode(struct_ver, bl); - ::decode(map_epoch, bl); - ::decode(info, bl); - ::decode(log, bl); - if (struct_v > 1) { - ::decode(past_intervals, bl); - } else { - cout << "NOTICE: Older export without past_intervals" << std::endl; - } - if (struct_v > 2) { - osdmap.decode(bl); - } else { - cout << "WARNING: Older export without OSDMap information" << std::endl; - } - if (struct_v > 3) { - ::decode(divergent_priors, bl); - } - DECODE_FINISH(bl); - } -}; - struct action_on_object_t { virtual ~action_on_object_t() {} virtual int call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) = 0; @@ -554,25 +247,6 @@ bool debug = false; super_header sh; uint64_t testalign; -template -int write_section(sectiontype_t type, const T& obj, int fd) { - if (dry_run) - return 0; - bufferlist blhdr, bl, blftr; - obj.encode(bl); - header hdr(type, bl.length()); - hdr.encode(blhdr); - footer ft; - ft.encode(blftr); - - int ret = blhdr.write_fd(fd); - if (ret) return ret; - ret = bl.write_fd(fd); - if (ret) return ret; - ret = blftr.write_fd(fd); - return ret; -} - // Convert non-printable characters to '\###' static void cleanbin(string &str) { @@ -596,17 +270,6 @@ static void cleanbin(string &str) return; } -int write_simple(sectiontype_t type, int fd) -{ - if (dry_run) - return 0; - bufferlist hbl; - - header hdr(type, 0); - hdr.encode(hbl); - return hbl.write_fd(fd); -} - static int get_fd_data(int fd, bufferlist &bl) { uint64_t total = 0; @@ -837,45 +500,6 @@ int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid) return r; } -int header::get_header() -{ - bufferlist ebl; - bufferlist::iterator ebliter = ebl.begin(); - ssize_t bytes; - - bytes = ebl.read_fd(file_fd, sh.header_size); - if ((size_t)bytes != sh.header_size) { - cerr << "Unexpected EOF" << std::endl; - return -EFAULT; - } - - decode(ebliter); - - 
return 0; -} - -int footer::get_footer() -{ - bufferlist ebl; - bufferlist::iterator ebliter = ebl.begin(); - ssize_t bytes; - - bytes = ebl.read_fd(file_fd, sh.footer_size); - if ((size_t)bytes != sh.footer_size) { - cerr << "Unexpected EOF" << std::endl; - return -EFAULT; - } - - decode(ebliter); - - if (magic != endmagic) { - cerr << "Bad footer magic" << std::endl; - return -EFAULT; - } - - return 0; -} - int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info, map &past_intervals) { @@ -917,7 +541,7 @@ void get_omap_batch(ObjectMap::ObjectMapIterator &iter, map } } -int export_file(ObjectStore *store, coll_t cid, ghobject_t &obj) +int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj) { struct stat st; mysize_t total; @@ -1036,7 +660,7 @@ int export_file(ObjectStore *store, coll_t cid, ghobject_t &obj) return 0; } -int export_files(ObjectStore *store, coll_t coll) +int ObjectStoreTool::export_files(ObjectStore *store, coll_t coll) { ghobject_t next; @@ -1079,33 +703,9 @@ int add_osdmap(ObjectStore *store, metadata_section &ms) return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl); } -//Write super_header with its fixed 16 byte length -void write_super() -{ - if (dry_run) - return; - bufferlist superbl; - super_header sh; - footer ft; - - header hdr(TYPE_NONE, 0); - hdr.encode(superbl); - - sh.magic = super_header::super_magic; - sh.version = super_header::super_ver; - sh.header_size = superbl.length(); - superbl.clear(); - ft.encode(superbl); - sh.footer_size = superbl.length(); - superbl.clear(); - - sh.encode(superbl); - assert(super_header::FIXED_LENGTH == superbl.length()); - superbl.write_fd(file_fd); -} - -int do_export(ObjectStore *fs, coll_t coll, spg_t pgid, pg_info_t &info, - epoch_t map_epoch, __u8 struct_ver, const OSDSuperblock& superblock, +int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid, + pg_info_t &info, epoch_t map_epoch, __u8 struct_ver, + const OSDSuperblock& 
superblock, map &past_intervals) { PGLog::IndexedLog log; @@ -1159,51 +759,6 @@ int do_export(ObjectStore *fs, coll_t coll, spg_t pgid, pg_info_t &info, return 0; } -int super_header::read_super() -{ - bufferlist ebl; - bufferlist::iterator ebliter = ebl.begin(); - ssize_t bytes; - - bytes = ebl.read_fd(file_fd, super_header::FIXED_LENGTH); - if ((size_t)bytes != super_header::FIXED_LENGTH) { - cerr << "Unexpected EOF" << std::endl; - return -EFAULT; - } - - decode(ebliter); - - return 0; -} - -int read_section(int fd, sectiontype_t *type, bufferlist *bl) -{ - header hdr; - ssize_t bytes; - - int ret = hdr.get_header(); - if (ret) - return ret; - - *type = hdr.type; - - bl->clear(); - bytes = bl->read_fd(fd, hdr.size); - if (bytes != hdr.size) { - cerr << "Unexpected EOF" << std::endl; - return -EFAULT; - } - - if (hdr.size > 0) { - footer ft; - ret = ft.get_footer(); - if (ret) - return ret; - } - - return 0; -} - int get_data(ObjectStore *store, coll_t coll, ghobject_t hoid, ObjectStore::Transaction *t, bufferlist &bl) { @@ -1232,11 +787,9 @@ int get_attrs(ObjectStore *store, coll_t coll, ghobject_t hoid, // This could have been handled in the caller if we didn't need to // support exports that didn't include object_info_t in object_begin. 
if (hoid.hobj.snap < CEPH_MAXSNAP && hoid.generation == ghobject_t::NO_GEN) { - map::iterator mi = as.data.find(OI_ATTR); + map::iterator mi = as.data.find(OI_ATTR); if (mi != as.data.end()) { - bufferlist attr_bl; - attr_bl.push_back(mi->second); - object_info_t oi(attr_bl); + object_info_t oi(mi->second); if (debug) cerr << "object_info " << oi << std::endl; @@ -1277,259 +830,8 @@ int get_omap(ObjectStore *store, coll_t coll, ghobject_t hoid, return 0; } -int skip_object(bufferlist &bl) -{ - bufferlist::iterator ebliter = bl.begin(); - bufferlist ebl; - bool done = false; - while(!done) { - sectiontype_t type; - int ret = read_section(file_fd, &type, &ebl); - if (ret) - return ret; - - ebliter = ebl.begin(); - if (type >= END_OF_TYPES) { - cout << "Skipping unknown object section type" << std::endl; - continue; - } - switch(type) { - case TYPE_DATA: - case TYPE_ATTRS: - case TYPE_OMAP_HDR: - case TYPE_OMAP: -#ifdef DIAGNOSTIC - cerr << "Skip type " << (int)type << std::endl; -#endif - break; - case TYPE_OBJECT_END: - done = true; - break; - default: - return -EFAULT; - } - } - return 0; -} - -int get_object_rados(librados::IoCtx &ioctx, bufferlist &bl, bool no_overwrite) -{ - bufferlist::iterator ebliter = bl.begin(); - object_begin ob; - ob.decode(ebliter); - map::iterator i; - bufferlist abl; - bool skipping; - - data_section ds; - attr_section as; - omap_hdr_section oh; - omap_section os; - - assert(g_ceph_context); - if (ob.hoid.hobj.nspace == g_ceph_context->_conf->osd_hit_set_namespace) { - cout << "Skipping internal object " << ob.hoid << std::endl; - skip_object(bl); - return 0; - } - - if (!ob.hoid.hobj.is_head()) { - cout << "Skipping non-head for " << ob.hoid << std::endl; - skip_object(bl); - return 0; - } - - ioctx.set_namespace(ob.hoid.hobj.get_namespace()); - - string msg("Write"); - skipping = false; - if (dry_run) { - uint64_t psize; - time_t pmtime; - int ret = ioctx.stat(ob.hoid.hobj.oid.name, &psize, &pmtime); - if (ret == 0) { - if 
(no_overwrite) - // Could set skipping, but dry-run doesn't change anything either - msg = "Skipping existing"; - else - msg = "***Overwrite***"; - } - } else { - int ret = ioctx.create(ob.hoid.hobj.oid.name, true); - if (ret && ret != -EEXIST) { - cerr << "create failed: " << cpp_strerror(ret) << std::endl; - return ret; - } - if (ret == -EEXIST) { - if (no_overwrite) { - msg = "Skipping existing"; - skipping = true; - } else { - msg = "***Overwrite***"; - ret = ioctx.remove(ob.hoid.hobj.oid.name); - if (ret < 0) { - cerr << "remove failed: " << cpp_strerror(ret) << std::endl; - return ret; - } - ret = ioctx.create(ob.hoid.hobj.oid.name, true); - // If object re-appeared after removal, let's just skip it - if (ret == -EEXIST) { - skipping = true; - msg = "Skipping in-use object"; - ret = 0; - } - if (ret < 0) { - cerr << "create failed: " << cpp_strerror(ret) << std::endl; - return ret; - } - } - } - } - - cout << msg << " " << ob.hoid << std::endl; - - bool need_align = false; - uint64_t alignment = 0; - if (testalign) { - need_align = true; - alignment = testalign; - } else { - if ((need_align = ioctx.pool_requires_alignment())) - alignment = ioctx.pool_required_alignment(); - } - - if (debug && need_align) - cerr << "alignment = " << alignment << std::endl; - - bufferlist ebl, databl; - uint64_t in_offset = 0, out_offset = 0; - bool done = false; - while(!done) { - sectiontype_t type; - int ret = read_section(file_fd, &type, &ebl); - if (ret) - return ret; - - ebliter = ebl.begin(); - //cout << "\tdo_object: Section type " << hex << type << dec << std::endl; - //cout << "\t\tsection size " << ebl.length() << std::endl; - if (type >= END_OF_TYPES) { - cout << "Skipping unknown object section type" << std::endl; - continue; - } - switch(type) { - case TYPE_DATA: - ds.decode(ebliter); - if (debug) - cerr << "\tdata: offset " << ds.offset << " len " << ds.len << std::endl; - if (need_align) { - if (ds.offset != in_offset) { - cerr << "Discontiguous object data in 
export" << std::endl; - return -EFAULT; - } - assert(ds.databl.length() == ds.len); - databl.claim_append(ds.databl); - in_offset += ds.len; - if (databl.length() >= alignment) { - uint64_t rndlen = uint64_t(databl.length() / alignment) * alignment; - if (debug) cerr << "write offset=" << out_offset << " len=" << rndlen << std::endl; - if (!dry_run && !skipping) { - ret = ioctx.write(ob.hoid.hobj.oid.name, databl, rndlen, out_offset); - if (ret) { - cerr << "write failed: " << cpp_strerror(ret) << std::endl; - return ret; - } - } - out_offset += rndlen; - bufferlist n; - if (databl.length() > rndlen) { - assert(databl.length() - rndlen < alignment); - n.substr_of(databl, rndlen, databl.length() - rndlen); - } - databl = n; - } - break; - } - if (!dry_run && !skipping) { - ret = ioctx.write(ob.hoid.hobj.oid.name, ds.databl, ds.len, ds.offset); - if (ret) { - cerr << "write failed: " << cpp_strerror(ret) << std::endl; - return ret; - } - } - break; - case TYPE_ATTRS: - as.decode(ebliter); - - if (debug) - cerr << "\tattrs: len " << as.data.size() << std::endl; - if (dry_run || skipping) - break; - for (i = as.data.begin(); i != as.data.end(); ++i) { - if (i->first == "_" || i->first == "snapset") - continue; - abl.clear(); - abl.push_front(i->second); - ret = ioctx.setxattr(ob.hoid.hobj.oid.name, i->first.substr(1).c_str(), abl); - if (ret) { - cerr << "setxattr failed: " << cpp_strerror(ret) << std::endl; - if (ret != -EOPNOTSUPP) - return ret; - } - } - break; - case TYPE_OMAP_HDR: - oh.decode(ebliter); - - if (debug) - cerr << "\tomap header: " << string(oh.hdr.c_str(), oh.hdr.length()) - << std::endl; - if (dry_run || skipping) - break; - ret = ioctx.omap_set_header(ob.hoid.hobj.oid.name, oh.hdr); - if (ret) { - cerr << "omap_set_header failed: " << cpp_strerror(ret) << std::endl; - if (ret != -EOPNOTSUPP) - return ret; - } - break; - case TYPE_OMAP: - os.decode(ebliter); - - if (debug) - cerr << "\tomap: size " << os.omap.size() << std::endl; - if (dry_run || 
skipping) - break; - ret = ioctx.omap_set(ob.hoid.hobj.oid.name, os.omap); - if (ret) { - cerr << "omap_set failed: " << cpp_strerror(ret) << std::endl; - if (ret != -EOPNOTSUPP) - return ret; - } - break; - case TYPE_OBJECT_END: - done = true; - if (need_align && databl.length() > 0) { - assert(databl.length() < alignment); - if (debug) cerr << "END write offset=" << out_offset << " len=" << databl.length() << std::endl; - if (dry_run || skipping) - break; - ret = ioctx.write(ob.hoid.hobj.oid.name, databl, databl.length(), out_offset); - if (ret) { - cerr << "write failed: " << cpp_strerror(ret) << std::endl; - return ret; - } - } - break; - default: - return -EFAULT; - } - } - return 0; -} - -int get_object(ObjectStore *store, coll_t coll, bufferlist &bl, OSDMap &curmap, - bool *skipped_objects) +int ObjectStoreTool::get_object(ObjectStore *store, coll_t coll, + bufferlist &bl, OSDMap &curmap, bool *skipped_objects) { ObjectStore::Transaction tran; ObjectStore::Transaction *t = &tran; @@ -1580,7 +882,7 @@ int get_object(ObjectStore *store, coll_t coll, bufferlist &bl, OSDMap &curmap, bool done = false; while(!done) { sectiontype_t type; - int ret = read_section(file_fd, &type, &ebl); + int ret = read_section(&type, &ebl); if (ret) return ret; @@ -1756,128 +1058,6 @@ int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms, return 0; } -int do_import_rados(string pool, bool no_overwrite) -{ - bufferlist ebl; - pg_info_t info; - PGLog::IndexedLog log; - - int ret = sh.read_super(); - if (ret) - return ret; - - if (sh.magic != super_header::super_magic) { - cerr << "Invalid magic number" << std::endl; - return -EFAULT; - } - - if (sh.version > super_header::super_ver) { - cerr << "Can't handle export format version=" << sh.version << std::endl; - return -EINVAL; - } - - //First section must be TYPE_PG_BEGIN - sectiontype_t type; - ret = read_section(file_fd, &type, &ebl); - if (ret) - return ret; - if (type != TYPE_PG_BEGIN) { - cerr << "Invalid 
first section type " << type << std::endl; - return -EFAULT; - } - - bufferlist::iterator ebliter = ebl.begin(); - pg_begin pgb; - pgb.decode(ebliter); - spg_t pgid = pgb.pgid; - - if (!pgid.is_no_shard()) { - cerr << "Importing Erasure Coded shard is not supported" << std::endl; - myexit(1); - } - - if (debug) { - cerr << "Exported features: " << pgb.superblock.compat_features << std::endl; - } - - // XXX: How to check export features? -#if 0 - if (sb.compat_features.compare(pgb.superblock.compat_features) == -1) { - cerr << "Export has incompatible features set " - << pgb.superblock.compat_features << std::endl; - return -EINVAL; - } -#endif - - librados::IoCtx ioctx; - librados::Rados cluster; - - char *id = getenv("CEPH_CLIENT_ID"); - if (id) cerr << "Client id is: " << id << std::endl; - ret = cluster.init(id); - if (ret) { - cerr << "Error " << ret << " in cluster.init" << std::endl; - return ret; - } - ret = cluster.conf_read_file(NULL); - if (ret) { - cerr << "Error " << ret << " in cluster.conf_read_file" << std::endl; - return ret; - } - ret = cluster.conf_parse_env(NULL); - if (ret) { - cerr << "Error " << ret << " in cluster.conf_read_env" << std::endl; - return ret; - } - cluster.connect(); - - ret = cluster.ioctx_create(pool.c_str(), ioctx); - if (ret < 0) { - cerr << "ioctx_create " << pool << " failed with " << ret << std::endl; - return ret; - } - - cout << "Importing from pgid " << pgid << std::endl; - - bool done = false; - bool found_metadata = false; - metadata_section ms; - while(!done) { - ret = read_section(file_fd, &type, &ebl); - if (ret) - return ret; - - //cout << "do_import: Section type " << hex << type << dec << std::endl; - if (type >= END_OF_TYPES) { - cout << "Skipping unknown section type" << std::endl; - continue; - } - switch(type) { - case TYPE_OBJECT_BEGIN: - ret = get_object_rados(ioctx, ebl, no_overwrite); - if (ret) return ret; - break; - case TYPE_PG_METADATA: - if (debug) - cout << "Don't care about the old metadata" << 
std::endl; - found_metadata = true; - break; - case TYPE_PG_END: - done = true; - break; - default: - return -EFAULT; - } - } - - if (!found_metadata) { - cerr << "Missing metadata section, ignored" << std::endl; - } - - return 0; -} - - typedef map divergent_priors_t; // out: pg_log_t that only has entries that apply to import_pgid using curmap @@ -1909,7 +1089,8 @@ void filter_divergent_priors(spg_t import_pgid, const OSDMap &curmap, } } -int do_import(ObjectStore *store, OSDSuperblock& sb, bool force, string pgidstr) +int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb, + bool force, std::string pgidstr) { bufferlist ebl; pg_info_t info; @@ -1919,7 +1100,7 @@ int do_import(ObjectStore *store, OSDSuperblock& sb, bool force, string pgidstr) if (!dry_run) finish_remove_pgs(store); - int ret = sh.read_super(); + int ret = read_super(); if (ret) return ret; @@ -1935,7 +1116,7 @@ int do_import(ObjectStore *store, OSDSuperblock& sb, bool force, string pgidstr) //First section must be TYPE_PG_BEGIN sectiontype_t type; - ret = read_section(file_fd, &type, &ebl); + ret = read_section(&type, &ebl); if (ret) return ret; if (type != TYPE_PG_BEGIN) { @@ -2045,7 +1226,7 @@ int do_import(ObjectStore *store, OSDSuperblock& sb, bool force, string pgidstr) bool found_metadata = false; metadata_section ms; while(!done) { - ret = read_section(file_fd, &type, &ebl); + ret = read_section(&type, &ebl); if (ret) return ret; @@ -2716,7 +1897,7 @@ int main(int argc, char **argv) global_init(NULL, ceph_options, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); - int ret = do_import_rados(pool, no_overwrite); + int ret = RadosImport(file_fd, testalign, dry_run).import(pool, no_overwrite); if (ret == 0) cout << "Import successful" << std::endl; myexit(ret != 0); @@ -2776,6 +1957,8 @@ int main(int argc, char **argv) } } + ObjectStoreTool tool = ObjectStoreTool(file_fd, dry_run); + if (vm.count("file") && file_fd == fd_none && !dry_run) 
{ cerr << "--file option only applies to import or export" << std::endl; myexit(1); @@ -3109,7 +2292,7 @@ int main(int argc, char **argv) if (op == "import") { try { - ret = do_import(fs, superblock, force, pgidstr); + ret = tool.do_import(fs, superblock, force, pgidstr); } catch (const buffer::error &e) { cerr << "do_import threw exception error " << e.what() << std::endl; @@ -3423,7 +2606,7 @@ int main(int argc, char **argv) cerr << "struct_v " << (int)struct_ver << std::endl; if (op == "export") { - ret = do_export(fs, coll, pgid, info, map_epoch, struct_ver, superblock, past_intervals); + ret = tool.do_export(fs, coll, pgid, info, map_epoch, struct_ver, superblock, past_intervals); if (ret == 0) cerr << "Export successful" << std::endl; } else if (op == "info") { diff --git a/src/tools/ceph_objectstore_tool.h b/src/tools/ceph_objectstore_tool.h new file mode 100644 index 000000000000..336fce37b583 --- /dev/null +++ b/src/tools/ceph_objectstore_tool.h @@ -0,0 +1,40 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef CEPH_OBJECTSTORE_TOOL_H_ +#define CEPH_OBJECTSTORE_TOOL_H_ + +#include "RadosDump.h" + +class ObjectStoreTool : public RadosDump +{ + public: + ObjectStoreTool(int file_fd, bool dry_run) + : RadosDump(file_fd, dry_run) + {} + + int do_import(ObjectStore *store, OSDSuperblock& sb, bool force, + std::string pgidstr); + int do_export(ObjectStore *fs, coll_t coll, spg_t pgid, + pg_info_t &info, epoch_t map_epoch, __u8 struct_ver, + const OSDSuperblock& superblock, + map &past_intervals); + int get_object(ObjectStore *store, coll_t coll, + bufferlist &bl, OSDMap &curmap, bool *skipped_objects); + int export_file( + ObjectStore *store, coll_t cid, ghobject_t &obj); + int export_files(ObjectStore *store, coll_t coll); +}; + +#endif // CEPH_OBJECTSTORE_TOOL_H_