]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rbd_recover_tool: move rbd_recover_tool directory to src/tools subdirectory 3611/head
authorMin Chen <minchen@ubuntukylin.com>
Thu, 5 Feb 2015 00:25:39 +0000 (08:25 +0800)
committerMin Chen <minchen@ubuntukylin.com>
Thu, 5 Feb 2015 00:25:39 +0000 (08:25 +0800)
Signed-off-by: Min Chen <minchen@ubuntukylin.com>
26 files changed:
src/rbd_recover_tool/FAQ [deleted file]
src/rbd_recover_tool/README [deleted file]
src/rbd_recover_tool/TODO [deleted file]
src/rbd_recover_tool/common_h [deleted file]
src/rbd_recover_tool/config/mds_host [deleted file]
src/rbd_recover_tool/config/mon_host [deleted file]
src/rbd_recover_tool/config/osd_host_path [deleted file]
src/rbd_recover_tool/database_h [deleted file]
src/rbd_recover_tool/epoch_h [deleted file]
src/rbd_recover_tool/metadata_h [deleted file]
src/rbd_recover_tool/osd_job [deleted file]
src/rbd_recover_tool/rbd-recover-tool [deleted file]
src/rbd_recover_tool/test_rbd_recover_tool.sh [deleted file]
src/tools/rbd_recover_tool/FAQ [new file with mode: 0644]
src/tools/rbd_recover_tool/README [new file with mode: 0644]
src/tools/rbd_recover_tool/TODO [new file with mode: 0644]
src/tools/rbd_recover_tool/common_h [new file with mode: 0644]
src/tools/rbd_recover_tool/config/mds_host [new file with mode: 0644]
src/tools/rbd_recover_tool/config/mon_host [new file with mode: 0644]
src/tools/rbd_recover_tool/config/osd_host_path [new file with mode: 0644]
src/tools/rbd_recover_tool/database_h [new file with mode: 0644]
src/tools/rbd_recover_tool/epoch_h [new file with mode: 0644]
src/tools/rbd_recover_tool/metadata_h [new file with mode: 0644]
src/tools/rbd_recover_tool/osd_job [new file with mode: 0755]
src/tools/rbd_recover_tool/rbd-recover-tool [new file with mode: 0755]
src/tools/rbd_recover_tool/test_rbd_recover_tool.sh [new file with mode: 0755]

diff --git a/src/rbd_recover_tool/FAQ b/src/rbd_recover_tool/FAQ
deleted file mode 100644 (file)
index b94b37e..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-# author: min chen(minchen@ubuntukylin.com) 2014 2015
-
-1. error "get_image_metadata_v2: no meta_header_seq input"
-cause: 
-  database is old, refresh database
-solution:
-  ./admin_job database
-
-2. Error initializing leveldb: IO error: lock /var/lib/ceph/osd/ceph-0/current/omap/LOCK: Resource temporarily unavailable
-   ERROR: error flushing journal /var/lib/ceph/osd/ceph-0/journal for object store /var/lib/ceph/osd/ceph-0: (1) Operation not permitted
-cause: 
-  ./admin_job database was interrupted after its commands had already been sent to each osd node, so a process there is still reading leveldb and holds its LOCK.
-  if ./admin_job database is run again, all commands are sent to the osd nodes again while the previous process is still locking leveldb, so all the new commands
-  fail.
-solution:
-  wait until all previous commands have finished.
diff --git a/src/rbd_recover_tool/README b/src/rbd_recover_tool/README
deleted file mode 100644 (file)
index 2e45ad2..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-# author: min chen(minchen@ubuntukylin.com) 2014 2015
-
-------------- ceph rbd recovery tool -------------
-
-  ceph rbd recover tool is used for recovering ceph rbd images when all ceph services are killed.
-it is based on ceph-0.80.x (Firefly and newer)
-  currently, ceph services (ceph-mon, ceph-osd) can eventually become unavailable because of bugs or other causes,
-especially on a large scale ceph cluster, so that the ceph cluster can not supply service
-and rbd images can not be accessed. In this case, a tool to recover rbd images is necessary.
-  ceph rbd recover tool is used for exactly this: it can collect all objects of an image from distributed
-osd nodes with the latest pg epoch, and splice the objects by offset into a complete image. To make sure
-object data is complete, this tool flushes the osd journal on each osd node before recovering.
-  but, there are some limitations:
--need ssh service and unobstructed network 
--osd data must be accessed on local disk
--clone image is not supported, while snapshot is supported
--only supports replicated pools
-
-before you run this tool, you should make sure that:
-1). all processes (ceph-osd, ceph-mon, ceph-mds) are shutdown
-2). ssh daemon is running & network is ok (ssh to each node without password)
-3). ceph-kvstore-tool is installed(for ubuntu: apt-get install ceph-test)
-4). osd disk is not crashed and data can be accessed on local filesystem
-
--architecture:
-
-                      +---- osd.0
-                      |
-admin_node -----------+---- osd.1
-                      |
-                      +---- osd.2
-                     |
-                      ......
-
--files:
-admin_node: {rbd-recover-tool  common_h  epoch_h  metadata_h  database_h}
-osd:        {osd_job           common_h  epoch_h  metadata_h} #/var/rbd_tool/osd_job
-in this architecture, admin_node acts as client, osds act as server.
-so, they run different files: 
-on admin_node run:  rbd-recover-tool <action> [<parameters>]
-on osd node run:    ./osd_job <function> <parameters>
-admin_node will copy files: osd_job, common_h, epoch_h, metadata_h to remote osd node
-
-
--config file
-before you run this tool, make sure write config files first
-osd_host_path: osd hostnames and osd data path #user input
-  osdhost0     /var/lib/ceph/osd/ceph-0
-  osdhost1     /var/lib/ceph/osd/ceph-1
-  ......
-mon_host: all mon node hostname #user input
-  monhost0
-  monhost1
-  ......
-mds_host: all mds node hostname #user input
-  mdshost0
-  mdshost1
-  ......
-then, init_env_admin function will create file: osd_host
-osd_host: all osd node hostname #generated by admin_job, user ignore it
-  osdhost0
-  osdhost1
-  ......
-
-
--usage:
-rbd-recover-tool <operation>
-<operation> :
-database               #generating offline database: hobject path, node hostname, pg_epoch and image metadata
-list                   #list all images from offline database
-lookup <pool_id>/<image_name>[@[<snap_name>]]  #lookup image metadata in offline database
-recover <pool_id>/<image_name>[@[<snap_name>]] [/path/to/store/image]  #recover image data according to image metadata
-
--steps:
-1. stop all ceph services: ceph-mon, ceph-osd, ceph-mds
-2. setup config files: osd_host_path, mon_host, mds_host
-3. rbd-recover-tool database   # wait a long time 
-4. rbd-recover-tool list
-5. rbd-recover-tool recover <pool_id>/<image_name>[@[<snap_name>]] [/path/to/store/image]
-
-
--debug & error check
-if admin_node operation is failed, you can check it on osd node
-cd /var/rbd_tool/osd_job
-./osd_job <operation>
-<operation> :
-do_image_id <image_id_hobject>         #get image id of image format v2 
-do_image_id <image_header_hobject>     #get image id of image format v1
-do_image_metadata_v1 <image_header_hobject>    #get image metadata of image format v1, maybe pg epoch is not latest
-do_image_metadata_v2 <image_header_hobject>    #get image metadata of image format v2, maybe pg epoch is not latest
-do_image_list                          #get all images on this osd(image head hobject)
-do_pg_epoch                            #get all pg epoch and store it in /var/rbd_tool/single_node/node_pg_epoch
-do_omap_list                           #list all omap headers and omap entries on this osd
-
-
--FAQ
-file FAQ lists some common confusing cases while testing
diff --git a/src/rbd_recover_tool/TODO b/src/rbd_recover_tool/TODO
deleted file mode 100644 (file)
index c36d4c9..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-
-1. support clone image
diff --git a/src/rbd_recover_tool/common_h b/src/rbd_recover_tool/common_h
deleted file mode 100644 (file)
index 4a0d3fc..0000000
+++ /dev/null
@@ -1,412 +0,0 @@
-#!/bin/bash
-# file: common_h
-#
-# Copyright (C) 2015 Ubuntu Kylin
-#
-# Author: Min Chen <minchen@ubuntukylin.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Library Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library Public License for more details.
-#
-
-# directory containing the running script; config files are looked up relative to it
-my_dir=$(dirname "$0")
-
-# admin node init path
-rbd_image=/var/rbd_tool/rbd_image
-database=$rbd_image/database
-image_coll_v1=$rbd_image/image_coll_v1
-image_coll_v2=$rbd_image/image_coll_v2
-pg_coll=$rbd_image/pg_coll
-images=$rbd_image/images
-images_meta=$rbd_image/images_meta
-default_backup_dir=/var/rbd_tool/default_backup_dir
-
-# admin node: image snap & nosnap (declared empty here)
-nosnap= #$rbd_image/<image_name>/nosnap
-snap= #$rbd_image/<image_name>/<snap_name>
-
-# osd node init path
-job_path=/var/rbd_tool/osd_job
-single_node=/var/rbd_tool/single_node
-
-# osd node vars: declared empty here; init_env_osd() assigns osd_data,
-# omap_path and the per-osd files under $single_node/<md5 of osd_data path>
-osd_env= #$single_node/<osd_id>/osd_env
-osd_data= #/var/lib/ceph/osd/$cluster-$id
-omap_path= #$osd_data/current/omap
-image_list_v1= #$single_node/<osd_id>/image_list_v1
-image_list_v2= #$single_node/<osd_id>/image_list_v2
-image_v1= #$single_node/<osd_id>/image_v1
-image_v2= #$single_node/<osd_id>/image_v2
-pgid_list= #$single_node/<osd_id>/pgid_list
-node_pg_epoch= #$single_node/<osd_id>/node_pg_epoch
-omap_list= #$single_node/<osd_id>/omap_list
-
-# admin node config file
-osd_host_path=$my_dir/config/osd_host_path
-osd_host_mapping= #$pwd_path/config/osd_host_mapping # host --> host_remote: by init_env_admin()
-osd_host=$my_dir/config/osd_host #generated by function init_env_admin()
-mon_host=$my_dir/config/mon_host
-mds_host=$my_dir/config/mds_host
-
-# ssh option: give up on a node after a 1 second connect timeout
-ssh_option="-o ConnectTimeout=1"
-
-# gen md5sum
-# parameter: <string>
-# Prints the md5 hex digest of the first argument. $1 is expanded unquoted
-# and echo appends a newline, so the digest covers the word-split text plus
-# a trailing newline.
-function gen_md5()
-{
-  echo $1|md5sum|awk '{print $1}'
-}
-
-# on each osd node
-# check ceph environment: ssh, ceph-kvstore-tool, osd_data_path
-# parameters: <node> <data_path>
-# Runs three remote probes over ssh and exits on the first one that fails:
-#   1. a no-op "echo -n", to confirm ssh connectivity
-#   2. ceph-kvstore-tool without arguments, which must return status 1
-#   3. stat of <data_path>
-# Before each probe an INT/HUP trap is installed that names the step and exits.
-function check_ceph_env()
-{
-  local func="check_ceph_env"
-  if [ $# -lt 2 ];then
-    echo "$func: parameters: <node> <data_path>" 
-    exit
-  fi
-  local node=$1
-  local data_path=$2
-  local res=
-  local cmd=
-
-  # probe 1: ssh reachable (stdin from /dev/null so ssh cannot consume the caller's input)
-  trap 'echo [$node]: ssh failed; exit' INT HUP
-  ssh -o ConnectTimeout=1 $node "echo -n" </dev/null
-  res=$?
-  if [ $res -ne 0 ];then
-    echo "[$node]: ssh failed"
-    exit
-  fi
-
-  # probe 2: ceph-kvstore-tool installed
-  cmd=ceph-kvstore-tool
-  trap 'echo [$node]: $cmd failed; exit' INT HUP
-  ssh -o ConnectTimeout=1 $node "$cmd &>/dev/null;" </dev/null 
-  res=$?
-  # ceph-kvstore-tool will return 1 with no parameters input
-  if [ $res -ne 1 ];then
-    echo "[$node]: $cmd not installed"
-    exit
-  fi
-
-  # probe 3: osd data path exists on the node
-  trap 'echo [$node]: stat $data_path failed; exit' INT HUP
-  ssh -o ConnectTimeout=1 $node "stat $data_path &>/dev/null;"  </dev/null
-  res=$?
-  if [ $res -ne 0 ];then
-    echo "[$node]: $data_path not exists"
-    exit
-  fi
-}
-
-# osd node context : osd_data_path
-# parameter: <osd_data_path>
-# Sets the globals osd_data and omap_path, creates $single_node and
-# $single_node/<md5 of osd_data_path> when missing, and points the per-osd
-# globals (image lists, pgid_list, node_pg_epoch, omap_list) at files inside
-# that directory. Exits when no path is given.
-function init_env_osd()
-{
-  local func="init_env_osd"
-  if [ "$1"x = ""x ];then
-    echo "$func: no osd_data_path input" 
-    exit
-  fi
-  osd_data=$1
-  omap_path=$osd_data/current/omap
-
-  if [ ! -e $single_node ];then
-    mkdir -p $single_node
-  fi
-
-  # the per-osd working directory is named after the md5 of the data path
-  local osd_id=`gen_md5 $osd_data`
-  local osd_dir=$single_node/$osd_id
-
-  if [ ! -e $osd_dir ];then
-    mkdir -p $osd_dir
-  fi
-  image_list_v1=$osd_dir/image_list_v1
-  image_list_v2=$osd_dir/image_list_v2
-  image_v1=$osd_dir/image_v1
-  image_v2=$osd_dir/image_v2
-  pgid_list=$osd_dir/pgid_list
-  node_pg_epoch=$osd_dir/node_pg_epoch
-  omap_list=$osd_dir/omap_list
-}
-
-# admin node process file: osd_host_path
-# Validates the config files and (re)generates osd_host and osd_host_mapping
-# from osd_host_path:
-#   - osd_host_path and mon_host must exist and be non-empty; mds_host must exist
-#   - $rbd_image and $images are created when missing
-#   - returns early when osd_host is non-empty and newer than osd_host_path
-#   - otherwise, for every non-blank "<host> <data_path>" line, the remote
-#     `hostname` is resolved over ssh, "<host> <remote hostname>" is appended
-#     to osd_host_mapping, "<host>" is appended to osd_host, and
-#     check_ceph_env is run for the node
-# A malformed line or a failed hostname lookup removes both generated files
-# and exits.
-# NOTE: osd_host_mapping is placed under `pwd`/config while the other config
-# files live under $my_dir/config.
-function init_env_admin()
-{
-  local func="init_env_admin" 
-  local pwd_path=`pwd`
-  osd_host_mapping=$pwd_path/config/osd_host_mapping
-  if [ ! -s $osd_host_path ];then
-    echo "$func: config/osd_host_path not exists or empty"
-    exit
-  fi
-  if [ ! -e $rbd_image ];then
-    mkdir -p $rbd_image
-  fi
-  if [ ! -e $images ];then
-    mkdir -p $images
-  fi
-
-  if [ ! -s $mon_host ];then
-    echo "$func: config/mon_host not exists or empty"
-    exit
-  fi
-  if [ ! -e $mds_host ];then
-    echo "$func: config/mds_host not exists"
-    exit
-  fi
-
-  # we just judge if osd_host is needed to be updated
-  if [ -s $osd_host ] && [ $osd_host -nt $osd_host_path ];then
-    return  
-  fi
-  echo "$func: create osd_host ..."
-  # create file: osd_host and osd_host_mapping
-  >$osd_host
-  >$osd_host_mapping
-  local lines=0   # number of valid entries seen
-  local lineno=0  # current line number, for error messages
-  while read line
-  do
-    lineno=$(($lineno + 1))
-    if [ "$line"x = ""x ];then
-      continue;
-    fi
-    local node=`echo $line|awk '{print $1}'`
-    if [ "$node"x = ""x ];then
-      echo "$func: osd_host_path : line $lineno: osd hostname not input"
-      rm -rf $osd_host $osd_host_mapping
-      exit
-    fi
-    local data_path=`echo $line|awk '{print $2}'`
-    if [ "$data_path"x = ""x ];then
-      echo "$func: osd_host_path : line $lineno: osd data_path not input"
-      rm -rf $osd_host $osd_host_mapping
-      exit
-    fi
-    lines=$(($lines + 1))
-    # in case : there are several hostnames on the same node
-    # just need output of `hostname`
-    local hostname_alias=
-    hostname_alias=`ssh $ssh_option $node "hostname" 2>/dev/null </dev/null`
-    if [ "$hostname_alias"x = ""x ];then
-      echo "$func: osd_host_path: line $lineno: $node: get remote hostname alias failed"
-      rm -rf $osd_host $osd_host_mapping
-      exit
-    fi
-    echo "$node $hostname_alias" >>$osd_host_mapping
-    echo $node >> $osd_host
-    # check ceph env on remote osd
-    check_ceph_env $node $data_path
-  done < $osd_host_path
-
-  if [ $lines = 0 ];then
-    echo "$func: no osd host path valid"
-    exit
-  fi
-}
-
-# Generates osd_host from osd_host_path without contacting any node.
-# Returns immediately when osd_host already exists and is non-empty.
-# Every non-blank line must hold "<host> <data_path>"; the host is appended
-# to osd_host. A line missing either field makes the script exit.
-function admin_parse_osd()
-{
-  local func="admin_parse_osd"
-  if [ -s $osd_host ];then
-    return  
-  fi
-  # create file: osd_host
-  >$osd_host
-  local lines=0   # counted but not read again in this function
-  local lineno=0  # current line number, for error messages
-  while read line
-  do
-    lineno=$(($lineno + 1))
-    if [ "$line"x = ""x ];then
-      continue;
-    fi
-    local node=`echo $line|awk '{print $1}'`
-    if [ "$node"x = ""x ];then
-      echo "$func: osd_host_path : line $lineno: osd_host not input"
-      exit
-    fi
-    local data_path=`echo $line|awk '{print $2}'`
-    if [ "$data_path"x = ""x ];then
-      echo "$func: osd_host_path : line $lineno: osd_data not input"
-      exit
-    fi
-    lines=$(($lines + 1))
-    echo $node >> $osd_host
-  done < $osd_host_path
-}
-
-# for osd node
-# Writes the output of `ceph-kvstore-tool $omap_path list` to $omap_list,
-# replacing the previous contents. Both globals are assigned by init_env_osd().
-function get_omap_list()
-{
-  ceph-kvstore-tool $omap_path list > $omap_list
-}
-
-# parameter: <string>
-# Prints <string> with every "_" replaced by a backslash followed by "u".
-# Prints nothing when <string> is empty.
-function convert_underline()
-{
-  if [ "$1"x = ""x ];then
-    return
-  fi
-
-  # the "p" flag prints a substituted line a second time; head keeps one copy
-  echo $1|sed -e 's/_/\\u/gp'|head -n 1
-}
-
-# Prints all arguments (joined by echo) with every backslash doubled, so the
-# text survives one extra level of shell/regex unescaping.
-function dump_backslash()
-{
-  # the "p" flag prints a substituted line a second time; head keeps one copy
-  echo $*|sed -e 's/\\/\\\\/gp'|head -n 1
-}
-
-# Prints all arguments (joined by echo) with every backslash replaced by four
-# backslashes, i.e. dump_backslash applied twice.
-function dump_dump_backslash()
-{
-  # the "p" flag prints a substituted line a second time; head keeps one copy
-  echo $*|sed -e 's/\\/\\\\\\\\/gp'|head -n 1
-}
-
-function char_convert()
-{
-  if [ "$1"x = ""x ];then
-    return
-  fi
-
-  echo $1|sed -e 's/_/\\u/gp' -e 's/\./%e/gp' -e 's/%/%p/gp'|head -n 1
-}
-
-function check_osd_process()
-{
-  local func="check_osd_process"
-  local host=$1
-  if [ "$1"x = ""x ];then
-    exit
-  fi
-  local cmds="ps aux|grep ceph-osd|grep -v grep"
-  local ret=/tmp/ret.$$$$
-  ssh $ssh_option $host $cmds |tee $ret
-  if [ -s $ret ];then
-    echo "$func: [$host] ceph-osd process is not killed"
-    exit
-  fi
-  rm -f $ret 
-}
-
-# Prints the constant omap prefix "_HOBJTOSEQ_".
-function get_map_header_prefix()
-{
-  echo "_HOBJTOSEQ_"
-}
-
-# parameter: <keyword>
-# Greps $omap_list for <keyword> and prints the second ":"-separated field of
-# the result (multiple matching lines are joined onto one line by the
-# unquoted echo). Exits silently when the keyword is empty or nothing matches.
-function get_map_header_key()
-{
-  local func="get_map_header_key"
-  if [ "$1"x = ""x ];then
-    #echo $func': no keyword input'
-    exit 
-  fi 
-  local keyword=$1
-  local res=`cat $omap_list| grep $keyword`
-  if [ "$res"x = ""x ];then
-    #echo "$func: map_header_key = $keyword not exisits"
-    exit
-  fi
-  echo $res|awk -F ":" '{print $2}'
-}
-
-function get_header_seq() 
-{
-  local func="get_header_seq"
-  if [ "$1"x == ""x ];then
-    #echo "$func: no prefix input"
-    exit;
-  elif [ "$2"x == ""x ];then
-    #echo "$func: no key input"
-    exit;
-  fi
-  local prefix=$1;
-  local key=$2;
-  local res=/tmp/header_seq.$$$$
-
-  ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res
-  if [ $? != 0 ]; then
-    #echo "$func: <$prefix , $key> not exists" ;
-    exit;
-  fi
-
-  # ceph-kvstore-tool get result like this:
-  # 02 01 7e 00 00 00 12 44 00 00 00 00 00 00 00 00
-  # get header seq bytes: 
-  # 12 44 00 00 00 00 00 00 
-  # -> 00 00 00 00 00 00 44 12 
-  # echo $((16#0000000000004412)) -> 17426 == header_seq
-  local seq=`cat $res |head -n 2|tail -n 1| \
-  awk '
-  BEGIN {
-    FS=":"
-    seq="";
-    i=7;
-  } {
-    split($2, arr, " ")  
-    # header_seq uint64 : 8 bytes
-    for (x=7; x>=0; --x) {
-      seq=seq""arr[i+x];
-   }
-  }
-  END {
-   print seq
-  }'`
-  if [ "$seq"x = ""x ];then
-    #echo "$func: get <$prefix , $key> failed"
-    exit;
-  fi
-  rm -f $res
-  echo $((16#$seq))
-}
-
-# get header info key/value
-function get_header_kv()
-{
-  local func="get_header_kv"
-  if [ "$1"x = ""x ];then
-    #echo "$func: no prefix input"
-    exit
-  elif [ "$2"x = ""x ];then
-    #echo "$func: no key input"
-    exit
-  elif [ "$3"x != "string"x ] && [ "$3"x != "int"x ];then
-    #echo "$func: no valid type input, use type (string|int)"
-    exit
-  fi
-
-  local prefix=$1
-  local key=$2
-  local types=$3
-  local res=/tmp/kv.$$$$
-
-  ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res
-  if [ $? != 0 ];then
-    #echo "$func: <$prefix , $key> not exists" 
-    exit
-  fi
-
-  if [ "$types"x = "string"x ];then
-    local value=`cat $res |tail -n +2|head -n -1|awk -F ": " '{printf $3}'|sed -n 's/^\.\{4\}//p'`
-    echo $value
-  elif [ "$types"x = "int"x ];then
-    local value=`cat $res |tail -n +2|head -n -1| \
-      awk '
-        BEGIN{
-          FS=":"
-        } {
-          split($2, arr, " ");
-          len=length(arr)
-          for (i=len; i>0; --i) { 
-                printf arr[i];
-          }
-        }'`
-    echo $((16#$value))
-  fi
-  rm -f $res
-}
diff --git a/src/rbd_recover_tool/config/mds_host b/src/rbd_recover_tool/config/mds_host
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/src/rbd_recover_tool/config/mon_host b/src/rbd_recover_tool/config/mon_host
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/src/rbd_recover_tool/config/osd_host_path b/src/rbd_recover_tool/config/osd_host_path
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/src/rbd_recover_tool/database_h b/src/rbd_recover_tool/database_h
deleted file mode 100644 (file)
index 75d0aa9..0000000
+++ /dev/null
@@ -1,1134 +0,0 @@
-#!/bin/bash
-# file: database_h
-#
-# Copyright (C) 2015 Ubuntu Kylin
-#
-# Author: Min Chen <minchen@ubuntukylin.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Library Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library Public License for more details.
-#
-
-my_dir=$(dirname "$0")
-
-. $my_dir/common_h
-. $my_dir/metadata_h
-. $my_dir/epoch_h
-
-db_image_prefix=
-db_image_size=
-db_order=
-db_snap_id=
-db_snap_image_size=
-found=0
-
-#init osd_data and get all objects path
-# Rebuilds the offline object database: clears $database and $images, then,
-# for every "<host> <data_path>" line of osd_host_path, runs
-# `find <data_path>/current -type f` on that host over ssh in a background
-# job and stores the listing in $database/<host>. Waits for all jobs before
-# printing the finish message.
-# NOTE(review): $raw is not assigned in the visible part of this file, so
-# "rm -rf $raw" may expand to a bare "rm -rf" - confirm.
-function gen_database()
-{
-  local func="gen_database"
-  rm -rf $database/*
-  rm -rf $images
-  rm -rf $raw
-  mkdir -p $database
-  local host=
-  local data_path=
-
-  trap 'echo $func failed; exit;' INT HUP
-  while read line
-  do
-    {
-      host=`echo $line|awk '{print $1}'`
-      data_path=`echo $line|awk '{print $2}'`
-      if [ "$host"x = ""x ] || [ "$data_path"x = ""x ];then
-       continue
-      fi
-      local cmds="find $data_path/current -type f"
-      # one listing file per host name, overwritten if the host name repeats
-      ssh $ssh_option $host $cmds > $database/$host
-    } &
-  done < $osd_host_path
-  wait
-  echo "$func: finish"
-}
-
-# collect hobjects from database 
-# and choose the object whose epoch is latest
-# then, sort the objects by their offsets in image 
-function gather_hobject_common()
-{
-  func="gather_hobject_common"
-
-  trap 'echo $func failed; exit;' INT HUP
-  if [ $# -lt 2 ];then
-    echo "$func: parameters: <pool_id> <image_prefix> [<snap_id>]"  
-    exit
-  fi
-
-  local pool_id=$1
-  local image_prefix=$2
-  pool_id=$(($pool_id))
-  local hex_pool_id=`printf "%x" $pool_id`
-  # NOSNAP = uint64(-2)
-  local snap_id=`printf "%u" -2`
-  local hex_snap_id="head"
-  local psuffix=
-  local fsuffix="_head"
-  if [ $# = 3 ];then
-    snap_id=$(($3))
-    hex_snap_id=`printf "%x" $snap_id`
-    psuffix="_"$snap_id
-    fsuffix="_"$snap_id
-  fi
-  local underline_image_prefix=`convert_underline $image_prefix`
-  local dump_image_prefix=`dump_backslash $underline_image_prefix`
-  local ddump_image_prefix=`dump_dump_backslash $underline_image_prefix`
-  local images_raw_dir=$rbd_image/raw
-  local image_hobjects_dir=$images/pool_$pool_id/$image_prefix
-  # $images/raw/$image_prefix"_head"
-  local image_hobjects_raw=$images_raw_dir/$image_prefix"$fsuffix"
-  # $images/$image_prefix/$image_prefix"_head"
-  local image_hobjects_stable=$image_hobjects_dir/$image_prefix"$fsuffix"
-
-  if [ ! -e $images_raw_dir ];then
-    mkdir -p $images_raw_dir
-  fi
-  if [ ! -e $image_hobjects_dir ];then
-  local image_metadata=$images_meta/$image_name_in
-    mkdir -p $image_hobjects_dir
-  fi
-
-  pushd $database >/dev/null
-  local  pattern="\.[0-9a-f]+__"$hex_snap_id"_[0-9A-F]{8}__"$hex_pool_id
-  >$image_hobjects_raw
-  grep -r -E $dump_image_prefix""$pattern * >$image_hobjects_raw
-  if [ ! -s $image_hobjects_raw ];then
-    echo "$func: image snap [ $image_prefix"$psuffix" ] is empty"
-    return 1 #no data available
-  fi
-  popd >/dev/null
-
-  local offset_dir_temp=$images_raw_dir/$image_prefix"$fsuffix""_dir_temp"
-  rm -rf $offset_dir_temp
-  mkdir -p $offset_dir_temp
-
-  echo "gather hobjects from database: snapid=$snap_id ..."
-
-  # format: ceph2:/var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
-  local tmp_image=$offset_dir_temp/tmpimage.$$$$
-  >$tmp_image
-  cat $image_hobjects_raw | 
-  awk -F ':' '
-  BEGIN {
-    pg_coll="'$pg_coll'"
-    tmp_image="'$tmp_image'"
-    osd_host_mapping="'$osd_host_mapping'"
-    snapid="'$snap_id'"
-  }{ 
-      # $2 = /var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
-
-      split($2, arr1, "/current/");   # {/var/lib/ceph/osd/ceph-1/, 2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2}
-      split(arr1[2], arr2, "/");     # {2.d3_head, rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2} 
-      split(arr2[1], arr3, "_head"); # {2.d3,} 
-
-      hobject=$2;
-      data_path=arr1[1];
-      gsub(/\\u/, "\\\\\\\\\\\\\\\\u", hobject); # dump backslash to delay escape (\ -> \\)
-      "awk \"\\$1 == \\\""$1"\\\" {print \\$2}\" "osd_host_mapping" | head -n 1" | getline node
-      pgid = arr3[1];
-
-      len=length(arr2);
-      offset_hobject=arr2[len]             # rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
-      split(offset_hobject, offarr1, "."); # {rb, 0, 1293, 6b8b4567, 000000000002__head_FB425CD3__2}
-      len1=length(offarr1) 
-      offset_p=offarr1[len1]               # 000000000002__head_FB425CD3__2
-      split(offset_p, offarr2, "__");      # {000000000002, head_FB425CD3, 2}
-      offset=offarr2[1];                   # 000000000002
-
-      system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \" >>"tmp_image);
-      #system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \"");
-      #print node" "pgid" "hobject" "offset" "snapid
-      
-      # find pg_epoch from pg_coll database
-      system("awk  \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll" >>"tmp_image);
-      #system("awk  \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll);
-  }'
-
-  local sort_image=$offset_dir_temp/sortimage.$$$$
-  >$sort_image
-  sort -t ' ' -k 4.1,4 -k 6.1nr -k 1.1,1 $tmp_image >$sort_image
-  sort -t ' ' -k 4.1,4 -u $sort_image > $image_hobjects_stable
-  
-  #rm -rf $offset_dir_temp
-  return 0
-}
-
-# parameters: <pool_id> <image_prefix>
-# Gathers the head (no snapshot) objects of an image; thin wrapper around
-# gather_hobject_common and returns its status.
-function gather_hobject_nosnap()
-{
-  gather_hobject_common $1 $2
-}
-
-# parameters: <pool_id> <image_prefix> <snap_id>
-# Gathers the objects of one image snapshot; thin wrapper around
-# gather_hobject_common and returns its status.
-function gather_hobject_snap()
-{
-  gather_hobject_common $1 $2 $3
-}
-
-# select the max pg_epoch item of the same $field
-# if no same $field, choose the first 
-# format : "node $field pg_epoch"
-# parameter: <file>
-# Sorts the lines of <file> by column 3 (numeric, descending), then by
-# column 2 (numeric), and prints the first line.
-function choose_epoch()
-{
-  cat $1|sort -t ' ' -k 3.1,3nr -k 2.1,2n |head -n 1;
-}
-
-# lookup image info , after scatter_node_jobs & gather_node_infos
-function lookup_image()
-{
-  local func="lookup_image"
-  if [ $# -lt 2 ];then
-    echo "$func: parameters error <pool_id> <image_name> [<snap_name>]"
-  fi
-  local pool_id=$1
-  local image_name=$2
-  local snap_name=$3
-  pool_id=$((pool_id))
-  echo -e "$func: pool_id = $pool_id\timage_name = $image_name\tsnap_name = $snap_name"
-  if [ $pool_id -lt 0 ];then
-    echo "$func: pool_id must great than zero"
-    exit
-  fi
-  local hex_pool_id=`printf "%x" $pool_id`
-  input_image $image_name
-  local node=
-  local item=/tmp/item.$$$$
-  local img_name=`dump_backslash $image_name`
-
-  local image_format=0
-  local image_id_hobject=
-  local image_header_hobject=
-  local result=/tmp/tmp_result.$$$$
-  local res1=/tmp/tmp_res1.$$$$
-  local res2=/tmp/tmp_res2.$$$$
-  local data_path=
-
-  # image format v1
-  {
-    cat $image_coll_v1|grep -E "/$img_name\.rbd__head_[0-9A-F]{8}__$hex_pool_id" >$res1
-    if [ -s $res1 ];then
-      echo -n "$func: rbd_header_hobject = "
-      choose_epoch $res1| tee $item
-      #choose_epoch $res1 > $item
-      
-      if [ -e $item ];then
-        node=`cat $item|awk '{print $1}'`
-        image_header_hobject=`cat $item|awk '{print $2}'`
-        if [ "$node"x = ""x ];then
-         echo "$func: v1 node is NULL"
-         exit
-       fi
-       if [ "$image_header_hobject"x = ""x ];then
-         echo "$func: v1 image_header_hobject is NULL"
-         exit
-       fi
-        rm -f $item
-      fi
-
-      image_format=1
-      echo -e "image_name:\t$image_name_in"
-      echo -e "image_format:\t$image_format"
-      data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'`
-      >$result
-      cmds="bash $job_path/osd_job do_image_metadata_v1 $data_path `dump_backslash $image_header_hobject` $snap_name" 
-      ssh $ssh_option $node $cmds | tee $result
-    fi
-  }
-
-  # image format v2
-  {
-    cat $image_coll_v2|grep -E "/rbd\\\\uid\."$img_name"__head_[0-9A-F]{8}__$hex_pool_id" >$res2
-    if [ -s $res2 ];then
-      echo -n "$func: rbd_id_hobject = "
-      choose_epoch $res2 | tee $item
-      #choose_epoch $res2 > $item
-
-      if [ -e $item ];then
-        node=`cat $item|awk '{print $1}'`
-        image_id_hobject=`cat $item|awk '{print $2}'`
-        if [ "$node"x = ""x ];then
-         echo "$func: v2 node is NULL(to get image_id_hobject)"
-         exit
-       fi
-       if [ "$image_id_hobject"x = ""x ];then
-         echo "$func: v2 image_id_hobject is NULL"
-         exit
-       fi
-        rm -f $item
-      fi
-
-      check_osd_process $node
-      image_format=2
-      
-      local tid=/tmp/image_id.$$$$
-      data_path=`echo $image_id_hobject|awk -F "/current" '{print $1}'`
-      >$tid
-      cmds="bash $job_path/osd_job do_image_id $data_path `dump_backslash $image_id_hobject`" 
-      ssh $ssh_option $node $cmds > $tid
-
-      local image_id=`cat $tid`
-      rm -f $tid
-
-      #get image_header_hobject
-      pushd $database >/dev/null
-      local pattern="header\."$image_id"__head_[0-9A-F]{8}__$hex_pool_id"
-      local tcoll=/tmp/tmp_image_head_coll.$$$$
-
-      # hostname(by command hostname) in $pg_coll  maybe different from hostname in tcoll(input by user) 
-      # t_host:        hostname read from config file ($tcoll)
-      # t_host_remote: $(hostname) on osd node ($pg_coll)
-      grep -r -E $pattern * >$tcoll
-      popd >/dev/null
-
-      local t_host=(`cat $tcoll|awk -F ":" '{print $1}'`)
-      local t_pgid=(`cat $tcoll|awk -F ":" '{print $2}'|sed -n 's/.*\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\/.*/\1/p'`)
-      local t_hobject=(`cat $tcoll|awk -F ":" '{print $2}'`)
-      local t_data_path=(`cat $tcoll|awk -F ":" '{split($2, arr, "/current/"); print arr[1];}'`)
-      rm -f $tcoll
-      declare -a t_host_remote
-
-      #if there is no failed pg migration, number of t_host is replica num
-      #replica num : 3, 4, 5 ...
-      local t_hostname=/tmp/t_hostname.$$$$
-      for ((i=0; i<${#t_host[*]}; i++))
-      do
-       ssh $ssh_option ${t_host[$i]} "hostname" >$t_hostname
-       if [ $? != 0 ];then
-         echo "$func: ${t_host[$i]} get host_remote failed"
-         exit
-       fi
-        t_host_remote[$i]=`cat $t_hostname`    
-      done
-      rm -f $t_hostname
-
-      local t_item=/tmp/tmp_item.$$$$
-      local tmp_item=/tmp/tmp_tmp_item.$$$$
-      
-      >$tmp_item
-      for ((i=0; i<${#t_host_remote[*]}; i++ ))
-      do
-       local node=${t_host_remote[$i]}
-       local pgid=${t_pgid[$i]}
-       awk '$1 == "'"$node"'" && $2 == "'"$pgid"'" {print}' $pg_coll >>$tmp_item
-      done
-
-      # t_item: <remote_hostname> <pgid> <epoch> <data_path>
-      sort -u $tmp_item >$t_item
-      rm -f $tmp_item
-
-      local entry=`choose_epoch $t_item` #t_host_remote
-      rm -f $t_item
-
-      node=`echo $entry|awk '{print $1}'`
-      data_path=`echo $entry|awk '{print $4}'`
-      if [ "$node"x = ""x ];then
-        echo "$func: v2 node is NULL (to get image_header_hobject)"
-        exit
-      fi
-
-      for ((i=0; i<${#t_host_remote[*]}; i++))
-      do
-       if [ "${t_host_remote[$i]}"x = "$node"x ] && [ "${t_data_path[$i]}"x = "$data_path"x ];then
-         image_header_hobject=${t_hobject[$i]}
-         break
-       fi
-      done
-      
-      if [ "$image_id_hobject"x = ""x ];then
-        echo "$func: v2 image_header_hobject is NULL"
-        exit
-      fi
-
-      check_osd_process $node
-     
-      echo "$func: rbd_header_hobject = $node $image_header_hobject"
-      echo -e "image_name:\t$image_name_in"
-      echo -e "image_format:\t$image_format"
-
-      #data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'`
-      >$result
-      cmds="bash $job_path/osd_job do_image_metadata_v2 $data_path $image_id `dump_backslash $image_header_hobject` $snap_name" 
-      ssh $ssh_option $node $cmds | tee $result
-    fi
-  }
-
-  if [ ! -s $result ];then
-    echo "$func: $image_name_in not exists" 
-    exit
-  fi
-  
-  # to assign value to global variable
-  db_image_prefix=`cat $result|awk '/^(object_prefix|block_name):/{print $2}'`
-  if [ "$db_image_prefix"x = ""x ];then
-    echo "$func: image_prefix is NULL"
-    exit
-  fi
-
-  db_image_size=`cat $result|awk '/^image_size:/{print $2}'`
-  db_order=`cat $result|awk '/^order:/{print $2}'`
-  if [ "$snap_name"x != ""x ];then
-    db_snap_id=`cat $result|awk '/^snapshot:/{print $2}'`
-    if [ "$db_snap_id"x = ""x ];then
-      echo "$func: $image_name_in@$snap_name NOT EXISTS"
-      exit
-    fi
-    db_snap_image_size=`cat $result|awk '/^snapshot:/{print $4}'`
-  else
-    #save snaplist
-    local image_snaplist=$images/pool_$pool_id/$image_name_in/@snaplist
-    local image_dir=$images/pool_$pool_id/$image_name_in
-    if [ ! -e $image_dir ];then
-      mkdir -p $image_dir
-    fi
-    cat $result|awk '/^snapshot:/{print $2" "$3" "$4}' >$image_snaplist
-  fi
-  found=1
-  rm -f $result
-}
-
-# Prints every image found in the offline database as
-# "<pool_id>/<image_name>", format v1 images (from $image_coll_v1) first,
-# then format v2 images (from $image_coll_v2). The pool id is converted from
-# the hex suffix of the object name with awk strtonum (a gawk function), and
-# the "\u" escape in names is turned back into "_".
-function list_images()
-{
-   echo "=============== format =============="
-   echo "format: <pool_id>/<image_name>"
-   echo "================ v1: ================"
-   #sed -n 's/\(.*\)\/\(.*\)\.rbd__\(.*\)/\2/p' $image_coll_v1|sort -u|sed -e 's/\\u/_/g'
-   sed -n 's/.*\/\(.*\)\.rbd__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v1|sort -u|awk '{print strtonum("0x"$1)"/"$2;}'|sed -e 's/\\u/_/g'
-   echo "================ v2: ================"
-   #sed -n 's/\(.*\)\/rbd\\uid.\(.*\)__\(head.*\)/\2/p' $image_coll_v2|sort -u|sed 's/\\u/_/g'
-   sed -n 's/.*\/rbd\\uid.\(.*\)__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v2|sort -u|awk '{print strtonum("0x"$1)"/"$2}'|sed 's/\\u/_/g'
-}
-
-# lookup image metadata
-# and 
-# collect hobjects of image with the latest pg epoch
-function discover_image_nosnap()
-{
-  local func="discover_image_nosnap"
-  echo "$func ..."
-  local pool_id=$1
-  local image_name=$2
-  pool_id=$(($pool_id))
-  lookup_image $pool_id $image_name # assign $image_prefix
-  gather_hobject_nosnap $pool_id $db_image_prefix 
-  if [ $? -ne 0 ];then
-    exit
-  fi
-  local image_hobjects_stable_nosnap=$images/pool_$pool_id/$db_image_prefix/$db_image_prefix"_head"
-  local image_hobjects_dir=$images/pool_$pool_id/$image_name_in
-  if [ ! -e $image_hobjects_dir ];then
-    mkdir -p $image_hobjects_dir
-  fi
-  # mv image_prefix to image_name
-  mv $image_hobjects_stable_nosnap $image_hobjects_dir/$image_name_in
-  rm -rf $images/pool_$pool_id/$db_image_prefix
-}
-
-# get the snapid object at the given object offset
-# if there is no such object, choose the one with the smallest snapid which is greater than the current snapid
-function get_object_clone()
-{
-  local func="get_object_clone"
-  if [ $# -lt 4 ];then
-    exit
-  fi
-
-  local object_offset_string=$1
-  local snapid=$2
-  local snaplist_path=$3
-  local snapset_output_dir=$4
-
-  # snapid in desc
-  local snap_coll_arr=(`
-  cat $snaplist_path|awk '{ if ($1 >= '"$snapid"') print "'"$snapset_output_dir"'/@"$1}'`) 
-
-  local hex_snapid=`printf "%x" $snapid`
-  pushd $snapset_output_dir >/dev/null
-  # get object with the smallest snapid great than current specify snapid
-  awk '$4 == "'"$object_offset_string"'" && $5 >= '$snapid' {print}' `echo ${snap_coll_arr[@]}` |tail -n 1
-  popd >/dev/null
-}
-
-# gather hobject for each snapid
-function gen_snapset_hobject()
-{
-  local func="gen_image_snapset"
-  echo "$func ..."
-  if [ $# -lt 4 ];then
-    echo "$func: parameters: <pool_id> <image_prefix> <snaplist_path> <snapset_output_dir>"
-    exit
-  fi
-  local pool_id=$1
-  local image_prefix=$2
-  local snaplist_path=$3
-  local snapset_output_dir=$4
-  pool_id=$(($pool_id))
-  OIFS=$IFS
-  IFS=$'\n'
-  local snaparr=(`cat $snaplist_path`) 
-  # gather hobject for each snapshot
-  trap 'echo $func failed; exit;' INT HUP
-  for line in ${snaparr[@]}
-  do
-    OOIFS=$IFS
-    IFS=$' '
-    local field=(`echo $line`)
-    local snapid=${field[0]}
-    local image_hobjects_stable_snap=$images/pool_$pool_id/$image_prefix/$image_prefix"_"$snapid
-    local image_snap=$snapset_output_dir/@$snapid
-    gather_hobject_snap $pool_id $image_prefix $snapid 
-    local res=$?
-    if [ $res -ne 0 ];then
-      touch $image_snap
-    else 
-      mv $image_hobjects_stable_snap $image_snap
-    fi
-    IFS=$OOIFS
-  done
-  IFS=$OIFS
-}
-
-# lookup image metadata and get snapid hobjects
-function discover_image_snap()
-{
-  local func="discover_image_snap"
-  echo "$func ..."
-  if [ $# -lt 3 ];then
-    echo "$func: paramters: <pool_id> <image_name> [<snap_name>]"
-    exit
-  fi
-  local pool_id=$1
-  local image_name=$2
-  local snap_name=$3
-  pool_id=$(($pool_id))
-  #mkdir -p $images/$image_prefix
-  lookup_image $pool_id $image_name $snap_name # input image_name and snap_name to lookup metadata and snap_id
-  if [ "$db_snap_id"x = ""x ];then
-    echo "$func: lookup image failed to gen snapid"
-    exit
-  fi
-  local image_hobjects_dir_prefix=$images/pool_$pool_id/$db_image_prefix
-  local image_nosnap=$images/pool_$pool_id/$image_name_in 
-  #check if image nosnap recovered
-  if [ ! -s $image_nosnap ];then
-    echo "$func: please recover image nosnap before recover with snap"
-    rm -rf $image_hobjects_dir_prefix
-    exit
-  fi
-  local image_hobject_dir=$images/pool_$pool_id/$image_name_in
-  local image_snap_hobject=$image_hobject_dir/$image_name_in@$db_snap_id
-  local image_snap_hobject_head=$image_hobject_dir/$image_name_in@$db_snap_id@head
-  local image_snaplist=$image_hobject_dir/@snaplist
-  local image_snapset_dir=$image_hobject_dir/@snapset_dir
-  local image_head=$image_hobject_dir/$image_name_in
-  if [ ! -e $image_hobject_dir ];then
-    mkdir -p $image_hobject_dir
-  fi
-  # only gen snapset one time
-  if [ ! -e $image_snapset_dir ];then
-    mkdir -p $image_snapset_dir
-    gen_snapset_hobject $pool_id $db_image_prefix $image_snaplist $image_snapset_dir  
-   
-  fi
-
-  echo "$func: will get object clone ..."
-  >$image_snap_hobject
-  >$image_snap_hobject_head
-
-  trap 'echo $func failed; exit;' INT HUP
-  # get each offset 's snapid hobject
-  while read line
-  do
-    #echo $line
-    OOIFS=$IFS
-    IFS=$' '
-    local field=(`echo $line`)
-    local offset_string=${field[3]}
-    IFS=$OOIFS
-    local entry=`get_object_clone $offset_string $db_snap_id $image_snaplist $image_snapset_dir` 
-    if [ "$entry"x != ""x ];then
-      echo $entry >> $image_snap_hobject
-      echo `dump_backslash $line` >> $image_snap_hobject_head
-    fi
-  done < $image_head
-  rm -rf $image_hobjects_dir_prefix
-}
-
-# after discover_image_nosnap
-# collect objects from osds one by one in sequence
-function copy_image_nosnap_single_thread()
-{
-  local func="copy_image_nosnap_single_thread"
-  echo "$func ..."
-  if [ $# -lt 3 ];then
-    echo "$func: parameters: <pool_id> <image_hobjects> <backup_dir>"
-    exit
-  fi
-  local pool_id=$1
-  local image_hobjects=$2
-  local backup_dir=$3
-  pool_id=$(($pool_id))
-
-  # make sure lookup_image first
-  if [ $found = 0 ];then
-    echo "$func: image not found, maybe forget to discover_image"
-    exit
-  fi
-  if [ ! -e $backup_dir ];then
-    mkdir -p $backup_dir
-  fi
-
-  local image_dir=$backup_dir/pool_$pool_id/$image_name_in
-  local image_file=$image_dir/$image_name_in
-  local CURRENT=$image_dir/@CURRENT
-  local LOCK=$image_dir/@LOCK
-  if [ ! -e $image_dir ];then
-    mkdir -p $image_dir
-  fi
-  if [ -e $LOCK ];then
-    echo "$func: $LOCK is locked by other process"
-    exit
-  else
-    touch $LOCK
-  fi
-
-  >$image_file
-  truncate -s $db_image_size $image_file 
-  echo "head">$CURRENT
-
-  local count=$(($db_image_size >> $db_order))
-  local start=`cat $image_hobjects|head -n 1|awk '{print $4}'`
-  local end=`cat $image_hobjects|tail -n 1|awk '{print $4}'`
-  local entry_count=`cat $image_hobjects|wc -l`
-
-  local char_bits=$((`echo $start|wc -c` -1 ))
-  local format="%0"$char_bits"x"
-  
-  local expect_start=`printf $format 0`
-  local expect_end=`printf $format $(($count -1 ))`  
-
-  echo -e "object_count\t$entry_count"
-  echo -e "expect\t\t[$expect_start ~ $expect_end] count:$count"
-  echo -e "range\t\t[$start ~ $end] count:$entry_count"
-
-  local icount=0
-  local istart=
-  local iend=
-  local percent=
-  
-  trap 'echo $func failed; exit;' INT HUP
-  local unit=$((1<<$db_order))
-  while read line
-  do
-  {
-    icount=$(($icount+1))
-    node=`echo $line|awk '{print $1}'` 
-    hobject=`echo $line|awk '{print $3}'` 
-    offset=`echo $line|awk '{print $4}'`
-    off=$((16#$offset))
-    if [ $icount = 1 ];then
-      istart=$offset
-    fi
-    hobject=`dump_backslash $hobject`
-    iend=$offset
-    sshcmd="cat $hobject"
-    ssh $ssh_option $node $sshcmd < /dev/null | dd of=$image_file bs=$unit seek=$off conv=notrunc 2>/dev/null
-    percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
-    tput sc  #record current cursor
-    echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
-    if [ $icount != $entry_count ];then
-      tput rc # backport most recent cursor
-    fi
-  }
-  done < $image_hobjects
-
-  echo
-  echo -n "size: "
-  ls -lh $image_file|awk '{print  $5"\t"$9}'
-  echo -n "du:   "
-  du -h $image_file
-  #unlock
-  rm -f $LOCK
-}
-
-
-# ssh copy snap_object & head_object from osd to admin node
-# copy all snapshot objects 
-# and 
-# all head objects which has the same offset with snapshot objects 
-function collect_image_snap_objects()
-{
-  local func="collect_image_snap_objects"
-  #$1=backup_dir, $2=snap_name, $3=snap_hobjects, $4=head_hobjects
-  if [ $# -lt 6 ];then
-    echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>"
-    exit
-  fi  
-
-  local pool_id=$1
-  local image_name=$2
-  local snap_id=$3
-  local snap_hobjects=$4 #snap hobjects info
-  local head_hobjects=$5 #head hobjects info
-  local backup_dir=$6
-  pool_id=$(($pool_id))
-
-  local head_dir=$backup_dir/pool_$pool_id/$image_name/@head
-  local snap_dir=$backup_dir/pool_$pool_id/$image_name/@$snap_id
-  local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
-  if [ ! -e $head_dir ];then
-    mkdir -p $head_dir
-  fi
-  if [ ! -e $snap_dir ];then
-    mkdir -p $snap_dir
-  fi
-
-  local snap_node= #osd node
-  local snap_hobject= #hobject path with snapid on osd
-  local snap_offset=
-  local snap_filename=
-
-  local head_node=
-  local head_hobject=
-  local head_offset=
-  local head_filename=
-
-  # ignore if there is no object in snapshot(empty )
-  if [ ! -s $snap_hobjects ];then
-    echo "$func: $snap_hobjects is empty"
-    return 0
-  fi
-  local start=`head -n 1 $snap_hobjects|awk '{print $4}'`
-  local end=`tail -n 1 $snap_hobjects|awk '{print $4}'`
-  local entry_count=`cat $snap_hobjects|wc -l`
-  if [ $((16#$first_offset)) -gt $((16#$last_offset)) ];then
-    echo "$func: $snap_hobjects not sorted"
-    return 1
-  fi
-
-  # just assert if ignored empty snapshot
-  if [ "$start"x = ""x ] || [ "$end"x = ""x ];then
-    return 1
-  fi
-  # speed up copy snapshot
-  # lookup the coresponding head hobject of snap hobject
-  # use command: grep <offset> <head hobjects>
-  # 
-  # eg.
-  # head hobjects: (32 objects, snapid = uint64(-2) = 18446744073709551614)
-  # ceph1 29.4d /var/lib/ceph/osd/ceph-0/current/29.4d_head/rb.0.1c414.6b8b4567.000000000000__head_EC2C1C4D__1d 000000000000 18446744073709551614 869
-  # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__head_0F439A8C__1d 000000000001 18446744073709551614 867
-  # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__head_FC55706A__1d 000000000002 18446744073709551614 869
-  # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__head_20A6328B__1d 000000000003 18446744073709551614 869
-  # ceph2 29.75 /var/lib/ceph/osd/ceph-1/current/29.75_head/rb.0.1c414.6b8b4567.000000000004__head_AC5ADB75__1d 000000000004 18446744073709551614 867
-  # ceph2 29.23 /var/lib/ceph/osd/ceph-1/current/29.23_head/rb.0.1c414.6b8b4567.000000000005__head_1FDEA823__1d 000000000005 18446744073709551614 867
-  # ......
-  # ceph1 29.34 /var/lib/ceph/osd/ceph-0/current/29.34_head/rb.0.1c414.6b8b4567.00000000001f__head_52373734__1d 00000000001f 18446744073709551614 869
-  #
-  # snap hobjects: (3 objects, snapid >= 29)
-  # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__1f_0F439A8C__1d 000000000001 31 867
-  # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__1e_FC55706A__1d 000000000002 30 869
-  # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__1d_20A6328B__1d 000000000003 29 869
-  #
-  # so find out offset in head hobjects line number:
-  # snap hobjects: 000000000001 ---> head hobjects: 2 (n1)
-  # snap hobjects: 000000000003 ---> head hobjects: 4 (n2)
-  # 
-  # finally , grep range from the whole file [1 ~ N] shranked to part of file [n1 ~ n2]
-  # the worst case : [n1 ~ n2] = [1 ~ N], means no shranking
-
-  # get the line number of the start offset in head hobjects
-  local n1=`grep -n $start $head_hobjects|head -n 1|cut -d ":" -f 1`
-  # get the line number of the end offset in head hobjects
-  local n2=`grep -n $end $head_hobjects|head -n 1|cut -d ":" -f 1`
-  local icount=0
-  local istart=
-  local iend=
-  local percent=
-
-  OIFS=$IFS
-  IFS=$'\n'
-
-  #assume file:snap_hobjects is not very large, and can be load into memory
-  local snap_arr=(`cat $snap_hobjects`)
-  local snap_tmp=/tmp/snaptmp.$$$$
-
-  # snap_tmp: 
-  # consists of snap hobject or head hobject
-  # select lineno range: [n1 ~ n2]
-  head -n $n2 $head_hobjects|tail -n $(($n2-$n1+1)) >$snap_tmp 
-
-  echo "copy image snap/head objects from osd ..."
-  echo -e "object_count\t$entry_count"
-  echo -e "range\t\t[$start ~ $end] count:$entry_count"
-
-  trap 'echo $func failed; exit;' INT HUP
-  for line in ${snap_arr[*]}
-  do
-    icount=$(($icount+1))    
-
-    OOIFS=$IFS
-    IFS=$' '
-
-    local arr=(`echo $line`)
-    snap_node=${arr[0]}
-    snap_hobject=${arr[2]}
-    snap_offset=${arr[3]}
-    snap_filename=$snap_dir/$snap_offset
-
-    if [ $icount = 1 ];then
-      istart=$snap_offset
-    fi
-    iend=$snap_offset
-
-    #lookup corresponding head hobject of snap hobject
-    local res=`grep $snap_offset $snap_tmp|head -n 1` 
-    if [ "$res"x = ""x ];then
-      echo "$func: image object[ $snap_offset ] missing"
-      exit
-    fi
-    
-    local arr2=(`echo $res`)
-    head_node=${arr2[0]}
-    head_hobject=${arr2[2]}
-    head_offset=${arr2[3]}
-    head_filename=$head_dir/$head_offset
-
-    # just copy object(snap/head) if it not exits
-    if [ ! -e $snap_filename ];then
-      ssh $ssh_option $snap_node "cat $snap_hobject" > $snap_filename 
-    fi
-    if [ ! -e $head_filename ];then
-      ssh $ssh_option $head_node "cat $head_hobject" > $head_filename 
-    fi
-    IFS=$OOIFS
-
-    percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
-    tput sc  #record current cursor
-    echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
-    if [ $icount != $entry_count ];then
-      tput rc # backport most recent cursor
-    fi
-  done
-  echo
-  IFS=$OIFS 
-  rm -f $snap_tmp
-  return 0
-}
-
-# copy all snap objects and corresponding head objects from osds
-# in single process
-function copy_image_snap_single_thread()
-{
-  local func="copy_image_snap_single_thread"
-  if [ $# -lt 6 ];then
-    echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>" 
-    exit
-  fi
-  local pool_id=$1
-  local image_name=$2
-  local snap_id=$3
-  local snap_hobjects=$4
-  local head_hobjects=$5
-  local backup_dir=$6
-  pool_id=$(($pool_id))
-
-  local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
-  local LOCK=$backup_dir/pool_$pool_id/$image_name/@LOCK
-  #lock
-  if [ -e $LOCK ];then
-    echo "$func: $LOCK is locked by other process"
-    exit
-  else
-    touch $LOCK
-  fi
-  collect_image_snap_objects $pool_id $image_name $snap_id $snap_hobjects $head_hobjects $backup_dir
-  #unlock
-  rm -f $LOCK
-}
-
-# after all snap objects and neccessary head objects are copied,
-# just pick appropriate head objects and snap objects and write them to image
-# in order to rollback image to snapshot
-#
-# init: image is created by copy_image_nosnap_single_thread firstly
-#
-# all output include 3 parts:
-# <image>  <head objects>      <snap objects>
-# 
-#          head objects1  ---  snap1 objects
-#          head objects2  ---  snap2 objects
-#  image   head objects3  ---  snap3 objects
-#          ......
-#          head objectsN  ---  snapN objects
-#
-# how to rollback:
-# firstly rollback to head, secondly write <snapX objects>
-# head  = <image> + <head objects>
-# snap1 = <image> + <head objects> + <snap1 objects>
-# snap2 = <image> + <head objects> + <snap2 objects>
-# snap3 = <image> + <head objects> + <snap3 objects>
-# ......
-# snapN = <image> + <head objects> + <snapN objects>
-# 
-# improve rollback:
-# head objects and snapX objects intersect if snapX objects are not empty,
-# so the intersection needs to be deduplicated.
-# deduplication steps:
-# - get the difference set of head objects and snapX objects
-# - write the difference set objects to image
-# - write the snapX objects to image
-function rollback_image_snap()
-{
-  local func="rollback_image_snap"
-  
-  echo "$func ..."
-  
-  trap 'echo $func failed; exit;' INT HUP
-  if [ $# -lt 6 ];then
-    echo "$func: parameters <pool_id> <image_name> <snap_id> <snap_object_dir> <backup_dir> <image_unit>"
-    exit
-  fi
-  local pool_id=$1
-  local image_name=$2
-  local snap_id=$3
-  local snap_object_dir=$4
-  local backup_dir=$5
-  local image_unit=$6
-
-  local need_diff_set=0
-
-  local image_path=$backup_dir/pool_$pool_id/$image_name/$image_name
-  local head_object_dir=$backup_dir/pool_$pool_id/$image_name/@head
-  local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
-  local LOCK=$backup_dir/pool_$pool_id/$image_name/@LOCK
-  if [ -e $LOCK ];then
-    echo "$func: $LOCK is locked by other process"
-    exit
-  else
-    touch $LOCK
-  fi
-  if [ $snap_id -ne -2 ];then
-    echo $snap_id > $CURRENT
-  else
-    echo "head" > $CURRENT
-  fi 
-
-  if [ ! -e $snap_object_dir ];then
-    return 0
-  fi
-
-  if [ "$snap_object_dir"x != "$head_object_dir"x ];then
-    echo "$func: need to compute diff_set of head"
-    need_diff_set=1
-  else
-    echo "$func: NO diff_set"
-    need_diff_set=0
-  fi
-
-  local entry_count=0
-  local start=
-  local end=
-  local offset=
-  local icount=0
-  local istart=
-  local iend=
-  local percent=
-
-  local snap_objects=
-  local head_objects=
-  local diff_set=
-
-  snap_objects=(`ls $snap_object_dir`)
-
-  # if need to compute difference set of head_objects and snap_objects
-  if [ $need_diff_set -ne 0 ];then
-    head_objects=(`ls $head_object_dir`) 
-
-    #get the difference set: ( head_objects - snap_objects )
-    diff_set=(`
-    sort -m <(echo ${head_objects[@]}|xargs -n 1 echo) <(echo ${snap_objects[@]}|xargs -n 1 echo) \
-       <(echo ${snap_objects[@]}|xargs -n 1 echo) |uniq -u`) 
-
-    # copy diff_set of head object to image
-    pushd $head_object_dir >/dev/null
-
-    echo "$func: copy diff_set head objects ..."
-    entry_count=${#diff_set[@]}  
-    start=${diff_set[0]}
-    end=
-    if [ $entry_count -gt 0 ];then
-      end=${diff_set[$(($entry_count - 1))]}
-    fi
-    offset=
-    icount=0
-    istart=
-    iend=
-    percent=
-
-    echo -e "object_count\t$entry_count"
-    echo -e "range\t\t[$start ~ $end] count:$entry_count"
-
-    for object in ${diff_set[@]}
-    do
-      icount=$(($icount+1))
-      if [ $icount = 1 ];then
-        istart=$object
-      fi
-      iend=$object
-
-      local offset=$((16#$object))
-      dd if=$object of=$image_path bs=$image_unit seek=$offset conv=notrunc 2>/dev/null
-
-      percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
-      tput sc  #record current cursor
-      echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
-      if [ $icount != $entry_count ];then
-        tput rc # backport most recent cursor
-      fi
-    done
-    if [ $entry_count -gt 0 ];then
-      echo
-    fi
-    popd >/dev/null
-
-    if [ $snap_id -ne -2 ];then
-      echo -e "$image_name already rollback diff_set: (head - snap)"
-    fi
-  fi
-  
-  # copy snap object to image
-  pushd $snap_object_dir >/dev/null 
-
-  if [ $need_diff_set -ne 0 ];then
-    echo "$func: copy snap objects ..."
-  else
-    echo "$func: copy head objects ..."
-  fi
-  entry_count=${#snap_objects[@]}  
-  start=${snap_objects[0]}
-  end=
-  if [ $entry_count -gt 0 ];then
-    end=${snap_objects[$(($entry_count - 1))]}
-  fi
-  offset=
-  icount=0
-  istart=
-  iend=
-  percent=
-
-  echo -e "object_count\t$entry_count"
-  echo -e "range\t\t[$start ~ $end] count:$entry_count"
-
-  for object in ${snap_objects[@]}
-  do
-    icount=$(($icount+1))
-    if [ $icount = 1 ];then
-      istart=$object
-    fi
-    iend=$object
-
-    local offset=$((16#$object))
-    dd if=$object of=$image_path bs=$image_unit seek=$offset conv=notrunc 2>/dev/null
-
-    percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
-    tput sc  #record current cursor
-    echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
-    if [ $icount != $entry_count ];then
-      tput rc # backport most recent cursor
-    fi
-  done
-  if [ $entry_count -gt 0 ];then
-    echo
-  fi
-  popd >/dev/null
-
-  rm -f $LOCK
-  if [ $snap_id -ne -2 ];then
-    echo "$image_name rollback to snapid: $snap_id"
-  else
-    echo "$image_name rollback to head"
-  fi
-}
-
-function recover_image()
-{
-  local func="recover_image"
-  echo "$func ..."
-  
-  if [ $# -lt 3 ];then
-    echo "$func: paramters: <pool_id> <image_name> <snap_name> [<backup_dir>]"
-    exit
-  fi
-
-  local pool_id=$1
-  local img_name=$2
-  local snap_name=$3
-  local backup_dir=$4
-  pool_id=$(($pool_id))
-  if [ "$snap_name"x = "@"x ];then
-    snap_name=
-  fi
-  if [ "$backup_dir"x = ""x ];then
-    backup_dir=$default_backup_dir
-  fi
-
-  #recover image with nosnap
-  if [ "$snap_name"x = ""x ];then
-    discover_image_nosnap $pool_id $img_name #input image_name 
-    local image_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in
-    copy_image_nosnap_single_thread $pool_id $image_hobjects $backup_dir
-
-  #recover image with snap
-  else
-
-    # check if recovered head already
-    local img_hobjects_path=$images/pool_$pool_id/$img_name/$img_name
-    local img_file_path=$backup_dir/pool_$pool_id/$img_name/$img_name
-    if [ ! -e $img_hobjects_path ] || [ ! -e $img_file_path ];then
-      echo "$func: $img_name@$snap_name : can not rollback to snapshot, please recover image head first"
-      exit
-    fi
-
-    # rollback to head
-    if [ "$snap_name"x = "@@"x ];then
-      local head_dir=$backup_dir/pool_$pool_id/$img_name/@head
-      if [ -e $head_dir ];then
-       local unit=`pushd $head_dir >/dev/null; ls|head -n 1|xargs -n 1 stat|awk '/Size:/{print $2}'`
-        # rollback to head
-        rollback_image_snap $pool_id $img_name -2 $backup_dir/$img_name/@head $backup_dir $unit
-        echo "$image_name_in head : $backup_dir/$img_name/$img_name"
-      else
-       echo "$func: no need to rollback to head"
-      fi
-      return 0
-    fi
-    
-    # rollback to snap
-    discover_image_snap $pool_id $img_name $snap_name # get image meta & get snapid object
-    local snap_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id
-    local head_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id@head
-    local snap_object_dir=$backup_dir/pool_$pool_id/$image_name_in/@$db_snap_id
-    local image_path=$backup_dir/pool_$pool_id/$image_name_in/$image_name_in
-    local image_unit=$((1<<$db_order))
-    copy_image_snap_single_thread $pool_id $image_name_in $db_snap_id $snap_hobjects $head_hobjects $backup_dir
-    rollback_image_snap $pool_id $image_name_in $db_snap_id $snap_object_dir $backup_dir $image_unit 
-    echo "$image_name_in@$snap_name : $image_path"
-  fi
-}
diff --git a/src/rbd_recover_tool/epoch_h b/src/rbd_recover_tool/epoch_h
deleted file mode 100644 (file)
index 4e50026..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/bin/bash
-# file: epoch_h
-#
-# Copyright (C) 2015 Ubuntu Kylin
-#
-# Author: Min Chen <minchen@ubuntukylin.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Library Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library Public License for more details.
-#
-
-my_dir=$(dirname "$0")
-. $my_dir/common_h
-
-#pgid_list=$single_node/$cluster-$id/pgid_list
-function get_pgid_list()
-{
-  find $osd_data/current/ -type d -name "*_head"|\
-       sed -n 's/\(.*\)\/current\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head/\2 \1/p'|\
-       sort -t ' ' -k 1.1,1h -k 2.1,2 > $pgid_list;
-}
-
-function get_pgid()
-{
-  hobject_path=$1
-  echo $hobject_path| sed -n 's/\(.*\)\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\(.*\)/\2/p'
-}
-
-infos_seq=
-function get_infos_seq()
-{
-  local func="get_infos_seq"
-  
-  local keyword=":infos." 
-  local infos_key=`get_map_header_key $keyword`
-
-  if [ "$infos_key"x = ""x ];then
-    echo "$func: keyword not input or infos_key not exisits"
-    exit 
-  fi
-  local prefix=`get_map_header_prefix`
-  local key=$infos_key
-
-  infos_seq=`get_header_seq $prefix $key`
-  if [ "$infos_seq"x = ""x ];then
-    echo "$func: infos_seq not exists"
-    exit
-  fi
-}
-
-pg_epoch=
-function get_pg_epoch()
-{
-  local func="get_pg_epoch"
-  if [ "$1"x = ""x ];then
-    echo "$func: no pgid input"
-    exit
-  fi
-  local pgid=$1
-  local key=$pgid"_epoch"
-
-  #get_infos_seq;
-  # infos_seq default to 1
-  infos_seq=1
-  local infos_seq=`printf "%016d" $infos_seq`
-  local prefix="_USER_"$infos_seq"_USER_"
-
-  pg_epoch=`get_header_kv $prefix $key int`
-  if [ "$pg_epoch"x = ""x ];then
-    echo "$func: $key not exisits"
-    exit
-  fi
-}
diff --git a/src/rbd_recover_tool/metadata_h b/src/rbd_recover_tool/metadata_h
deleted file mode 100644 (file)
index 0296962..0000000
+++ /dev/null
@@ -1,368 +0,0 @@
-#!/bin/bash
-# file: metadata_h
-#
-# Copyright (C) 2015 Ubuntu Kylin
-#
-# Author: Min Chen <minchen@ubuntukylin.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Library Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library Public License for more details.
-#
-
-my_dir=$(dirname "$0")
-. $my_dir/common_h
-. $my_dir/epoch_h
-
-# put origin name in $image_name_in: for output
-# put convert "_" name in $image_name: for grep image hobjects from database
-image_name_in=
-image_name=
-function input_image()
-{
-  local func="input_image"
-  if [ "$1"x = ""x ];then
-    echo "$func: no image name input"
-    exit
-  fi
-
-  image_name_in=$1
-  # "_" -> "\u"
-  image_name=`convert_underline $image_name_in`
-}
-
-#======================================== distinguish v1 or v2 ===================================  
-#image_list_v1=$single_node/$cluster-$id/image_list_v1
-#image_list_v2=$single_node/$cluster-$id/image_list_v2
-function get_image_list()
-{
-  find $osd_data/current/ -type f|grep ".rbd__" >$image_list_v1
-  find $osd_data/current/ -type f|grep "rbd\\\\uid." >$image_list_v2
-}
-
-function get_image_format_by_hobject()
-{
-  local func="get_image_format"
-  if [ "$1"x = ""x ];then
-    exit
-  fi
-  local res1=`cat $image_list_v1|grep $1`  
-  if [ "$res1"x != ""x ];then
-    echo 1
-    exit
-  fi  
-  local res2=`cat $image_list_v2|grep  $1`
-  if [ "$res2"x = ""x ];then
-    echo 2
-    exit
-  fi
-}
-
-#======================================== image format v1 ========================================  
-# <image_name>.rbd includes 3 parts:
-# header + snap_count*snapshot + snap_count*snap_name
-# 
-# struct rbd_obj_header_ondisk {
-# 40   char text[40];
-# 24   char block_name[RBD_MAX_BLOCK_NAME_SIZE];
-# 4    char signature[4];
-# 8    char version[8];
-#      struct {
-# 1            __u8 order;
-# 1            __u8 crypt_type;
-# 1            __u8 comp_type;
-# 1            __u8 unused;
-#      } __attribute__((packed)) options;
-# 8    __le64 image_size;//hexdump -C s=80 n=8
-# 8    __le64 snap_seq;  //hexdump -C s=88 n=8
-# 4    __le32 snap_count;//hexdump -C s=96 n=4
-# 4    __le32 reserved;
-# 8    __le64 snap_names_len;//hexdump -C s=104 n=8
-#      struct rbd_obj_snap_ondisk snaps[0];
-# } __attribute__((packed));
-#
-# sizeof(rbd_obj_header_ondisk): 112
-#
-# struct rbd_obj_snap_ondisk {
-# 8    __le64 id;    //hexdump -C s=112+i*16 n=8   , i=[0, snap_count)
-# 8    __le64 image_size;//hexdump -C s=112+i*16+8 n=8, i=[0, snap_count)
-# } __attribute__((packed));
-# sizeof(rbd_obj_snap_ondisk): 16
-#
-# get snap_names from <image_name>.rbd
-# hexdump -e '10/1 "%_c"' -s $((112 + $snap_count*16)) -n $snap_names_len <image_name>.rbd
-# then split snap_names into array
-
-function get_image_metadata_v1()
-{
-  local func="get_image_metadata_v1"
-  if [ "$1"x = ""x ];then
-    echo "$func: no image head object input"
-    exit
-  fi
-  local snap_name=
-  if [ "$2"x != ""x ];then
-    snap_name=$2 
-  fi
-    
-  if [ ! -e $1 ];then
-    echo "$func: $1 not exists"
-    exit
-  fi
-  local hobject_path=$1  
-  d_hobject_path=`dump_backslash $1`
-  local image_format=`get_image_format_by_hobject $d_hobject_path`
-  if [ $image_format != 1 ];then
-    echo "$func: image_format must be 1"
-    exit
-  fi
-  if [ ! -e $hobject_path ];then
-    echo "$func: $hobject_path not exists"
-    exit
-  fi
-
-  # decode rbd_obj_header_ondisk of <image_name>.rbd
-  local block_name=`hexdump -e '10/1 "%c"' -s 40 -n 24 $hobject_path`
-  local order=`hexdump -e '10/4 "%u"' -s 76 -n 1 $hobject_path`
-  local image_size=`hexdump -C -s 80 -n 8 $hobject_path|head -n 1|awk '{for (i=9; i>1; i--) {printf $i}}'`
-  image_size=$((16#$image_size))
-  local snap_seq=`hexdump -C -s 88 -n 8 $hobject_path|head -n 1|
-       awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'`
-  local snap_count=`hexdump -C -s 96 -n 4 $hobject_path|head -n 1|
-       awk '{num=""; for(i=5; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'` 
-  local snap_names_len=`hexdump -C -s 104 -n 8 $hobject_path|head -n 1|
-       awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'` 
-
-  echo -e "block_name:\t$block_name"
-  echo -e "order:\t\t$order"
-  echo -e "image_size:\t$image_size"
-  echo -e "snap_seq:\t$snap_seq"
-
-  # decode N rbd_obj_snap_ondisk of <image_name>.rbd
-  declare -a snap_ids
-  declare -a snap_names
-  declare -a snap_image_sizes
-  local size_header=112 #sizeof(rbd_obj_header_ondisk)
-  local size_snap=16 #sizeof(rbd_obj_snap_ondisk)
-  local offset=0
-  local id_off=0
-  local size_off=0
-  for ((i=0; i<$snap_count; i++))
-  do
-    offset=$(($size_header + $i * $size_snap)) 
-    id_off=$offset 
-    size_off=$(($offset + 8))
-    snap_ids[$i]=`hexdump -C -s $id_off -n 8 $hobject_path|head -n 1|
-       awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'`
-    snap_image_sizes[$i]=`hexdump -C -s $size_off -n 8 $hobject_path|head -n 1|
-       awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'`
-  done
-  offset=$(($size_header + $snap_count * $size_snap))
-  snap_names=(`hexdump -e '10/1 "%_c"' -s $offset -n $snap_names_len $hobject_path|
-       awk -F "\\\\\\\\\\\\\\\\0" '{for(i=1; i<=NF; i++) {print $i" "} }'`); 
-
-  echo -e "\t\tID\tNAME\t\tSIZE"
-  for ((i=0; i<$snap_count; i++))
-  do
-    if [ "$snap_name"x = ""x ];then
-      echo -n -e "snapshot:\t"
-      echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}"
-      continue
-    fi
-    if [ "$snap_name"x = "${snap_names[$i]}"x ];then 
-      echo -n -e "snapshot:\t"
-      echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}"
-      return
-    fi
-  done
-}
-
-#======================================== end image format v1 ========================================  
-
-#======================================== image format v2 ======================================== 
-
-# map_header, header_seq, header, key/value
-# eg. 
-# map_header           _HOBJTOSEQ_:rbd%uheader%e139a6b8b4567...head.2.68E826B6
-# meta_header_seq      17426
-# header:              _USER_0000000000017426_USER_:object_prefix
-#                      _USER_0000000000017426_USER_:order
-#                      _USER_0000000000017426_USER_:size
-#                      _USER_0000000000017426_USER_:snap_seq
-# key/value            ceph-kvstore-tool /storepath get _USER_0000000000017426_USER_ (object_prefix|order|size|snap_seq)
-
-# decode image id from image_id_hobject 
-function get_image_id()
-{
-  local func="get_image_id"
-  if [ "$1"x = ""x ];then
-    exit;
-  fi
-  local image_id_hobject=$1 #from admin node's database
-
-  if [ ! -e $image_id_hobject ];then
-    #echo "$func: $image_id_hobject not exists"
-    exit;
-  fi
-  
-  # get len of string  
-  local n=`hexdump -e '10/4 "%u"' -s 0 -n 4 $image_id_hobject`
-  # get string
-  hexdump -e '10/1 "%c"' -s 4 -n $n $image_id_hobject
-}
-
-#find image_id omap entry in omaplist
-map_header_prefix=
-map_header_key=
-function get_map_header()
-{
-  local func="get_map_header"
-  local image_id=$1
-  if [ "$image_id"x = ""x ];then
-    echo "$func: no image_id input"
-    exit;
-  fi
-  map_header_prefix=`get_map_header_prefix`
-  local keyword="header%e"$image_id
-  map_header_key=`get_map_header_key $keyword`
-  if [ "$map_header_key"x = ""x ];then
-    echo "$func: map_header_key is NULL(not in omaplist)"
-    exit
-  fi
-}
-
-#get meta header seq from map_header
-meta_header_seq=
-function get_meta_header_seq() 
-{
-  local func="get_meta_header_seq"
-  if [ "$1"x == ""x ];then
-    echo "$func: no prefix input"
-    exit;
-  elif [ "$2"x == ""x ];then
-    echo "$func: no key input"
-    exit;
-  fi
-  local prefix=$1;
-  local key=$2;
-  meta_header_seq=`get_header_seq $prefix $key`
-}
-
-# get image metadata : object_prefix, order, image_size, snap_seq
-object_prefix=
-order=
-image_size=
-snap_seq=
-function get_image_metadata_v2()
-{
-  local func="get_image_metadata_v2"
-  if [ "$1"x = ""x ];then
-    echo "$func: no meta_header_seq input"
-    exit;
-  fi
-  local meta_header_seq=`printf "%016d" $1`
-  #echo "$func: meta_header_seq = "$meta_header_seq
-  local ghobject_key="_USER_"$meta_header_seq"_USER_"
-  local prefix=$ghobject_key
-
-  object_prefix=`get_header_kv $prefix object_prefix string`
-  #object_prefix="rbd_data.$image_id"
-  order=`get_header_kv $prefix order int`
-  image_size=`get_header_kv $prefix size int` 
-  snap_seq=`get_header_kv $prefix snap_seq int`
-
-  echo -e "object_prefix:\t$object_prefix"
-  echo -e "order:\t\t$order"
-  echo -e "image_size:\t$image_size"
-  echo -e "snap_seq:\t$snap_seq"
-
-  # list snapshot
-  list_snaps_v2 $1 $2
-}
-
-# struct cls_rbd_snap {
-#      snapid_t id;
-#      string name;
-#      uint64_t image_size;
-#      uint64_t features;
-#      uint8_t protection_status;
-#      cls_rbd_parent parent;
-# }
-# decode cls_rbd_snap
-# 1    u8      struct_v
-# 1    u8      struct_compat
-# 4    u32     struct_len
-# 8    u64     snapid_t id //s=6 n=8
-# 4    u32     len of name //s=14 n=4
-# len  char    name        //s=18 n=len
-# 8    u64     image_size
-# 8    u64     features
-# ......
-#
-function list_snaps_v2()
-{
-  local func="list_snaps_v2"
-  if [ "$1"x = ""x ];then
-    exit
-  fi
-  local sname=
-  if [ $# -eq 2 ];then
-    sname=$2
-  fi
-  local meta_header_seq=`printf "%016d" $1`
-  local prefix="_USER_"$meta_header_seq"_USER_"
-  local keys=(`awk -F ":" '/snapshot_/ && $1 == "'"$prefix"'" {if ($2 == "") exit; split($2, arr, "_"); 
-       print arr[2];}' $omap_list|sort -r`)
-  echo -e "\t\tID\tNAME\t\tSIZE"
-  for key in ${keys[@]}
-  do
-    key="snapshot_$key"
-    local arr=(`ceph-kvstore-tool $omap_path get $prefix $key|awk -F ":" '{print $2}'`);
-    # get snap_name
-    tmp=
-    for ((i=17; i>13; i--))
-    do
-      tmp="$tmp${arr[$i]}"
-    done
-    local len=$((16#$tmp))
-    local snap_name=
-    for ((i=18; i<$((18+$len)); i++))
-    do
-      # convert ascii to char
-      local char=`echo -e "\x${arr[$i]}"` 
-      snap_name="$snap_name$char"
-    done
-    # get snap_id (little endian)
-    local tmp=
-    for ((i=13; i>5; i--))
-    do
-      tmp="$tmp${arr[$i]}" 
-    done
-    local snap_id=$((16#$tmp))
-    # get image_size of current snap (little endian)
-    tmp=
-    for ((i=$((25+$len)); i>$((17+$len)); i--))
-    do
-      tmp="$tmp${arr[$i]}"
-    done
-    local image_size=$((16#$tmp))
-    if [ "$sname"x = ""x ];then
-      echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size" 
-      continue
-    fi
-    if [ "$sname"x = "$snap_name"x ];then
-      echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size" 
-      return
-    fi
-  done
-}
-
-#======================================== end image format v2 ========================================  
diff --git a/src/rbd_recover_tool/osd_job b/src/rbd_recover_tool/osd_job
deleted file mode 100755 (executable)
index f3e2ff3..0000000
+++ /dev/null
@@ -1,170 +0,0 @@
-#!/bin/bash
-# file: osd_job
-#
-# Copyright (C) 2015 Ubuntu Kylin
-#
-# Author: Min Chen <minchen@ubuntukylin.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Library Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library Public License for more details.
-#
-
-my_dir=$(dirname "$0")
-
-. $my_dir/common_h
-. $my_dir/metadata_h
-. $my_dir/epoch_h
-
-function check_ceph_osd()
-{
-  local func="check_ceph_osd"
-  local host=`hostname`
-  # if ceph-osd service is still running, except flush-journal
-  if [ "`ps aux|grep ceph-osd|grep -v flush-journal|grep -v grep`"x != ""x ];then
-    echo "[$host]: $func: ceph-osd is running..., stop it"
-    exit 
-  fi
-}
-
-function cat_pg_epoch()
-{
-  local func="cat_pg_epoch" 
-  init_env_osd $1
-  if [ -e $node_pg_epoch ];then
-    cat $node_pg_epoch
-  fi
-} 
-
-function cat_image_v1()
-{
-  local func="cat_image_v1" 
-  init_env_osd $1
-  if [ -e $image_v1 ];then
-    cat $image_v1
-  fi
-} 
-
-function cat_image_v2()
-{
-  local func="cat_image_v2" 
-  init_env_osd $1
-  if [ -e $image_v2 ];then
-    cat $image_v2
-  fi
-} 
-
-function flush_osd_journal()
-{
-  local func="flush_osd_journal"
-  init_env_osd $1
-  local osd_data_path=$osd_data
-  local osd_journal_path=$osd_data/journal 
-  local whoami_path=$osd_data/whoami
-  local host=`hostname`
-  if [ ! -e $whoami_path ];then
-    echo "[$host]: $func: $whoami_path not exists"
-    exit
-  fi
-  local whoami=`cat $whoami_path`
-  echo "[$host]: $func ..."
-  ceph-osd -i $whoami --osd-data $osd_data_path --osd-journal $osd_journal_path --flush-journal >/dev/null
-  if [ $? -ne 0 ];then
-    echo "[$host]: $func: flush osd journal failed"
-    exit
-  fi
-}
-
-function do_omap_list()
-{
-  local func="do_omap_list"
-  init_env_osd $1
-  local host=`hostname`
-  echo "[$host]: $func ..."
-  get_omap_list
-}
-
-# get all pgs epoch 
-function do_pg_epoch()
-{
-  local func="do_pg_epoch"
-  init_env_osd $1
-  local node=`hostname`
-  get_pgid_list
-  >$node_pg_epoch
-  local pgid=
-  local data_path=
-  local host=`hostname`
-  echo "[$host]: $func ..."
-  while read line
-  do
-  {
-    pgid=`echo $line|awk '{print $1}'`
-    data_path=`echo $line|awk '{print $2}'`
-    get_pg_epoch $pgid
-    echo -e "$node $pgid $pg_epoch $data_path" >>$node_pg_epoch
-  } 
-  done < $pgid_list
-}
-
-# get an list of image in this osd node, pg epoch maybe not the latest, the admin node will do distinguish
-function do_image_list()
-{
-  local func="do_image_list"
-  init_env_osd $1
-  get_image_list   
-  local node=`hostname`
-  >$image_v1
-  >$image_v2
-  local host=`hostname`
-  echo "[$host]: $func ..."
-  for line in `cat $image_list_v1`
-  do
-    pgid=`get_pgid $line`
-    get_pg_epoch $pgid
-    echo "$node $line $pg_epoch" >> $image_v1
-  done
-  for line in `cat $image_list_v2`
-  do
-    pgid=`get_pgid $line`
-    get_pg_epoch $pgid
-    echo "$node $line $pg_epoch" >> $image_v2
-  done
-}
-
-function do_image_id()
-{
-  local func="do_image_id"
-  init_env_osd $1
-  get_image_id $2
-}
-
-function do_image_metadata_v1()
-{
-  local func="do_image_metadata_v1"
-  init_env_osd $1
-  local image_header_hobject=$2
-  local snap_name=$3
-  get_image_metadata_v1 $image_header_hobject $snap_name
-}
-
-function do_image_metadata_v2()
-{
-  local func="do_image_metadata_v2"
-  init_env_osd $1
-  local image_id=$2
-  local image_header_hobject=$3
-  local snap_name=$4
-  get_map_header $image_id 
-  get_meta_header_seq $map_header_prefix $map_header_key
-  get_image_metadata_v2 $meta_header_seq $snap_name
-}
-
-check_ceph_osd
-$*
diff --git a/src/rbd_recover_tool/rbd-recover-tool b/src/rbd_recover_tool/rbd-recover-tool
deleted file mode 100755 (executable)
index b24992d..0000000
+++ /dev/null
@@ -1,327 +0,0 @@
-#!/bin/bash
-# file: rbd-recover-tool
-#
-# Copyright (C) 2015 Ubuntu Kylin
-#
-# Author: Min Chen <minchen@ubuntukylin.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Library Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library Public License for more details.
-#
-
-# rbd-recover-tool is an offline recover tool for rbd image in replicated pool
-# when ceph cluster is stopped.
-# it is a simple disater recovery policy, just for urgent condition
-
-my_dir=$(dirname "$0")
-
-. $my_dir/common_h
-. $my_dir/metadata_h
-. $my_dir/epoch_h
-. $my_dir/database_h
-
-#scp files from admin node to osd node
-file1=common_h
-file2=metadata_h
-file3=epoch_h
-file4=osd_job
-
-#------------ admin node's action -------------
-
-function scp_file()
-{
-  local func="scp_file"
-  file=$1
-  if [ "$1"x = ""x ];then
-    echo "$func: not file input"
-    exit
-  fi
-  for host in `cat $osd_host`
-  do
-  {
-    echo "$func: $host"
-    scp $ssh_option $file $host:$job_path  1>/dev/null
-  } &
-  done
-}
-
-function scp_files()
-{
-  local func="scp_files"
-  for host in `cat $osd_host`
-  do
-  {
-    echo "$func: $host"
-    scp $ssh_option $file1 $host:$job_path
-    scp $ssh_option $file2 $host:$job_path
-    scp $ssh_option $file3 $host:$job_path
-    scp $ssh_option $file4 $host:$job_path
-  } &
-  done
-  wait
-  echo "$func: finish"
-}
-
-function scatter_node_jobs()
-{
-  local func="scatter_node_jobs"
-  local host=
-  local data_path=
-  echo "$func: flush osd journal & generate infos: omap, pg, image metadata ..."
-
-  trap 'echo $func failed; exit' INT HUP
-  while read line
-  do
-  {
-    host=`echo $line|awk '{print $1}'`
-    data_path=`echo $line|awk '{print $2}'`
-    check_osd_process $host
-
-    cmd="mkdir -p $job_path"
-    ssh $ssh_option $host $cmd
-    scp $ssh_option $file1 $host:$job_path  >/dev/null
-    scp $ssh_option $file2 $host:$job_path  >/dev/null
-    scp $ssh_option $file3 $host:$job_path  >/dev/null
-    scp $ssh_option $file4 $host:$job_path  >/dev/null
-
-    cmd="bash $job_path/osd_job flush_osd_journal $data_path;"
-    cmd="$cmd $job_path/osd_job do_omap_list $data_path;"
-    cmd="$cmd bash $job_path/osd_job do_pg_epoch $data_path;"
-    cmd="$cmd bash $job_path/osd_job do_image_list $data_path;"
-
-    ssh $ssh_option $host $cmd </dev/null
-  } &
-  done < $osd_host_path
-  wait
-  echo "$func: finish"
-}
-
-function gather_node_infos()
-{
-  local func="gather_node_infos"
-  echo "$func ..."
-  >$pg_coll
-  >$image_coll_v1
-  >$image_coll_v2
-  trap 'echo $func failed; exit' INT HUP
-  while read line
-  do
-  {
-    host=`echo $line|awk '{print $1}'`
-    data_path=`echo $line|awk '{print $2}'`
-    echo "$func: $host"
-    check_osd_process $host
-
-    #pg epoch
-    cmd1="bash $job_path/osd_job cat_pg_epoch $data_path"
-    ssh $ssh_option $host $cmd1 >> $pg_coll
-    #image v1
-    cmd2="bash $job_path/osd_job cat_image_v1 $data_path"
-    ssh $ssh_option $host $cmd2 >> $image_coll_v1
-    #image v2
-    cmd3="bash $job_path/osd_job cat_image_v2 $data_path"
-    ssh $ssh_option $host $cmd3 >> $image_coll_v2
-  } &
-  done < $osd_host_path
-  wait
-  echo "$func: finish"
-}
-
-function scatter_gather()
-{
-  local func="scatter_gather"
-  if [ ! -s $osd_host ];then
-    echo "$func: no osd_host input"
-    exit
-  fi
-  if [ ! -s $mon_host ];then
-    echo "$func: no mon_host input"
-    exit
-  fi
-  scatter_node_jobs
-  gather_node_infos
-}
-
-
-#------------- operations --------------
-
-function database()
-{
-  scatter_gather
-  gen_database
-}
-
-function list()
-{
-  list_images
-}
-
-function lookup()
-{
-  lookup_image $1 $2 $3
-}
-
-function recover()
-{
-  recover_image $1 $2 $3 $4
-}
-
-#------------- helper -------------
-
-function usage()
-{
-  local cmd_name="rbd-recover-tool"
-  echo 
-  echo "$cmd_name is used to recover rbd image of replicated pool, 
-       when all ceph services are stopped"
-  echo "Usage:"
-  echo "$cmd_name database
-                       gather pg info, object info, image metadata, 
-                       and epoch info from all osd nodes,
-                       this will cosume a long time, just be patient, 
-                       especially when scale up to 1000+ osds"
-  echo "$cmd_name list
-                       list all rbd images of all replicated pools, 
-                       before to lookup & recover"
-  echo "$cmd_name lookup  <pool_id>/<image_name>[@[<snap_name>]]
-                       show image metadata: image format, rbd id, size, order, snapseq
-                       In addtion, for image with snapshots, 
-                       this will list all snapshot infomations"
-  echo "$cmd_name recover <pool_id>/<image_name>[@[<snap_name>]] [</path/to/store/image>]
-                       all snapshots share one image head, to economize disk space
-                       so there is only one snapshot at any time,
-                       image is saved at </path/to/store/image>/pool_<pool_id>/image_name/image_name
-                       cat <path/to/store/image>/pool_<pool_id>/image_name/@CURRENT,
-                       will show snapid
-                       recover to raw image/nosnap/head: <image_name>
-                       rollback to image head:           <image_name>@
-                       rollback to image snap:           <image_name>@<snap_name>
-                       recover steps:
-                       1. recover image nosnap (only one time)
-                       2. rollback to image snap"
-}
-
-function get_path()
-{
-  local func="get_path"
-  if [ $# -lt 1 ];then
-    return
-  fi
-  if [[ $1 =~ // ]];then
-    return # "/path//to" is invalid
-  fi
-  local parent=`dirname $1`
-  local name=`basename $1`
-  if [ "$parent"x = "/"x ];then
-    echo "$parent$name"
-  else
-    echo -n "$parent/$name"
-  fi
-}
-
-function admin_cmd()
-{
-  local func="admin_cmd"
-  if [ $# -lt 1 ];then
-    usage
-    exit
-  fi
-  if [ "$1"x = "-h"x ] || [ "$1"x = "--help"x ];then
-    usage
-    exit
-  fi
-  
-  if [ "$1"x = "database"x ];then
-    if [ $# -gt 1 ];then
-      usage
-      exit
-    fi
-    # remove osd_host to refresh osd_host and osd_host_mapping
-    rm -f $osd_host
-    init_env_admin
-    database
-  elif [ "$1"x = "list"x ];then
-    if [ $# -gt 1 ];then
-      usage
-      exit
-    fi
-    init_env_admin
-    list
-  elif [ "$1"x = "lookup"x ];then
-    if [ $# -gt 2 ];then
-      usage
-      exit
-    fi
-    local pool_id=-1
-    local image_name=
-    local snap_name=
-    if [[ $2 =~  ^([^@/]+)/([^@/]+)$ ]];then
-      pool_id="${BASH_REMATCH[1]}"
-      image_name="${BASH_REMATCH[2]}"
-    elif [[ $2 =~  ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
-      pool_id="${BASH_REMATCH[1]}"
-      image_name="${BASH_REMATCH[2]}"
-      snap_name="${BASH_REMATCH[3]}"
-    else
-      echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
-      exit
-    fi
-    init_env_admin
-    lookup $pool_id $image_name $snap_name
-  elif [ "$1"x = "recover"x ];then
-    if [ $# -lt 2 ] || [ $# -gt 3 ];then
-      usage
-      exit
-    fi
-    local pool_id=-1
-    local image_name=
-    local snap_name=@
-    local image_dir=
-    if [[ $2 =~  ^([^@/]+)/([^@/]+)$ ]];then
-      pool_id="${BASH_REMATCH[1]}"
-      image_name="${BASH_REMATCH[2]}"
-    elif [[ $2 =~  ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
-      pool_id="${BASH_REMATCH[1]}"
-      image_name="${BASH_REMATCH[2]}"
-      snap_name="${BASH_REMATCH[3]}"
-      if [ "$snap_name"x = ""x ];then
-        snap_name=@@
-      fi
-    else
-      echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
-      exit
-    fi
-    if [ $# = 3 ];then
-      image_dir=`get_path $3`
-      if [ "image_dir"x = ""x ];then
-        echo "$3 invalid"
-        exit
-      fi
-    fi
-    init_env_admin
-    recover $pool_id $image_name $snap_name $image_dir
-  elif [ "$1"x = "scp_files"x ];then
-    if [ $# -gt 1 ];then
-      exit
-    fi
-    admin_parse_osd
-    scp_files
-  elif [ "$1"x = "scp_file"x ];then
-    if [ $# -gt 2 ];then
-      exit
-    fi
-    admin_parse_osd
-    scp_file $2
-  else
-    echo "$func: $1: command not found"
-  fi
-}
-
-admin_cmd $*
diff --git a/src/rbd_recover_tool/test_rbd_recover_tool.sh b/src/rbd_recover_tool/test_rbd_recover_tool.sh
deleted file mode 100755 (executable)
index 80b0a8c..0000000
+++ /dev/null
@@ -1,542 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2015 Ubuntu Kylin
-#
-# Author: Min Chen <minchen@ubuntukylin.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Library Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Library Public License for more details.
-#
-
-# unit test case for rbd-recover-tool
-
-#prepare:
-# - write config files: config/osd_host, config/mon_host, config/storage_path, config/mds_host if exist mds
-#step 1. rbd export all images as you need
-#step 2. stop all ceph services
-#step 3. use ceph_rbd_recover_tool to recover all images
-#step 4. compare md5sum of recover image with that of export image who has the same image name
-
-ssh_opt="-o ConnectTimeout=1"
-my_dir=$(dirname "$0")
-tool_dir=$my_dir
-
-#storage_path=$my_dir/config/storage_path
-mon_host=$my_dir/config/mon_host
-osd_host=$my_dir/config/osd_host
-mds_host=$my_dir/config/mds_host
-
-test_dir= # `cat $storage_path`
-export_dir= #$test_dir/export
-recover_dir= #$test_dir/recover
-image_names= #$test_dir/image_names
-online_images= #$test_dir/online_images, all images on ceph rbd pool
-gen_db= #$test_dir/gen_db, label database if exist
-pool=rbd
-pool_id=2
-
-function get_pool_id()
-{
-  local pool_id_file=/tmp/pool_id_file.$$$$
-  ceph osd pool stats $pool|head -n 1|awk '{print $4}' >$pool_id_file
-  if [ $? -ne 0 ];then
-    echo "$func: get pool id failed: pool = $pool"
-    rm -f $pool_id_file
-    exit
-  fi
-  pool_id=`cat $pool_id_file`
-  echo "$func: pool_id = $pool_id"
-  rm -f $pool_id_file
-}
-
-function init()
-{
-  local func="init"
-  if [ $# -eq 0 ];then
-    echo "$func: must input <path> to storage images, enough disk space is good"
-    exit
-  fi
-  if [ ! -s $osd_host ];then
-    echo "$func: config/osd_host not exists or empty"
-    exit
-  fi
-  if [ ! -s $mon_host ];then
-    echo "$func: config/mon_host not exists or empty"
-    exit
-  fi
-  if [ ! -e $mds_host ];then
-    echo "$func: config/mds_host not exists"
-    exit
-  fi
-  test_dir=$1
-  export_dir=$test_dir/export
-  recover_dir=$test_dir/recover
-  image_names=$test_dir/image_names
-  online_images=$test_dir/online_images
-  gen_db=$test_dir/gen_db
-
-  trap 'echo "ceph cluster is stopped ..."; exit;' INT
-  ceph -s >/dev/null
-  get_pool_id
-
-  mkdir -p $test_dir
-  mkdir -p $export_dir
-  mkdir -p $recover_dir
-  rm -rf $export_dir/*
-  rm -rf $recover_dir/*
-}
-
-function do_gen_database()
-{
-  local func="do_gen_database"
-  if [ -s $gen_db ] && [ `cat $gen_db` = 1 ];then
-    echo "$func: database already existed"
-    exit
-  fi
-  bash $tool_dir/rbd-recover-tool database
-  echo 1 >$gen_db 
-}
-
-#check if all ceph processes are stopped
-function check_ceph_service()
-{
-  local func="check_ceph_service"
-  local res=`cat $osd_host $mon_host $mds_host|sort -u|tr -d [:blank:]|xargs -n 1 -I @ ssh $ssh_opt @ "ps aux|grep -E \"(ceph-osd|ceph-mon|ceph-mds)\"|grep -v grep"`
-  if [ "$res"x != ""x ];then
-    echo "$func: NOT all ceph services are stopped"
-    return 1
-    exit
-  fi
-  echo "$func: all ceph services are stopped"
-  return 0
-}
-
-function stop_ceph()
-{
-  local func="stop_ceph"
-  #cat osd_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-osd" 
-  while read osd
-  do
-  {
-    osd=`echo $osd|tr -d [:blank:]`
-    if [ "$osd"x = ""x ];then
-      continue
-    fi
-    #ssh $ssh_opt $osd "killall ceph-osd ceph-mon ceph-mds" </dev/null
-    ssh $ssh_opt $osd "killall ceph-osd" </dev/null
-  } &
-  done < $osd_host
-  wait
-  echo "waiting kill all osd ..."
-  sleep 1
-  #cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon ceph-osd ceph-mds" 
-  cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon" 
-  #cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds ceph-mon ceph-osd" 
-  cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds" 
-}
-
-function create_image()
-{
-  local func="create_image"
-  if [ ${#} -lt 3 ];then
-    echo "create_image: parameters: <image_name> <size> <image_format>"
-    exit
-  fi
-  local image_name=$1
-  local size=$2
-  local image_format=$3
-  if [ $image_format -lt 1 ] || [ $image_format -gt 2 ];then
-    echo "$func: image_format must be 1 or 2"
-    exit
-  fi
-  local res=`rbd list|grep -E "^$1$"` 
-  echo "$func $image_name ..."
-  if [ "$res"x = ""x ];then
-    rbd -p $pool create $image_name --size $size --image_format $image_format
-  else
-    if [ $image_format -eq 2 ];then
-      rbd snap ls $image_name|tail -n +2|awk '{print $2}'|xargs -n 1 -I % rbd snap unprotect $image_name@%
-    fi
-    rbd snap purge $image_name
-    #rbd rm $image_name
-    rbd -p $pool resize --allow-shrink --size $size $image_name
-  fi
-}
-
-function export_image()
-{
-  local func="export_image"
-
-  if [ $# -lt 2 ];then
-    echo "$func: parameters: <image_name> <image_format> [<image_size>]"
-    exit
-  fi
-
-  local image_name=$1
-  local format=$(($2)) 
-  local size=$(($3)) #MB
-  
-  if [ $format -ne 1 ] && [ $format -ne 2 ];then
-    echo "$func: image format must be 1 or 2"
-    exit
-  fi
-
-  if [ $size -eq 0 ];then
-    size=24 #MB
-    echo "$func: size = $size"
-  fi
-  local mnt=/rbdfuse 
-
-  mount |grep "rbd-fuse on /rbdfuse" &>/dev/null
-  if [ $? -ne 0 ];then
-    rbd-fuse $mnt
-  fi
-    
-  create_image $image_name $size $format
-  dd conv=notrunc if=/dev/urandom of=$mnt/$image_name bs=4M count=$(($size/4))
-  
-  local export_image_dir=$export_dir/pool_$pool_id/$image_name
-  mkdir -p $export_image_dir
-  local export_md5_nosnap=$export_image_dir/@md5_nosnap
-  >$export_md5_nosnap
-  local export_image_path=$export_image_dir/$image_name
-  rm -f $export_image_path
-
-  rbd export $pool/$image_name $export_image_path
-  md5sum $export_image_path |awk '{print $1}' >$export_md5_nosnap 
-}
-
-function recover_image()
-{
-  local func="recover_snapshots"
-  if [ $# -lt 1 ];then
-    echo "$func: parameters: <image_name>"
-    exit
-  fi
-
-  local image_name=$1
-  #pool_id=29
-
-  local recover_image_dir=$recover_dir/pool_$pool_id/$image_name
-  mkdir -p $recover_image_dir
-  local recover_md5_nosnap=$recover_image_dir/@md5_nosnap
-  >$recover_md5_nosnap
-  local snapshot=
-  
-  bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir
-  md5sum $recover_image_dir/$image_name|awk '{print $1}' >$recover_md5_nosnap
-}
-
-function make_snapshot()
-{
-  local func="make_snapshot"
-  if [ $# -lt 5 ];then
-    echo "$func: parameters: <ofile> <seek> <count> <snap> <export_image_dir>"
-    exit
-  fi
-  local ofile=$1
-  local seek=$(($2))
-  local count=$(($3))
-  local snap=$4
-  local export_image_dir=$5
-
-  if [ $seek -lt 0 ];then
-    echo "$func: seek can not be minus"
-    exit
-  fi
-
-  if [ $count -lt 1 ];then
-    echo "$func: count must great than zero"
-    exit
-  fi
-
-  echo "[$snap] $func ..."
-  echo "$1 $2 $3 $4"
-  rbd snap ls $image_name|grep $snap;
-  
-  local res=$?
-  if [ $res -eq 0 ];then
-    return $res
-  fi
-
-  dd conv=notrunc if=/dev/urandom of=$ofile bs=1M count=$count seek=$seek 2>/dev/null
-  snapshot=$image_name@$snap 
-  rbd snap create $snapshot
-  rm -f $export_image_dir/$snapshot
-  rbd export $pool/$image_name $export_image_dir/$snapshot
-  pushd $export_image_dir >/dev/null
-  md5sum $snapshot >> @md5
-  popd >/dev/null
-}
-
-function recover_snapshots()
-{
-  local func="recover_snapshots"
-  if [ $# -lt 1 ];then
-    echo "$func: parameters: <image_name>"
-    exit
-  fi
-
-  local image_name=$1
-  #pool_id=29
-
-  local recover_image_dir=$recover_dir/pool_$pool_id/$image_name
-  mkdir -p $recover_image_dir
-  local recover_md5=$recover_image_dir/@md5
-  >$recover_md5
-  local snapshot=
-
-  
-  # recover head
-  bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir
-
-  # recover snapshots
-  for((i=1; i<10; i++))
-  do
-    snapshot=snap$i
-    bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name@$snapshot $recover_dir
-    pushd $recover_image_dir >/dev/null
-    local chksum=`md5sum $image_name|awk '{print $1}'` 
-    echo "$chksum  $image_name@$snapshot" >>@md5
-    popd >/dev/null
-  done
-}
-
-function export_snapshots()
-{
-  local func="export_snapshots"
-
-  if [ $# -lt 2 ];then
-    echo "$func: parameters: <image_name> <image_format> [<image_size>]"
-    exit
-  fi
-
-  local image_name=$1
-  local format=$(($2)) 
-  local size=$(($3)) #MB
-  
-  if [ $format -ne 1 ] && [ $format -ne 2 ];then
-    echo "$func: image format must be 1 or 2"
-    exit
-  fi
-
-  if [ $size -eq 0 ];then
-    size=24 #MB
-    echo "$func: size = $size"
-  fi
-  local mnt=/rbdfuse 
-
-  mount |grep "rbd-fuse on /rbdfuse" &>/dev/null
-  if [ $? -ne 0 ];then
-    rbd-fuse $mnt
-  fi
-    
-  create_image $image_name $size $format
-  
-  local export_image_dir=$export_dir/pool_$pool_id/$image_name
-  mkdir -p $export_image_dir
-  local export_md5=$export_image_dir/@md5
-  >$export_md5
-
-  # create 9 snapshots
-  # image = {object0, object1, object2, object3, object4, object5, ...}
-  #
-  # snap1 : init/write all objects 
-  # snap2 : write object0
-  # snap3 : write object1
-  # snap4 : write object2
-  # snap5 : write object3
-  # snap6 : write object4
-  # snap7 : write object5
-  # snap8 : write object0
-  # snap9 : write object3
-
-  make_snapshot $mnt/$image_name 0 $size snap1 $export_image_dir
-  make_snapshot $mnt/$image_name 0  1    snap2 $export_image_dir
-  make_snapshot $mnt/$image_name 4  1    snap3 $export_image_dir
-  make_snapshot $mnt/$image_name 8  1    snap4 $export_image_dir
-  make_snapshot $mnt/$image_name 12 1    snap5 $export_image_dir
-  make_snapshot $mnt/$image_name 16 1    snap6 $export_image_dir
-  make_snapshot $mnt/$image_name 20 1    snap7 $export_image_dir
-  make_snapshot $mnt/$image_name 1  1    snap8 $export_image_dir
-  make_snapshot $mnt/$image_name 13 1    snap9 $export_image_dir
-}
-
-function check_recover_nosnap()
-{
-  local func="check_recover_nosnap"
-  if [ $# -lt 3 ];then
-    echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>"
-  fi
-  local export_md5=$1
-  local recover_md5=$2
-  local image_name=$3
-
-  local ifpassed="FAILED"
-  echo "================ < $image_name nosnap > ================" 
-
-  local export_md5sum=`cat $export_md5` 
-  local recover_md5sum=`cat $recover_md5` 
-
-  if [ "$export_md5sum"x != ""x ] && [ "$export_md5sum"x = "$recover_md5sum"x ];then
-    ifpassed="PASSED"
-  fi
-  echo "export:  $export_md5sum"
-  echo "recover: $recover_md5sum $ifpassed"
-}
-
-function check_recover_snapshots()
-{
-  local func="check_recover_snapshots"
-  if [ $# -lt 3 ];then
-    echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>"
-  fi
-  local export_md5=$1
-  local recover_md5=$2
-  local image_name=$3
-
-  local ifpassed="FAILED"
-  echo "================ < $image_name snapshots > ================" 
-
-  OIFS=$IFS
-  IFS=$'\n'
-  local export_md5s=(`cat $export_md5`)
-  local recover_md5s=(`cat $recover_md5`)
-  for((i=0; i<9; i++))
-  do
-    OOIFS=$IFS
-    IFS=$'  '
-    local x=$(($i+1))
-    snapshot=snap$x
-
-    local export_arr=(`echo ${export_md5s[$i]}`)
-    local recover_arr=(`echo ${recover_md5s[$i]}`)
-    echo "export:  ${export_md5s[$i]}"
-    if [ "${export_arr[1]}"x != ""x ] && [ "${export_arr[1]}"x = "${recover_arr[1]}"x ];then
-      ifpassed="PASSED"
-    fi
-    echo "recover: ${recover_md5s[$i]} $ifpassed"
-    IFS=$OOIFS
-  done
-  IFS=$OIFS
-}
-
-# step 1: export image, snapshot
-function do_export_nosnap()
-{
-  export_image image_v1_nosnap 1
-  export_image image_v2_nosnap 2
-}
-
-function do_export_snap()
-{
-  export_snapshots  image_v1_snap 1
-  export_snapshots  image_v2_snap 2
-}
-
-# step 2: stop ceph cluster and gen database
-function stop_cluster_gen_database()
-{
-  trap 'echo stop ceph cluster failed; exit;' INT HUP
-  stop_ceph 
-  sleep 2
-  check_ceph_service
-  local res=$?
-  while [ $res -ne 0 ]
-  do
-    stop_ceph
-    sleep 2
-    check_ceph_service
-    res=$?
-  done
-
-  echo 0 >$gen_db
-  do_gen_database
-}
-
-# step 3: recover image,snapshot
-function do_recover_nosnap()
-{
-  recover_image image_v1_nosnap
-  recover_image image_v2_nosnap
-}
-
-function do_recover_snap()
-{
-  recover_snapshots image_v1_snap
-  recover_snapshots image_v2_snap
-}
-
-# step 4: check md5sum pair<export_md5sum, recover_md5sum>
-function do_check_recover_nosnap()
-{
-  local image1=image_v1_nosnap
-  local image2=image_v2_nosnap
-
-  local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5_nosnap
-  local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5_nosnap
-  local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5_nosnap
-  local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5_nosnap
-
-  check_recover_nosnap $export_md5_1 $recover_md5_1 $image1 
-  check_recover_nosnap $export_md5_2 $recover_md5_2 $image2
-}
-
-function do_check_recover_snap()
-{
-  local image1=image_v1_snap
-  local image2=image_v2_snap
-
-  local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5
-  local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5
-  local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5
-  local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5
-
-  check_recover_snapshots $export_md5_1 $recover_md5_1 $image1 
-  check_recover_snapshots $export_md5_2 $recover_md5_2 $image2
-}
-
-function test_case_1()
-{
-  do_export_nosnap
-  stop_cluster_gen_database
-  do_recover_nosnap
-  do_check_recover_nosnap
-}
-
-function test_case_2()
-{
-  do_export_snap
-  stop_cluster_gen_database
-  do_recover_snap
-  do_check_recover_snap
-}
-
-function test_case_3()
-{
-  do_export_nosnap
-  do_export_snap
-
-  stop_cluster_gen_database
-
-  do_recover_nosnap
-  do_recover_snap
-
-  do_check_recover_nosnap
-  do_check_recover_snap
-}
-
-
-init $*
-test_case_3
diff --git a/src/tools/rbd_recover_tool/FAQ b/src/tools/rbd_recover_tool/FAQ
new file mode 100644 (file)
index 0000000..1655e85
--- /dev/null
@@ -0,0 +1,16 @@
+# author: min chen(minchen@ubuntukylin.com) 2014 2015
+
+1. error "get_image_metadata_v2: no meta_header_seq input"
+cause: 
+  database is old, refresh database
+solution:
+  ./rbd-recover-tool database
+
+2. Error initializing leveldb: IO error: lock /var/lib/ceph/osd/ceph-0/current/omap/LOCK: Resource temporarily unavailable
+   ERROR: error flushing journal /var/lib/ceph/osd/ceph-0/journal for object store /var/lib/ceph/osd/ceph-0: (1) Operation not permitted
+cause: 
+  "./rbd-recover-tool database" was interrupted after its commands had already been sent to each osd node, so a process there is still reading leveldb and holds its LOCK.
+  if "./rbd-recover-tool database" is run again, all commands are sent to the osd nodes again while the previous process is still locking leveldb,
+  so all of the new commands fail.
+solution:
+  wait until all previous commands have finished.
diff --git a/src/tools/rbd_recover_tool/README b/src/tools/rbd_recover_tool/README
new file mode 100644 (file)
index 0000000..73c19b9
--- /dev/null
@@ -0,0 +1,97 @@
+# author: Min chen(minchen@ubuntukylin.com) 2014 2015
+
+------------- ceph rbd recover tool -------------
+
+  ceph rbd recover tool is used for recovering ceph rbd image, when all ceph services are killed.
+it is based on ceph-0.80.x (Firefly and newer)
+  currently, ceph services (ceph-mon, ceph-osd) can eventually become unavailable because of bugs or other causes,
+especially on large scale ceph clusters, so that the ceph cluster can not supply service
+and rbd images can not be accessed. In this case, a tool to recover rbd images is necessary.
+  ceph rbd recover tool is just used for this, it can collect all objects of an image from distributed
+osd nodes with the latest pg epoch, and splice objects by offset to a complete image. To make sure
+object data is complete, this tool does flush osd journal on each osd node before recovering.
+  but, there are some limitations:
+-need ssh service and unobstructed network 
+-osd data must be accessible on local disk
+-clone image is not supported, while snapshot is supported
+-only replicated pools are supported
+
+before you run this tool, you should make sure that:
+1). all processes (ceph-osd, ceph-mon, ceph-mds) are shutdown
+2). ssh daemon is running & network is ok (ssh to each node without password)
+3). ceph-kvstore-tool is installed(for ubuntu: apt-get install ceph-test)
+4). osd disk is not crashed and data can be accessed on local filesystem
+
+-architecture:
+
+                      +---- osd.0
+                      |
+admin_node -----------+---- osd.1
+                      |
+                      +---- osd.2
+                      |
+                      ......
+
+-files:
+admin_node: {rbd-recover-tool  common_h  epoch_h  metadata_h  database_h}
+osd:        {osd_job           common_h  epoch_h  metadata_h} #/var/rbd_tool/osd_job
+in this architecture, admin_node acts as client, osds act as server.
+so, they run different files: 
+on admin_node run:  rbd-recover-tool <action> [<parameters>]
+on osd node run:    ./osd_job <function> <parameters>
+admin_node will copy files: osd_job, common_h, epoch_h, metadata_h to remote osd node
+
+
+-config file
+before you run this tool, make sure write config files first
+osd_host_path: osd hostnames and osd data path #user input
+  osdhost0     /var/lib/ceph/osd/ceph-0
+  osdhost1     /var/lib/ceph/osd/ceph-1
+  ......
+mon_host: all mon node hostname #user input
+  monhost0
+  monhost1
+  ......
+mds_host: all mds node hostname #user input
+  mdshost0
+  mdshost1
+  ......
+then, init_env_admin function will create file: osd_host
+osd_host: all osd node hostname #generated by admin_job, user ignore it
+  osdhost0
+  osdhost1
+  ......
+
+
+-usage:
+rbd-recover-tool <operation>
+<operation> :
+database               #generating offline database: hobject path, node hostname, pg_epoch and image metadata
+list                   #list all images from offline database
+lookup <pool_id>/<image_name>[@[<snap_name>]]  #lookup image metadata in offline database
+recover <pool_id>/<image_name>[@[<snap_name>]] [/path/to/store/image]  #recover image data according to image metadata
+
+-steps:
+1. stop all ceph services: ceph-mon, ceph-osd, ceph-mds
+2. setup config files: osd_host_path, mon_host, mds_host
+3. rbd-recover-tool database   # wait a long time 
+4. rbd-recover-tool list
+5. rbd-recover-tool recover <pool_id>/<image_name>[@[<snap_name>]] [/path/to/store/image]
+
+
+-debug & error check
+if an admin_node operation fails, you can check it on the osd node
+cd /var/rbd_tool/osd_job
+./osd_job <operation>
+<operation> :
+do_image_id <image_id_hobject>         #get image id of image format v2 
+do_image_id <image_header_hobject>     #get image id of image format v1
+do_image_metadata_v1 <image_header_hobject>    #get image metadata of image format v1, maybe pg epoch is not latest
+do_image_metadata_v2 <image_header_hobject>    #get image metadata of image format v2, maybe pg epoch is not latest
+do_image_list                          #get all images on this osd(image head hobject)
+do_pg_epoch                            #get all pg epoch and store it in /var/rbd_tool/single_node/node_pg_epoch
+do_omap_list                           #list all omap headers and omap entries on this osd
+
+
+-FAQ
+file FAQ lists some common confusing cases while testing
diff --git a/src/tools/rbd_recover_tool/TODO b/src/tools/rbd_recover_tool/TODO
new file mode 100644 (file)
index 0000000..c36d4c9
--- /dev/null
@@ -0,0 +1,2 @@
+
+1. support clone image
diff --git a/src/tools/rbd_recover_tool/common_h b/src/tools/rbd_recover_tool/common_h
new file mode 100644 (file)
index 0000000..4a0d3fc
--- /dev/null
@@ -0,0 +1,412 @@
+#!/bin/bash
+# file: common_h
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+
+# directory of the running script; the config files below are located relative to it
+my_dir=$(dirname "$0")
+
+# admin node init path
+rbd_image=/var/rbd_tool/rbd_image
+database=$rbd_image/database
+image_coll_v1=$rbd_image/image_coll_v1
+image_coll_v2=$rbd_image/image_coll_v2
+pg_coll=$rbd_image/pg_coll
+images=$rbd_image/images
+images_meta=$rbd_image/images_meta
+default_backup_dir=/var/rbd_tool/default_backup_dir
+
+# admin node: image snap & nosnap (not assigned elsewhere in common_h)
+nosnap= #$rbd_image/<image_name>/nosnap
+snap= #$rbd_image/<image_name>/<snap_name>
+
+# osd node init path
+job_path=/var/rbd_tool/osd_job
+single_node=/var/rbd_tool/single_node
+
+# osd node vars: assigned by init_env_osd(); <osd_id> below is `gen_md5 $osd_data`
+osd_env= #not assigned elsewhere in common_h -- TODO confirm where it is set
+osd_data= #osd data path passed to init_env_osd, e.g. /var/lib/ceph/osd/$cluster-$id
+omap_path= #$osd_data/current/omap
+image_list_v1= #$single_node/<osd_id>/image_list_v1
+image_list_v2= #$single_node/<osd_id>/image_list_v2
+image_v1= #$single_node/<osd_id>/image_v1
+image_v2= #$single_node/<osd_id>/image_v2
+pgid_list= #$single_node/<osd_id>/pgid_list
+node_pg_epoch= #$single_node/<osd_id>/node_pg_epoch
+omap_list= #$single_node/<osd_id>/omap_list 
+
+# admin node config file
+osd_host_path=$my_dir/config/osd_host_path
+osd_host_mapping= #`pwd`/config/osd_host_mapping # host --> host_remote: assigned and written by init_env_admin()
+osd_host=$my_dir/config/osd_host #generated by function init_env_admin()
+mon_host=$my_dir/config/mon_host
+mds_host=$my_dir/config/mds_host
+
+# ssh option: passed unquoted to ssh wherever $ssh_option is used
+ssh_option="-o ConnectTimeout=1"
+
+# gen md5sum
+function gen_md5()
+{
+  echo $1|md5sum|awk '{print $1}'
+}
+
+# on each osd node
+# check ceph enviroment: ssh, ceph-kvstore-tool, osd_data_path 
+function check_ceph_env()
+{
+  local func="check_ceph_env"
+  if [ $# -lt 2 ];then
+    echo "$func: parameters: <node> <data_path>" 
+    exit
+  fi
+  local node=$1
+  local data_path=$2
+  local res=
+  local cmd=
+
+  trap 'echo [$node]: ssh failed; exit' INT HUP
+  ssh -o ConnectTimeout=1 $node "echo -n" </dev/null
+  res=$?
+  if [ $res -ne 0 ];then
+    echo "[$node]: ssh failed"
+    exit
+  fi
+
+  cmd=ceph-kvstore-tool
+  trap 'echo [$node]: $cmd failed; exit' INT HUP
+  ssh -o ConnectTimeout=1 $node "$cmd &>/dev/null;" </dev/null 
+  res=$?
+  # ceph-kvstore-tool will return 1 with no parameters input
+  if [ $res -ne 1 ];then
+    echo "[$node]: $cmd not installed"
+    exit
+  fi
+
+  trap 'echo [$node]: stat $data_path failed; exit' INT HUP
+  ssh -o ConnectTimeout=1 $node "stat $data_path &>/dev/null;"  </dev/null
+  res=$?
+  if [ $res -ne 0 ];then
+    echo "[$node]: $data_path not exists"
+    exit
+  fi
+}
+
+# osd node context : osd_data_path
+function init_env_osd()
+{
+  local func="init_env_osd"
+  if [ "$1"x = ""x ];then
+    echo "$func: no osd_data_path input" 
+    exit
+  fi
+  osd_data=$1
+  omap_path=$osd_data/current/omap
+
+  if [ ! -e $single_node ];then
+    mkdir -p $single_node
+  fi
+
+  local osd_id=`gen_md5 $osd_data`
+  local osd_dir=$single_node/$osd_id
+
+  if [ ! -e $osd_dir ];then
+    mkdir -p $osd_dir
+  fi
+  image_list_v1=$osd_dir/image_list_v1
+  image_list_v2=$osd_dir/image_list_v2
+  image_v1=$osd_dir/image_v1
+  image_v2=$osd_dir/image_v2
+  pgid_list=$osd_dir/pgid_list
+  node_pg_epoch=$osd_dir/node_pg_epoch
+  omap_list=$osd_dir/omap_list
+}
+
+# admin node process file: osd_host_path
+#
+# init_env_admin
+# Validates the admin-node config files and (re)generates two derived files:
+#   $osd_host          one configured osd hostname per valid osd_host_path line
+#   $osd_host_mapping  "<configured hostname> <output of `hostname` on that node>"
+# Each valid line is also checked remotely with check_ceph_env.
+# Prints a message and exits on any invalid config or unreachable node.
+function init_env_admin()
+{
+  local func="init_env_admin" 
+  # osd_host_mapping is placed under the current working directory, unlike the
+  # other config files which are located via $my_dir
+  local pwd_path=`pwd`
+  osd_host_mapping=$pwd_path/config/osd_host_mapping
+  if [ ! -s $osd_host_path ];then
+    echo "$func: config/osd_host_path not exists or empty"
+    exit
+  fi
+  if [ ! -e $rbd_image ];then
+    mkdir -p $rbd_image
+  fi
+  if [ ! -e $images ];then
+    mkdir -p $images
+  fi
+
+  # mon_host must be non-empty, mds_host only has to exist (it may be empty)
+  if [ ! -s $mon_host ];then
+    echo "$func: config/mon_host not exists or empty"
+    exit
+  fi
+  if [ ! -e $mds_host ];then
+    echo "$func: config/mds_host not exists"
+    exit
+  fi
+
+  # we just judge if osd_host is needed to be updated
+  # (skip regeneration when osd_host is non-empty and newer than osd_host_path)
+  if [ -s $osd_host ] && [ $osd_host -nt $osd_host_path ];then
+    return  
+  fi
+  echo "$func: create osd_host ..."
+  # create file: osd_host and osd_host_mapping
+  >$osd_host
+  >$osd_host_mapping
+  local lines=0
+  local lineno=0
+  while read line
+  do
+    lineno=$(($lineno + 1))
+    # blank lines are skipped but still counted in lineno for error messages
+    if [ "$line"x = ""x ];then
+      continue;
+    fi
+    local node=`echo $line|awk '{print $1}'`
+    if [ "$node"x = ""x ];then
+      echo "$func: osd_host_path : line $lineno: osd hostname not input"
+      rm -rf $osd_host $osd_host_mapping
+      exit
+    fi
+    local data_path=`echo $line|awk '{print $2}'`
+    if [ "$data_path"x = ""x ];then
+      echo "$func: osd_host_path : line $lineno: osd data_path not input"
+      rm -rf $osd_host $osd_host_mapping
+      exit
+    fi
+    lines=$(($lines + 1))
+    # in case : there are several hostnames on the same node
+    # just need output of `hostname`
+    local hostname_alias=
+    # stdin comes from /dev/null so ssh does not consume the rest of osd_host_path
+    hostname_alias=`ssh $ssh_option $node "hostname" 2>/dev/null </dev/null`
+    if [ "$hostname_alias"x = ""x ];then
+      echo "$func: osd_host_path: line $lineno: $node: get remote hostname alias failed"
+      rm -rf $osd_host $osd_host_mapping
+      exit
+    fi
+    echo "$node $hostname_alias" >>$osd_host_mapping
+    echo $node >> $osd_host
+    # check ceph env on remote osd
+    # NOTE(review): check_ceph_env exits on failure without removing $osd_host /
+    # $osd_host_mapping, so a partially written osd_host that is newer than
+    # osd_host_path is left behind and the next run returns early above
+    check_ceph_env $node $data_path
+  done < $osd_host_path
+
+  if [ $lines = 0 ];then
+    echo "$func: no osd host path valid"
+    exit
+  fi
+}
+
+function admin_parse_osd()
+{
+  local func="admin_parse_osd"
+  if [ -s $osd_host ];then
+    return  
+  fi
+  # create file: osd_host
+  >$osd_host
+  local lines=0
+  local lineno=0
+  while read line
+  do
+    lineno=$(($lineno + 1))
+    if [ "$line"x = ""x ];then
+      continue;
+    fi
+    local node=`echo $line|awk '{print $1}'`
+    if [ "$node"x = ""x ];then
+      echo "$func: osd_host_path : line $lineno: osd_host not input"
+      exit
+    fi
+    local data_path=`echo $line|awk '{print $2}'`
+    if [ "$data_path"x = ""x ];then
+      echo "$func: osd_host_path : line $lineno: osd_data not input"
+      exit
+    fi
+    lines=$(($lines + 1))
+    echo $node >> $osd_host
+  done < $osd_host_path
+}
+
+# for osd node
+# get_omap_list
+# Write the output of "ceph-kvstore-tool $omap_path list" to $omap_list,
+# replacing any previous content. Both variables are set by init_env_osd.
+function get_omap_list()
+{
+  ceph-kvstore-tool $omap_path list > $omap_list
+}
+
+function convert_underline()
+{
+  if [ "$1"x = ""x ];then
+    return
+  fi
+
+  echo $1|sed -e 's/_/\\u/gp'|head -n 1
+}
+
+function dump_backslash()
+{
+  echo $*|sed -e 's/\\/\\\\/gp'|head -n 1
+}
+
+function dump_dump_backslash()
+{
+  echo $*|sed -e 's/\\/\\\\\\\\/gp'|head -n 1
+}
+
+function char_convert()
+{
+  if [ "$1"x = ""x ];then
+    return
+  fi
+
+  echo $1|sed -e 's/_/\\u/gp' -e 's/\./%e/gp' -e 's/%/%p/gp'|head -n 1
+}
+
+function check_osd_process()
+{
+  local func="check_osd_process"
+  local host=$1
+  if [ "$1"x = ""x ];then
+    exit
+  fi
+  local cmds="ps aux|grep ceph-osd|grep -v grep"
+  local ret=/tmp/ret.$$$$
+  ssh $ssh_option $host $cmds |tee $ret
+  if [ -s $ret ];then
+    echo "$func: [$host] ceph-osd process is not killed"
+    exit
+  fi
+  rm -f $ret 
+}
+
+# get_map_header_prefix
+# Print the constant omap prefix string "_HOBJTOSEQ_".
+function get_map_header_prefix()
+{
+  echo "_HOBJTOSEQ_"
+}
+
+function get_map_header_key()
+{
+  local func="get_map_header_key"
+  if [ "$1"x = ""x ];then
+    #echo $func': no keyword input'
+    exit 
+  fi 
+  local keyword=$1
+  local res=`cat $omap_list| grep $keyword`
+  if [ "$res"x = ""x ];then
+    #echo "$func: map_header_key = $keyword not exisits"
+    exit
+  fi
+  echo $res|awk -F ":" '{print $2}'
+}
+
+function get_header_seq() 
+{
+  local func="get_header_seq"
+  if [ "$1"x == ""x ];then
+    #echo "$func: no prefix input"
+    exit;
+  elif [ "$2"x == ""x ];then
+    #echo "$func: no key input"
+    exit;
+  fi
+  local prefix=$1;
+  local key=$2;
+  local res=/tmp/header_seq.$$$$
+
+  ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res
+  if [ $? != 0 ]; then
+    #echo "$func: <$prefix , $key> not exists" ;
+    exit;
+  fi
+
+  # ceph-kvstore-tool get result like this:
+  # 02 01 7e 00 00 00 12 44 00 00 00 00 00 00 00 00
+  # get header seq bytes: 
+  # 12 44 00 00 00 00 00 00 
+  # -> 00 00 00 00 00 00 44 12 
+  # echo $((16#0000000000004412)) -> 17426 == header_seq
+  local seq=`cat $res |head -n 2|tail -n 1| \
+  awk '
+  BEGIN {
+    FS=":"
+    seq="";
+    i=7;
+  } {
+    split($2, arr, " ")  
+    # header_seq uint64 : 8 bytes
+    for (x=7; x>=0; --x) {
+      seq=seq""arr[i+x];
+   }
+  }
+  END {
+   print seq
+  }'`
+  if [ "$seq"x = ""x ];then
+    #echo "$func: get <$prefix , $key> failed"
+    exit;
+  fi
+  rm -f $res
+  echo $((16#$seq))
+}
+
+# get header info key/value
+function get_header_kv()
+{
+  local func="get_header_kv"
+  if [ "$1"x = ""x ];then
+    #echo "$func: no prefix input"
+    exit
+  elif [ "$2"x = ""x ];then
+    #echo "$func: no key input"
+    exit
+  elif [ "$3"x != "string"x ] && [ "$3"x != "int"x ];then
+    #echo "$func: no valid type input, use type (string|int)"
+    exit
+  fi
+
+  local prefix=$1
+  local key=$2
+  local types=$3
+  local res=/tmp/kv.$$$$
+
+  ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res
+  if [ $? != 0 ];then
+    #echo "$func: <$prefix , $key> not exists" 
+    exit
+  fi
+
+  if [ "$types"x = "string"x ];then
+    local value=`cat $res |tail -n +2|head -n -1|awk -F ": " '{printf $3}'|sed -n 's/^\.\{4\}//p'`
+    echo $value
+  elif [ "$types"x = "int"x ];then
+    local value=`cat $res |tail -n +2|head -n -1| \
+      awk '
+        BEGIN{
+          FS=":"
+        } {
+          split($2, arr, " ");
+          len=length(arr)
+          for (i=len; i>0; --i) { 
+                printf arr[i];
+          }
+        }'`
+    echo $((16#$value))
+  fi
+  rm -f $res
+}
diff --git a/src/tools/rbd_recover_tool/config/mds_host b/src/tools/rbd_recover_tool/config/mds_host
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/tools/rbd_recover_tool/config/mon_host b/src/tools/rbd_recover_tool/config/mon_host
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/tools/rbd_recover_tool/config/osd_host_path b/src/tools/rbd_recover_tool/config/osd_host_path
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/tools/rbd_recover_tool/database_h b/src/tools/rbd_recover_tool/database_h
new file mode 100644 (file)
index 0000000..75d0aa9
--- /dev/null
@@ -0,0 +1,1134 @@
+#!/bin/bash
+# file: database_h
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+
+. $my_dir/common_h
+. $my_dir/metadata_h
+. $my_dir/epoch_h
+
+db_image_prefix=
+db_image_size=
+db_order=
+db_snap_id=
+db_snap_image_size=
+found=0
+
+#init osd_data and get all objects path
+function gen_database()
+{
+  local func="gen_database"
+  rm -rf $database/*
+  rm -rf $images
+  rm -rf $raw
+  mkdir -p $database
+  local host=
+  local data_path=
+
+  trap 'echo $func failed; exit;' INT HUP
+  while read line
+  do
+    {
+      host=`echo $line|awk '{print $1}'`
+      data_path=`echo $line|awk '{print $2}'`
+      if [ "$host"x = ""x ] || [ "$data_path"x = ""x ];then
+       continue
+      fi
+      local cmds="find $data_path/current -type f"
+      ssh $ssh_option $host $cmds > $database/$host
+    } &
+  done < $osd_host_path
+  wait
+  echo "$func: finish"
+}
+
+# collect hobjects from database 
+# and choose the object whose epoch is latest
+# then, sort the objects by their offsets in image 
+function gather_hobject_common()
+{
+  func="gather_hobject_common"
+
+  trap 'echo $func failed; exit;' INT HUP
+  if [ $# -lt 2 ];then
+    echo "$func: parameters: <pool_id> <image_prefix> [<snap_id>]"  
+    exit
+  fi
+
+  local pool_id=$1
+  local image_prefix=$2
+  pool_id=$(($pool_id))
+  local hex_pool_id=`printf "%x" $pool_id`
+  # NOSNAP = uint64(-2)
+  local snap_id=`printf "%u" -2`
+  local hex_snap_id="head"
+  local psuffix=
+  local fsuffix="_head"
+  if [ $# = 3 ];then
+    snap_id=$(($3))
+    hex_snap_id=`printf "%x" $snap_id`
+    psuffix="_"$snap_id
+    fsuffix="_"$snap_id
+  fi
+  local underline_image_prefix=`convert_underline $image_prefix`
+  local dump_image_prefix=`dump_backslash $underline_image_prefix`
+  local ddump_image_prefix=`dump_dump_backslash $underline_image_prefix`
+  local images_raw_dir=$rbd_image/raw
+  local image_hobjects_dir=$images/pool_$pool_id/$image_prefix
+  # $images/raw/$image_prefix"_head"
+  local image_hobjects_raw=$images_raw_dir/$image_prefix"$fsuffix"
+  # $images/$image_prefix/$image_prefix"_head"
+  local image_hobjects_stable=$image_hobjects_dir/$image_prefix"$fsuffix"
+
+  if [ ! -e $images_raw_dir ];then
+    mkdir -p $images_raw_dir
+  fi
+  if [ ! -e $image_hobjects_dir ];then
+  local image_metadata=$images_meta/$image_name_in
+    mkdir -p $image_hobjects_dir
+  fi
+
+  pushd $database >/dev/null
+  local  pattern="\.[0-9a-f]+__"$hex_snap_id"_[0-9A-F]{8}__"$hex_pool_id
+  >$image_hobjects_raw
+  grep -r -E $dump_image_prefix""$pattern * >$image_hobjects_raw
+  if [ ! -s $image_hobjects_raw ];then
+    echo "$func: image snap [ $image_prefix"$psuffix" ] is empty"
+    return 1 #no data available
+  fi
+  popd >/dev/null
+
+  local offset_dir_temp=$images_raw_dir/$image_prefix"$fsuffix""_dir_temp"
+  rm -rf $offset_dir_temp
+  mkdir -p $offset_dir_temp
+
+  echo "gather hobjects from database: snapid=$snap_id ..."
+
+  # format: ceph2:/var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
+  local tmp_image=$offset_dir_temp/tmpimage.$$$$
+  >$tmp_image
+  cat $image_hobjects_raw | 
+  awk -F ':' '
+  BEGIN {
+    pg_coll="'$pg_coll'"
+    tmp_image="'$tmp_image'"
+    osd_host_mapping="'$osd_host_mapping'"
+    snapid="'$snap_id'"
+  }{ 
+      # $2 = /var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
+
+      split($2, arr1, "/current/");   # {/var/lib/ceph/osd/ceph-1/, 2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2}
+      split(arr1[2], arr2, "/");     # {2.d3_head, rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2} 
+      split(arr2[1], arr3, "_head"); # {2.d3,} 
+
+      hobject=$2;
+      data_path=arr1[1];
+      gsub(/\\u/, "\\\\\\\\\\\\\\\\u", hobject); # dump backslash to delay escape (\ -> \\)
+      "awk \"\\$1 == \\\""$1"\\\" {print \\$2}\" "osd_host_mapping" | head -n 1" | getline node
+      pgid = arr3[1];
+
+      len=length(arr2);
+      offset_hobject=arr2[len]             # rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2
+      split(offset_hobject, offarr1, "."); # {rb, 0, 1293, 6b8b4567, 000000000002__head_FB425CD3__2}
+      len1=length(offarr1) 
+      offset_p=offarr1[len1]               # 000000000002__head_FB425CD3__2
+      split(offset_p, offarr2, "__");      # {000000000002, head_FB425CD3, 2}
+      offset=offarr2[1];                   # 000000000002
+
+      system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \" >>"tmp_image);
+      #system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \"");
+      #print node" "pgid" "hobject" "offset" "snapid
+      
+      # find pg_epoch from pg_coll database
+      system("awk  \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll" >>"tmp_image);
+      #system("awk  \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll);
+  }'
+
+  local sort_image=$offset_dir_temp/sortimage.$$$$
+  >$sort_image
+  sort -t ' ' -k 4.1,4 -k 6.1nr -k 1.1,1 $tmp_image >$sort_image
+  sort -t ' ' -k 4.1,4 -u $sort_image > $image_hobjects_stable
+  
+  #rm -rf $offset_dir_temp
+  return 0
+}
+
+# gather_hobject_nosnap <pool_id> <image_prefix>
+# Gather the head (no snapshot) objects of an image; thin wrapper that calls
+# gather_hobject_common without a snap id and passes its return status on.
+function gather_hobject_nosnap()
+{
+  gather_hobject_common $1 $2
+}
+
+# gather_hobject_snap <pool_id> <image_prefix> <snap_id>
+# Gather the objects of one snapshot id of an image; thin wrapper that calls
+# gather_hobject_common with the snap id and passes its return status on.
+function gather_hobject_snap()
+{
+  gather_hobject_common $1 $2 $3
+}
+
+# select the max pg_epoch item of the same $field
+# if no same $field, choose the first 
+# format : "node $field pg_epoch"
+function choose_epoch()
+{
+  cat $1|sort -t ' ' -k 3.1,3nr -k 2.1,2n |head -n 1;
+}
+
+# lookup image info , after scatter_node_jobs & gather_node_infos
+function lookup_image()
+{
+  local func="lookup_image"
+  if [ $# -lt 2 ];then
+    echo "$func: parameters error <pool_id> <image_name> [<snap_name>]"
+  fi
+  local pool_id=$1
+  local image_name=$2
+  local snap_name=$3
+  pool_id=$((pool_id))
+  echo -e "$func: pool_id = $pool_id\timage_name = $image_name\tsnap_name = $snap_name"
+  if [ $pool_id -lt 0 ];then
+    echo "$func: pool_id must great than zero"
+    exit
+  fi
+  local hex_pool_id=`printf "%x" $pool_id`
+  input_image $image_name
+  local node=
+  local item=/tmp/item.$$$$
+  local img_name=`dump_backslash $image_name`
+
+  local image_format=0
+  local image_id_hobject=
+  local image_header_hobject=
+  local result=/tmp/tmp_result.$$$$
+  local res1=/tmp/tmp_res1.$$$$
+  local res2=/tmp/tmp_res2.$$$$
+  local data_path=
+
+  # image format v1
+  {
+    cat $image_coll_v1|grep -E "/$img_name\.rbd__head_[0-9A-F]{8}__$hex_pool_id" >$res1
+    if [ -s $res1 ];then
+      echo -n "$func: rbd_header_hobject = "
+      choose_epoch $res1| tee $item
+      #choose_epoch $res1 > $item
+      
+      if [ -e $item ];then
+        node=`cat $item|awk '{print $1}'`
+        image_header_hobject=`cat $item|awk '{print $2}'`
+        if [ "$node"x = ""x ];then
+         echo "$func: v1 node is NULL"
+         exit
+       fi
+       if [ "$image_header_hobject"x = ""x ];then
+         echo "$func: v1 image_header_hobject is NULL"
+         exit
+       fi
+        rm -f $item
+      fi
+
+      image_format=1
+      echo -e "image_name:\t$image_name_in"
+      echo -e "image_format:\t$image_format"
+      data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'`
+      >$result
+      cmds="bash $job_path/osd_job do_image_metadata_v1 $data_path `dump_backslash $image_header_hobject` $snap_name" 
+      ssh $ssh_option $node $cmds | tee $result
+    fi
+  }
+
+  # image format v2
+  {
+    cat $image_coll_v2|grep -E "/rbd\\\\uid\."$img_name"__head_[0-9A-F]{8}__$hex_pool_id" >$res2
+    if [ -s $res2 ];then
+      echo -n "$func: rbd_id_hobject = "
+      choose_epoch $res2 | tee $item
+      #choose_epoch $res2 > $item
+
+      if [ -e $item ];then
+        node=`cat $item|awk '{print $1}'`
+        image_id_hobject=`cat $item|awk '{print $2}'`
+        if [ "$node"x = ""x ];then
+         echo "$func: v2 node is NULL(to get image_id_hobject)"
+         exit
+       fi
+       if [ "$image_id_hobject"x = ""x ];then
+         echo "$func: v2 image_id_hobject is NULL"
+         exit
+       fi
+        rm -f $item
+      fi
+
+      check_osd_process $node
+      image_format=2
+      
+      local tid=/tmp/image_id.$$$$
+      data_path=`echo $image_id_hobject|awk -F "/current" '{print $1}'`
+      >$tid
+      cmds="bash $job_path/osd_job do_image_id $data_path `dump_backslash $image_id_hobject`" 
+      ssh $ssh_option $node $cmds > $tid
+
+      local image_id=`cat $tid`
+      rm -f $tid
+
+      #get image_header_hobject
+      pushd $database >/dev/null
+      local pattern="header\."$image_id"__head_[0-9A-F]{8}__$hex_pool_id"
+      local tcoll=/tmp/tmp_image_head_coll.$$$$
+
+      # hostname(by command hostname) in $pg_coll  maybe different from hostname in tcoll(input by user) 
+      # t_host:        hostname read from config file ($tcoll)
+      # t_host_remote: $(hostname) on osd node ($pg_coll)
+      grep -r -E $pattern * >$tcoll
+      popd >/dev/null
+
+      local t_host=(`cat $tcoll|awk -F ":" '{print $1}'`)
+      local t_pgid=(`cat $tcoll|awk -F ":" '{print $2}'|sed -n 's/.*\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\/.*/\1/p'`)
+      local t_hobject=(`cat $tcoll|awk -F ":" '{print $2}'`)
+      local t_data_path=(`cat $tcoll|awk -F ":" '{split($2, arr, "/current/"); print arr[1];}'`)
+      rm -f $tcoll
+      declare -a t_host_remote
+
+      #if there is no failed pg migration, number of t_host is replica num
+      #replica num : 3, 4, 5 ...
+      local t_hostname=/tmp/t_hostname.$$$$
+      for ((i=0; i<${#t_host[*]}; i++))
+      do
+       ssh $ssh_option ${t_host[$i]} "hostname" >$t_hostname
+       if [ $? != 0 ];then
+         echo "$func: ${t_host[$i]} get host_remote failed"
+         exit
+       fi
+        t_host_remote[$i]=`cat $t_hostname`    
+      done
+      rm -f $t_hostname
+
+      local t_item=/tmp/tmp_item.$$$$
+      local tmp_item=/tmp/tmp_tmp_item.$$$$
+      
+      >$tmp_item
+      for ((i=0; i<${#t_host_remote[*]}; i++ ))
+      do
+       local node=${t_host_remote[$i]}
+       local pgid=${t_pgid[$i]}
+       awk '$1 == "'"$node"'" && $2 == "'"$pgid"'" {print}' $pg_coll >>$tmp_item
+      done
+
+      # t_item: <remote_hostname> <pgid> <epoch> <data_path>
+      sort -u $tmp_item >$t_item
+      rm -f $tmp_item
+
+      local entry=`choose_epoch $t_item` #t_host_remote
+      rm -f $t_item
+
+      node=`echo $entry|awk '{print $1}'`
+      data_path=`echo $entry|awk '{print $4}'`
+      if [ "$node"x = ""x ];then
+        echo "$func: v2 node is NULL (to get image_header_hobject)"
+        exit
+      fi
+
+      for ((i=0; i<${#t_host_remote[*]}; i++))
+      do
+       if [ "${t_host_remote[$i]}"x = "$node"x ] && [ "${t_data_path[$i]}"x = "$data_path"x ];then
+         image_header_hobject=${t_hobject[$i]}
+         break
+       fi
+      done
+      
+      if [ "$image_id_hobject"x = ""x ];then
+        echo "$func: v2 image_header_hobject is NULL"
+        exit
+      fi
+
+      check_osd_process $node
+     
+      echo "$func: rbd_header_hobject = $node $image_header_hobject"
+      echo -e "image_name:\t$image_name_in"
+      echo -e "image_format:\t$image_format"
+
+      #data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'`
+      >$result
+      cmds="bash $job_path/osd_job do_image_metadata_v2 $data_path $image_id `dump_backslash $image_header_hobject` $snap_name" 
+      ssh $ssh_option $node $cmds | tee $result
+    fi
+  }
+
+  if [ ! -s $result ];then
+    echo "$func: $image_name_in not exists" 
+    exit
+  fi
+  
+  # to assign value to global variable
+  db_image_prefix=`cat $result|awk '/^(object_prefix|block_name):/{print $2}'`
+  if [ "$db_image_prefix"x = ""x ];then
+    echo "$func: image_prefix is NULL"
+    exit
+  fi
+
+  db_image_size=`cat $result|awk '/^image_size:/{print $2}'`
+  db_order=`cat $result|awk '/^order:/{print $2}'`
+  if [ "$snap_name"x != ""x ];then
+    db_snap_id=`cat $result|awk '/^snapshot:/{print $2}'`
+    if [ "$db_snap_id"x = ""x ];then
+      echo "$func: $image_name_in@$snap_name NOT EXISTS"
+      exit
+    fi
+    db_snap_image_size=`cat $result|awk '/^snapshot:/{print $4}'`
+  else
+    #save snaplist
+    local image_snaplist=$images/pool_$pool_id/$image_name_in/@snaplist
+    local image_dir=$images/pool_$pool_id/$image_name_in
+    if [ ! -e $image_dir ];then
+      mkdir -p $image_dir
+    fi
+    cat $result|awk '/^snapshot:/{print $2" "$3" "$4}' >$image_snaplist
+  fi
+  found=1
+  rm -f $result
+}
+
+# list_images
+# Print all images found in the offline database as <pool_id>/<image_name>,
+# format v1 images (from $image_coll_v1) first, then format v2 images (from
+# $image_coll_v2). The hex pool id at the end of the hobject name is converted
+# to decimal and the "\u" escapes in image names are turned back into "_".
+function list_images()
+{
+   echo "=============== format =============="
+   echo "format: <pool_id>/<image_name>"
+   echo "================ v1: ================"
+   #sed -n 's/\(.*\)\/\(.*\)\.rbd__\(.*\)/\2/p' $image_coll_v1|sort -u|sed -e 's/\\u/_/g'
+   # v1 header objects are named <image_name>.rbd__head_<8 hex digits>__<hex pool id>
+   # strtonum() is a gawk extension
+   sed -n 's/.*\/\(.*\)\.rbd__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v1|sort -u|awk '{print strtonum("0x"$1)"/"$2;}'|sed -e 's/\\u/_/g'
+   echo "================ v2: ================"
+   #sed -n 's/\(.*\)\/rbd\\uid.\(.*\)__\(head.*\)/\2/p' $image_coll_v2|sort -u|sed 's/\\u/_/g'
+   # v2 id objects are named rbd\uid.<image_name>__head_<8 hex digits>__<hex pool id>
+   sed -n 's/.*\/rbd\\uid.\(.*\)__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v2|sort -u|awk '{print strtonum("0x"$1)"/"$2}'|sed 's/\\u/_/g'
+}
+
+# lookup image metadata
+# and 
+# collect hobjects of image with the latest pg epoch
+function discover_image_nosnap()
+{
+  local func="discover_image_nosnap"
+  echo "$func ..."
+  local pool_id=$1
+  local image_name=$2
+  pool_id=$(($pool_id))
+  lookup_image $pool_id $image_name # assign $image_prefix
+  gather_hobject_nosnap $pool_id $db_image_prefix 
+  if [ $? -ne 0 ];then
+    exit
+  fi
+  local image_hobjects_stable_nosnap=$images/pool_$pool_id/$db_image_prefix/$db_image_prefix"_head"
+  local image_hobjects_dir=$images/pool_$pool_id/$image_name_in
+  if [ ! -e $image_hobjects_dir ];then
+    mkdir -p $image_hobjects_dir
+  fi
+  # mv image_prefix to image_name
+  mv $image_hobjects_stable_nosnap $image_hobjects_dir/$image_name_in
+  rm -rf $images/pool_$pool_id/$db_image_prefix
+}
+
+# get the offset snapid object 
+# if there is no object, choose the smallest snapid which is greater than current snapid
+#
+# get_object_clone <object_offset_string> <snapid> <snaplist_path> <snapset_output_dir>
+# Searches the per-snapshot hobject lists <snapset_output_dir>/@<id> of every
+# id in <snaplist_path> that is >= <snapid> and prints the last line whose
+# 4th column equals <object_offset_string> and whose 5th column is >= <snapid>.
+# Prints nothing when there is no such line; exits silently with fewer than
+# 4 arguments.
+function get_object_clone()
+{
+  local func="get_object_clone"
+  if [ $# -lt 4 ];then
+    exit
+  fi
+
+  local object_offset_string=$1
+  local snapid=$2
+  local snaplist_path=$3
+  local snapset_output_dir=$4
+
+  # snapid in desc
+  # (presumably the snaplist is ordered by descending snap id, so that the
+  #  last match below belongs to the smallest id -- TODO confirm)
+  local snap_coll_arr=(`
+  cat $snaplist_path|awk '{ if ($1 >= '"$snapid"') print "'"$snapset_output_dir"'/@"$1}'`) 
+
+  local hex_snapid=`printf "%x" $snapid`
+  pushd $snapset_output_dir >/dev/null
+  # get object with the smallest snapid greater than current specify snapid
+  # NOTE(review): with an empty snap_coll_arr awk gets no file argument and
+  # reads stdin instead
+  awk '$4 == "'"$object_offset_string"'" && $5 >= '$snapid' {print}' `echo ${snap_coll_arr[@]}` |tail -n 1
+  popd >/dev/null
+}
+
+# gather hobject for each snapid
+#
+# gen_snapset_hobject <pool_id> <image_prefix> <snaplist_path> <snapset_output_dir>
+# For every line of <snaplist_path> (first field = snap id) gather the
+# hobjects of that snapshot and store the list as <snapset_output_dir>/@<snapid>.
+# A snapshot without any object gets an empty file.
+function gen_snapset_hobject()
+{
+  # note: messages of this function are prefixed "gen_image_snapset"
+  local func="gen_image_snapset"
+  echo "$func ..."
+  if [ $# -lt 4 ];then
+    echo "$func: parameters: <pool_id> <image_prefix> <snaplist_path> <snapset_output_dir>"
+    exit
+  fi
+  local pool_id=$1
+  local image_prefix=$2
+  local snaplist_path=$3
+  local snapset_output_dir=$4
+  pool_id=$(($pool_id))
+  # split the snaplist into whole lines: IFS is a newline while the array is built
+  OIFS=$IFS
+  IFS=$'\n'
+  local snaparr=(`cat $snaplist_path`) 
+  # gather hobject for each snapshot
+  trap 'echo $func failed; exit;' INT HUP
+  for line in ${snaparr[@]}
+  do
+    # split one snaplist line into fields: IFS is a single space inside the
+    # loop body (this also holds while gather_hobject_snap runs)
+    OOIFS=$IFS
+    IFS=$' '
+    local field=(`echo $line`)
+    local snapid=${field[0]}
+    local image_hobjects_stable_snap=$images/pool_$pool_id/$image_prefix/$image_prefix"_"$snapid
+    local image_snap=$snapset_output_dir/@$snapid
+    gather_hobject_snap $pool_id $image_prefix $snapid 
+    local res=$?
+    if [ $res -ne 0 ];then
+      # no object for this snapshot: leave an empty list file
+      touch $image_snap
+    else 
+      mv $image_hobjects_stable_snap $image_snap
+    fi
+    IFS=$OOIFS
+  done
+  IFS=$OIFS
+}
+
+# lookup image metadata and get snapid hobjects
+#
+# parameters: <pool_id> <image_name> <snap_name>
+# NOTE(review): the usage text marks <snap_name> as optional, but the argument
+# check below rejects calls with fewer than 3 arguments.
+#
+# Reads the globals db_snap_id, db_image_prefix and image_name_in after calling
+# lookup_image (presumably lookup_image fills them in -- confirm there).
+#
+# Two files are (re)created under $images/pool_<pool_id>/<image_name_in>/:
+#   <image_name_in>@<db_snap_id>       for each line of the head object list,
+#                                      the entry printed by get_object_clone
+#   <image_name_in>@<db_snap_id>@head  the head object list lines for which
+#                                      such an entry exists, passed through
+#                                      dump_backslash
+function discover_image_snap()
+{
+  local func="discover_image_snap"
+  echo "$func ..."
+  if [ $# -lt 3 ];then
+    echo "$func: paramters: <pool_id> <image_name> [<snap_name>]"
+    exit
+  fi
+  local pool_id=$1
+  local image_name=$2
+  local snap_name=$3
+  # normalize pool_id through arithmetic evaluation
+  pool_id=$(($pool_id))
+  #mkdir -p $images/$image_prefix
+  lookup_image $pool_id $image_name $snap_name # input image_name and snap_name to lookup metadata and snap_id
+  if [ "$db_snap_id"x = ""x ];then
+    echo "$func: lookup image failed to gen snapid"
+    exit
+  fi
+  local image_hobjects_dir_prefix=$images/pool_$pool_id/$db_image_prefix
+  local image_nosnap=$images/pool_$pool_id/$image_name_in 
+  #check if image nosnap recovered
+  # NOTE(review): image_nosnap is the same path as image_hobject_dir below, so
+  # this tests the per-image directory, not the head object list inside it.
+  if [ ! -s $image_nosnap ];then
+    echo "$func: please recover image nosnap before recover with snap"
+    rm -rf $image_hobjects_dir_prefix
+    exit
+  fi
+  local image_hobject_dir=$images/pool_$pool_id/$image_name_in
+  local image_snap_hobject=$image_hobject_dir/$image_name_in@$db_snap_id
+  local image_snap_hobject_head=$image_hobject_dir/$image_name_in@$db_snap_id@head
+  local image_snaplist=$image_hobject_dir/@snaplist
+  local image_snapset_dir=$image_hobject_dir/@snapset_dir
+  local image_head=$image_hobject_dir/$image_name_in
+  if [ ! -e $image_hobject_dir ];then
+    mkdir -p $image_hobject_dir
+  fi
+  # only gen snapset one time
+  # (the existence of @snapset_dir is the marker that it was already built)
+  if [ ! -e $image_snapset_dir ];then
+    mkdir -p $image_snapset_dir
+    gen_snapset_hobject $pool_id $db_image_prefix $image_snaplist $image_snapset_dir  
+   
+  fi
+
+  echo "$func: will get object clone ..."
+  # truncate both result files so that a rerun starts from scratch
+  >$image_snap_hobject
+  >$image_snap_hobject_head
+
+  trap 'echo $func failed; exit;' INT HUP
+  # get each offset 's snapid hobject
+  while read line
+  do
+    #echo $line
+    # split the line on single spaces only; field 4 (index 3) is the offset
+    OOIFS=$IFS
+    IFS=$' '
+    local field=(`echo $line`)
+    local offset_string=${field[3]}
+    IFS=$OOIFS
+    local entry=`get_object_clone $offset_string $db_snap_id $image_snaplist $image_snapset_dir` 
+    # keep only offsets for which a clone entry was found
+    if [ "$entry"x != ""x ];then
+      echo $entry >> $image_snap_hobject
+      echo `dump_backslash $line` >> $image_snap_hobject_head
+    fi
+  done < $image_head
+  # drop the working directory named after the image prefix
+  rm -rf $image_hobjects_dir_prefix
+}
+
+# after discover_image_nosnap
+# Rebuild the head image file by fetching its objects from the osds, one
+# object after another.
+#
+# parameters: <pool_id> <image_hobjects> <backup_dir>
+# Reads the globals found, image_name_in, db_image_size, db_order and ssh_option.
+# The image is written to <backup_dir>/pool_<pool_id>/<image_name_in>/<image_name_in>.
+# An @LOCK file in that directory guards against a second run; @CURRENT is
+# set to "head".
+function copy_image_nosnap_single_thread()
+{
+  local func="copy_image_nosnap_single_thread"
+  echo "$func ..."
+  if [ $# -lt 3 ];then
+    echo "$func: parameters: <pool_id> <image_hobjects> <backup_dir>"
+    exit
+  fi
+  local pool_id=$1 image_hobjects=$2 backup_dir=$3
+  pool_id=$(($pool_id))
+
+  # make sure lookup_image first
+  if [ $found = 0 ];then
+    echo "$func: image not found, maybe forget to discover_image"
+    exit
+  fi
+  [ ! -e $backup_dir ] && mkdir -p $backup_dir
+
+  local image_dir=$backup_dir/pool_$pool_id/$image_name_in
+  local image_file=$image_dir/$image_name_in
+  local CURRENT=$image_dir/@CURRENT
+  local LOCK=$image_dir/@LOCK
+  [ ! -e $image_dir ] && mkdir -p $image_dir
+
+  # lock
+  if [ -e $LOCK ];then
+    echo "$func: $LOCK is locked by other process"
+    exit
+  fi
+  touch $LOCK
+
+  # start from an empty file grown to the full image size
+  : >$image_file
+  truncate -s $db_image_size $image_file
+  echo "head" >$CURRENT
+
+  # what the object list should cover versus what it does cover
+  local count=$(($db_image_size >> $db_order))
+  local start=$(cat $image_hobjects | head -n 1 | awk '{print $4}')
+  local end=$(cat $image_hobjects | tail -n 1 | awk '{print $4}')
+  local entry_count=$(cat $image_hobjects | wc -l)
+
+  # offsets are printed with as many hex digits as the first recorded one
+  local char_bits=$(($(echo $start | wc -c) - 1))
+  local format="%0${char_bits}x"
+
+  local expect_start=$(printf $format 0)
+  local expect_end=$(printf $format $(($count - 1)))
+
+  echo -e "object_count\t$entry_count"
+  echo -e "expect\t\t[$expect_start ~ $expect_end] count:$count"
+  echo -e "range\t\t[$start ~ $end] count:$entry_count"
+
+  local icount=0
+  local istart=
+  local iend=
+  local percent=
+
+  trap 'echo $func failed; exit;' INT HUP
+  local unit=$((1 << $db_order))
+  while read line
+  do
+    icount=$(($icount + 1))
+    # fields: 1 = osd node, 3 = object path on that node, 4 = hex object offset
+    node=$(echo $line | awk '{print $1}')
+    hobject=$(echo $line | awk '{print $3}')
+    offset=$(echo $line | awk '{print $4}')
+    off=$((16#$offset))
+    [ $icount = 1 ] && istart=$offset
+    iend=$offset
+    hobject=$(dump_backslash $hobject)
+    sshcmd="cat $hobject"
+    # stdin of ssh is detached so that it cannot eat the object list feeding this loop
+    ssh $ssh_option $node $sshcmd < /dev/null | dd of=$image_file bs=$unit seek=$off conv=notrunc 2>/dev/null
+    percent=$(echo "scale=3; 100*$icount/$entry_count" | bc)
+    tput sc  #record current cursor
+    echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
+    if [ $icount != $entry_count ];then
+      tput rc # backport most recent cursor
+    fi
+  done < $image_hobjects
+
+  echo
+  echo -n "size: "
+  ls -lh $image_file | awk '{print  $5"\t"$9}'
+  echo -n "du:   "
+  du -h $image_file
+  # unlock
+  rm -f $LOCK
+}
+
+
+# ssh copy snap_object & head_object from osd to admin node
+# copy all snapshot objects
+# and
+# all head objects which have the same offset as the snapshot objects
+#
+# parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>
+#   snap_hobjects / head_hobjects are object list files; field 1 is the osd
+#   node, field 3 the object path on that node, field 4 the hex object offset.
+# Objects are stored as <backup_dir>/pool_<pool_id>/<image_name>/@<snap_id>/<offset>
+# and .../@head/<offset>; files that already exist are not fetched again.
+# returns 0 on success or when <snap_hobjects> is empty, 1 when the list has no
+# usable offsets or is not in ascending offset order; exits when a snap object
+# has no head object with the same offset.
+function collect_image_snap_objects()
+{
+  local func="collect_image_snap_objects"
+  if [ $# -lt 6 ];then
+    echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>"
+    exit
+  fi
+
+  local pool_id=$1
+  local image_name=$2
+  local snap_id=$3
+  local snap_hobjects=$4 #snap hobjects info
+  local head_hobjects=$5 #head hobjects info
+  local backup_dir=$6
+  pool_id=$(($pool_id))
+
+  local head_dir=$backup_dir/pool_$pool_id/$image_name/@head
+  local snap_dir=$backup_dir/pool_$pool_id/$image_name/@$snap_id
+  local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
+  if [ ! -e $head_dir ];then
+    mkdir -p $head_dir
+  fi
+  if [ ! -e $snap_dir ];then
+    mkdir -p $snap_dir
+  fi
+
+  local snap_node= #osd node
+  local snap_hobject= #hobject path with snapid on osd
+  local snap_offset=
+  local snap_filename=
+
+  local head_node=
+  local head_hobject=
+  local head_offset=
+  local head_filename=
+
+  # ignore if there is no object in snapshot(empty )
+  if [ ! -s $snap_hobjects ];then
+    echo "$func: $snap_hobjects is empty"
+    return 0
+  fi
+  local start=`head -n 1 $snap_hobjects|awk '{print $4}'`
+  local end=`tail -n 1 $snap_hobjects|awk '{print $4}'`
+  local entry_count=`cat $snap_hobjects|wc -l`
+
+  # just assert if ignored empty snapshot
+  # (done before the arithmetic below, which needs both offsets)
+  if [ "$start"x = ""x ] || [ "$end"x = ""x ];then
+    return 1
+  fi
+  # the [n1 ~ n2] narrowing further down only works on ascending offsets;
+  # compare the first and last offset actually read from the list
+  if [ $((16#$start)) -gt $((16#$end)) ];then
+    echo "$func: $snap_hobjects not sorted"
+    return 1
+  fi
+
+  # speed up copy snapshot
+  # lookup the corresponding head hobject of snap hobject
+  # use command: grep <offset> <head hobjects>
+  #
+  # eg.
+  # head hobjects: (32 objects, snapid = uint64(-2) = 18446744073709551614)
+  # ceph1 29.4d /var/lib/ceph/osd/ceph-0/current/29.4d_head/rb.0.1c414.6b8b4567.000000000000__head_EC2C1C4D__1d 000000000000 18446744073709551614 869
+  # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__head_0F439A8C__1d 000000000001 18446744073709551614 867
+  # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__head_FC55706A__1d 000000000002 18446744073709551614 869
+  # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__head_20A6328B__1d 000000000003 18446744073709551614 869
+  # ceph2 29.75 /var/lib/ceph/osd/ceph-1/current/29.75_head/rb.0.1c414.6b8b4567.000000000004__head_AC5ADB75__1d 000000000004 18446744073709551614 867
+  # ceph2 29.23 /var/lib/ceph/osd/ceph-1/current/29.23_head/rb.0.1c414.6b8b4567.000000000005__head_1FDEA823__1d 000000000005 18446744073709551614 867
+  # ......
+  # ceph1 29.34 /var/lib/ceph/osd/ceph-0/current/29.34_head/rb.0.1c414.6b8b4567.00000000001f__head_52373734__1d 00000000001f 18446744073709551614 869
+  #
+  # snap hobjects: (3 objects, snapid >= 29)
+  # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__1f_0F439A8C__1d 000000000001 31 867
+  # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__1e_FC55706A__1d 000000000002 30 869
+  # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__1d_20A6328B__1d 000000000003 29 869
+  #
+  # so find out offset in head hobjects line number:
+  # snap hobjects: 000000000001 ---> head hobjects: 2 (n1)
+  # snap hobjects: 000000000003 ---> head hobjects: 4 (n2)
+  #
+  # finally, the grep range shrinks from the whole file [1 ~ N] to the part [n1 ~ n2]
+  # the worst case : [n1 ~ n2] = [1 ~ N], means no shrinking
+
+  # get the line number of the start offset in head hobjects
+  local n1=`grep -n $start $head_hobjects|head -n 1|cut -d ":" -f 1`
+  # get the line number of the end offset in head hobjects
+  local n2=`grep -n $end $head_hobjects|head -n 1|cut -d ":" -f 1`
+  local icount=0
+  local istart=
+  local iend=
+  local percent=
+
+  OIFS=$IFS
+  IFS=$'\n'
+
+  #assume file:snap_hobjects is not very large, and can be load into memory
+  local snap_arr=(`cat $snap_hobjects`)
+  local snap_tmp=/tmp/snaptmp.$$$$
+
+  # snap_tmp:
+  # the head hobject lines of the line number range [n1 ~ n2]
+  head -n $n2 $head_hobjects|tail -n $(($n2-$n1+1)) >$snap_tmp
+
+  echo "copy image snap/head objects from osd ..."
+  echo -e "object_count\t$entry_count"
+  echo -e "range\t\t[$start ~ $end] count:$entry_count"
+
+  trap 'echo $func failed; exit;' INT HUP
+  for line in ${snap_arr[*]}
+  do
+    icount=$(($icount+1))
+
+    # split the fields of one list line on single spaces
+    OOIFS=$IFS
+    IFS=$' '
+
+    local arr=(`echo $line`)
+    snap_node=${arr[0]}
+    snap_hobject=${arr[2]}
+    snap_offset=${arr[3]}
+    snap_filename=$snap_dir/$snap_offset
+
+    if [ $icount = 1 ];then
+      istart=$snap_offset
+    fi
+    iend=$snap_offset
+
+    #lookup corresponding head hobject of snap hobject
+    local res=`grep $snap_offset $snap_tmp|head -n 1`
+    if [ "$res"x = ""x ];then
+      echo "$func: image object[ $snap_offset ] missing"
+      # do not leave the temporary list behind when giving up
+      rm -f $snap_tmp
+      exit
+    fi
+
+    local arr2=(`echo $res`)
+    head_node=${arr2[0]}
+    head_hobject=${arr2[2]}
+    head_offset=${arr2[3]}
+    head_filename=$head_dir/$head_offset
+
+    # just copy object(snap/head) if it does not exist
+    if [ ! -e $snap_filename ];then
+      ssh $ssh_option $snap_node "cat $snap_hobject" > $snap_filename
+    fi
+    if [ ! -e $head_filename ];then
+      ssh $ssh_option $head_node "cat $head_hobject" > $head_filename
+    fi
+    IFS=$OOIFS
+
+    percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
+    tput sc  #record current cursor
+    echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
+    if [ $icount != $entry_count ];then
+      tput rc # backport most recent cursor
+    fi
+  done
+  echo
+  IFS=$OIFS
+  rm -f $snap_tmp
+  return 0
+}
+
+# copy all snap objects and corresponding head objects from osds
+# in single process
+#
+# parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>
+# The copying itself is done by collect_image_snap_objects; this wrapper only
+# holds the image's @LOCK file around that call.
+function copy_image_snap_single_thread()
+{
+  local func="copy_image_snap_single_thread"
+  if [ $# -lt 6 ];then
+    echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>" 
+    exit
+  fi
+  local pool_id=$1 image_name=$2 snap_id=$3
+  local snap_hobjects=$4 head_hobjects=$5 backup_dir=$6
+  pool_id=$(($pool_id))
+
+  local image_backup_dir=$backup_dir/pool_$pool_id/$image_name
+  local CURRENT=$image_backup_dir/@CURRENT
+  local LOCK=$image_backup_dir/@LOCK
+
+  # lock: give up while another process holds the image
+  if [ -e $LOCK ];then
+    echo "$func: $LOCK is locked by other process"
+    exit
+  fi
+  touch $LOCK
+
+  collect_image_snap_objects $pool_id $image_name $snap_id $snap_hobjects $head_hobjects $backup_dir
+
+  # unlock
+  rm -f $LOCK
+}
+
+# after all snap objects and necessary head objects are copied,
+# just pick appropriate head objects and snap objects and write them to image
+# in order to rollback image to snapshot
+#
+# init: image is created by copy_image_nosnap_single_thread firstly
+#
+# all output include 3 parts:
+# <image>  <head objects>      <snap objects>
+# 
+#          head objects1  ---  snap1 objects
+#          head objects2  ---  snap2 objects
+#  image   head objects3  ---  snap3 objects
+#          ......
+#          head objectsN  ---  snapN objects
+#
+# how to rollback:
+# firstly rollback to head, secondly write <snapX objects>
+# head  = <image> + <head objects>
+# snap1 = <image> + <head objects> + <snap1 objects>
+# snap2 = <image> + <head objects> + <snap2 objects>
+# snap3 = <image> + <head objects> + <snap3 objects>
+# ......
+# snapN = <image> + <head objects> + <snapN objects>
+# 
+# improve rollback:
+# there is intersection of head objects and snapX objects, if snapX objects are not empty
+# and need to deduplicate the intersection.
+# deduplicate steps:
+# - get difference set of head objects and snapX objects
+# - write the difference set objects to image
+# - write the snapX objects to image
+# Helper of rollback_image_snap: write object files into the image file.
+#   $1 object_dir  directory holding the object files
+#   $2 image_path  image file to patch
+#   $3 image_unit  block size handed to dd (bs=)
+#   $4...          object file names; a name is the hex index of the object
+# Each object is written at block index (hex name). Prints the object count,
+# the name range and a progress line. Objects are addressed through
+# <object_dir>/<name>, so the working directory is left alone and relative
+# paths keep their meaning.
+function _rollback_write_objects()
+{
+  local object_dir=$1
+  local image_path=$2
+  local image_unit=$3
+  shift 3
+
+  local entry_count=$#
+  local start=$1
+  local end=
+  if [ $entry_count -gt 0 ];then
+    end=${@: -1}
+  fi
+  local object=
+  local offset=
+  local icount=0
+  local istart=
+  local iend=
+  local percent=
+
+  echo -e "object_count\t$entry_count"
+  echo -e "range\t\t[$start ~ $end] count:$entry_count"
+
+  for object in "$@"
+  do
+    icount=$(($icount+1))
+    if [ $icount = 1 ];then
+      istart=$object
+    fi
+    iend=$object
+
+    offset=$((16#$object))
+    dd if=$object_dir/$object of=$image_path bs=$image_unit seek=$offset conv=notrunc 2>/dev/null
+
+    percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
+    tput sc  #record current cursor
+    echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
+    if [ $icount != $entry_count ];then
+      tput rc # backport most recent cursor
+    fi
+  done
+  if [ $entry_count -gt 0 ];then
+    echo
+  fi
+}
+
+# Roll the recovered image file back to a snapshot (or to head).
+#
+# parameters: <pool_id> <image_name> <snap_id> <snap_object_dir> <backup_dir> <image_unit>
+#   snap_id          -2 stands for head
+#   snap_object_dir  directory of the objects to finish with; when it differs
+#                    from the image's @head directory, the head objects that
+#                    have no counterpart in it are written first
+# Records the target in @CURRENT and holds @LOCK while working.
+function rollback_image_snap()
+{
+  local func="rollback_image_snap"
+
+  echo "$func ..."
+
+  trap 'echo $func failed; exit;' INT HUP
+  if [ $# -lt 6 ];then
+    echo "$func: parameters <pool_id> <image_name> <snap_id> <snap_object_dir> <backup_dir> <image_unit>"
+    exit
+  fi
+  local pool_id=$1
+  local image_name=$2
+  local snap_id=$3
+  local snap_object_dir=$4
+  local backup_dir=$5
+  local image_unit=$6
+
+  local need_diff_set=0
+
+  local image_path=$backup_dir/pool_$pool_id/$image_name/$image_name
+  local head_object_dir=$backup_dir/pool_$pool_id/$image_name/@head
+  local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
+  local LOCK=$backup_dir/pool_$pool_id/$image_name/@LOCK
+  if [ -e $LOCK ];then
+    echo "$func: $LOCK is locked by other process"
+    exit
+  else
+    touch $LOCK
+  fi
+  if [ $snap_id -ne -2 ];then
+    echo $snap_id > $CURRENT
+  else
+    echo "head" > $CURRENT
+  fi
+
+  if [ ! -e $snap_object_dir ];then
+    # nothing to write; release the lock taken above, otherwise every later
+    # run on this image reports it as locked
+    rm -f $LOCK
+    return 0
+  fi
+
+  if [ "$snap_object_dir"x != "$head_object_dir"x ];then
+    echo "$func: need to compute diff_set of head"
+    need_diff_set=1
+  else
+    echo "$func: NO diff_set"
+    need_diff_set=0
+  fi
+
+  local snap_objects=
+  local head_objects=
+  local diff_set=
+
+  snap_objects=(`ls $snap_object_dir`)
+
+  # if need to compute difference set of head_objects and snap_objects
+  if [ $need_diff_set -ne 0 ];then
+    head_objects=(`ls $head_object_dir`)
+
+    #get the difference set: ( head_objects - snap_objects )
+    # snap names are fed in twice, so "uniq -u" keeps only names that occur
+    # in the head list alone
+    diff_set=(`
+    sort -m <(echo ${head_objects[@]}|xargs -n 1 echo) <(echo ${snap_objects[@]}|xargs -n 1 echo) \
+       <(echo ${snap_objects[@]}|xargs -n 1 echo) |uniq -u`)
+
+    # copy diff_set of head object to image
+    echo "$func: copy diff_set head objects ..."
+    _rollback_write_objects $head_object_dir $image_path $image_unit ${diff_set[@]}
+
+    if [ $snap_id -ne -2 ];then
+      echo -e "$image_name already rollback diff_set: (head - snap)"
+    fi
+  fi
+
+  # copy snap object to image
+  if [ $need_diff_set -ne 0 ];then
+    echo "$func: copy snap objects ..."
+  else
+    echo "$func: copy head objects ..."
+  fi
+  _rollback_write_objects $snap_object_dir $image_path $image_unit ${snap_objects[@]}
+
+  rm -f $LOCK
+  if [ $snap_id -ne -2 ];then
+    echo "$image_name rollback to snapid: $snap_id"
+  else
+    echo "$image_name rollback to head"
+  fi
+}
+
+# Recover an image, or roll an already recovered image back.
+#
+# parameters: <pool_id> <image_name> <snap_name> [<backup_dir>]
+#   snap_name "@"   recover the image head (treated as "no snapshot")
+#   snap_name "@@"  roll the recovered image back to its head state
+#   otherwise       roll the recovered image back to snapshot <snap_name>
+#   backup_dir      defaults to $default_backup_dir
+# Both rollback forms require the head to have been recovered before.
+function recover_image()
+{
+  local func="recover_image"
+  echo "$func ..."
+
+  if [ $# -lt 3 ];then
+    echo "$func: paramters: <pool_id> <image_name> <snap_name> [<backup_dir>]"
+    exit
+  fi
+
+  local pool_id=$1
+  local img_name=$2
+  local snap_name=$3
+  local backup_dir=$4
+  pool_id=$(($pool_id))
+  if [ "$snap_name"x = "@"x ];then
+    snap_name=
+  fi
+  if [ "$backup_dir"x = ""x ];then
+    backup_dir=$default_backup_dir
+  fi
+
+  #recover image with nosnap
+  if [ "$snap_name"x = ""x ];then
+    discover_image_nosnap $pool_id $img_name #input image_name
+    local image_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in
+    copy_image_nosnap_single_thread $pool_id $image_hobjects $backup_dir
+
+  #recover image with snap
+  else
+
+    # check if recovered head already
+    local img_hobjects_path=$images/pool_$pool_id/$img_name/$img_name
+    local img_file_path=$backup_dir/pool_$pool_id/$img_name/$img_name
+    if [ ! -e $img_hobjects_path ] || [ ! -e $img_file_path ];then
+      echo "$func: $img_name@$snap_name : can not rollback to snapshot, please recover image head first"
+      exit
+    fi
+
+    # rollback to head
+    if [ "$snap_name"x = "@@"x ];then
+      local head_dir=$backup_dir/pool_$pool_id/$img_name/@head
+      if [ -e $head_dir ];then
+        # block size = size of the first saved head object
+        local unit=`pushd $head_dir >/dev/null; ls|head -n 1|xargs -n 1 stat|awk '/Size:/{print $2}'`
+        # rollback to head
+        # Hand over the directory that was just checked: the path used to
+        # lack the pool_<id> component, so rollback_image_snap never found it.
+        rollback_image_snap $pool_id $img_name -2 $head_dir $backup_dir $unit
+        # no metadata lookup ran on this path, so report the name given on
+        # the command line
+        echo "$img_name head : $img_file_path"
+      else
+        echo "$func: no need to rollback to head"
+      fi
+      return 0
+    fi
+
+    # rollback to snap
+    discover_image_snap $pool_id $img_name $snap_name # get image meta & get snapid object
+    local snap_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id
+    local head_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id@head
+    local snap_object_dir=$backup_dir/pool_$pool_id/$image_name_in/@$db_snap_id
+    local image_path=$backup_dir/pool_$pool_id/$image_name_in/$image_name_in
+    local image_unit=$((1<<$db_order))
+    copy_image_snap_single_thread $pool_id $image_name_in $db_snap_id $snap_hobjects $head_hobjects $backup_dir
+    rollback_image_snap $pool_id $image_name_in $db_snap_id $snap_object_dir $backup_dir $image_unit
+    echo "$image_name_in@$snap_name : $image_path"
+  fi
+}
diff --git a/src/tools/rbd_recover_tool/epoch_h b/src/tools/rbd_recover_tool/epoch_h
new file mode 100644 (file)
index 0000000..4e50026
--- /dev/null
@@ -0,0 +1,80 @@
+#!/bin/bash
+# file: epoch_h
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+. $my_dir/common_h
+
+#pgid_list=$single_node/$cluster-$id/pgid_list
+# For every "<pgid>_head" directory below $osd_data/current/ write one
+# "<pgid> <path in front of /current>" line to $pgid_list, sorted on the
+# first field (human-numeric) and then on the second.
+function get_pgid_list()
+{
+  local pg_dir_to_entry='s/\(.*\)\/current\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head/\2 \1/p'
+
+  find $osd_data/current/ -type d -name "*_head" |
+    sed -n "$pg_dir_to_entry" |
+    sort -t ' ' -k 1.1,1h -k 2.1,2 > $pgid_list
+}
+
+# Print the pgid ("<hex>.<hex>") found in the "<pgid>_head" directory
+# component of an hobject path ($1); prints nothing when the path has no
+# such component.
+function get_pgid()
+{
+  # local: other functions of this tool use a variable of the same name, and
+  # a plain assignment here would overwrite it
+  local hobject_path=$1
+  printf '%s\n' "$hobject_path"| sed -n 's/\(.*\)\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\(.*\)/\2/p'
+}
+
+# result of get_infos_seq
+infos_seq=
+# Look up the map header key for ":infos." and store the header sequence
+# reported for it in the global infos_seq. Exits with a message when either
+# lookup comes back empty.
+function get_infos_seq()
+{
+  local func="get_infos_seq"
+
+  local keyword=":infos."
+  local infos_key=$(get_map_header_key $keyword)
+  if [ -z "$infos_key" ];then
+    echo "$func: keyword not input or infos_key not exisits"
+    exit
+  fi
+
+  local prefix=$(get_map_header_prefix)
+  local key=$infos_key
+  infos_seq=$(get_header_seq $prefix $key)
+  if [ -z "$infos_seq" ];then
+    echo "$func: infos_seq not exists"
+    exit
+  fi
+}
+
+# result of get_pg_epoch
+pg_epoch=
+# Read the "<pgid>_epoch" value of a placement group ($1) into the global
+# pg_epoch; exits with a message when no pgid is given or nothing is found.
+# The infos sequence is fixed to 1 (get_infos_seq is not consulted), which
+# also sets the global infos_seq to 1.
+function get_pg_epoch()
+{
+  local func="get_pg_epoch"
+  if [ -z "$1" ];then
+    echo "$func: no pgid input"
+    exit
+  fi
+  local pgid=$1
+  local key="${pgid}_epoch"
+
+  #get_infos_seq;
+  # infos_seq default to 1
+  infos_seq=1
+  # the expansion on the next line still reads the global value; only
+  # afterwards does the local of the same name hold the zero-padded form
+  local infos_seq=$(printf "%016d" $infos_seq)
+  local prefix="_USER_${infos_seq}_USER_"
+
+  pg_epoch=$(get_header_kv $prefix $key int)
+  if [ -z "$pg_epoch" ];then
+    echo "$func: $key not exisits"
+    exit
+  fi
+}
diff --git a/src/tools/rbd_recover_tool/metadata_h b/src/tools/rbd_recover_tool/metadata_h
new file mode 100644 (file)
index 0000000..0296962
--- /dev/null
@@ -0,0 +1,368 @@
+#!/bin/bash
+# file: metadata_h
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+. $my_dir/common_h
+. $my_dir/epoch_h
+
+# $image_name_in keeps the name as given: for output
+# $image_name keeps the converted name: for grep image hobjects from database
+image_name_in=
+image_name=
+# Record the image name passed as $1 in both globals; exits with a message
+# when no name is given.
+function input_image()
+{
+  local func="input_image"
+  if [ -z "$1" ];then
+    echo "$func: no image name input"
+    exit
+  fi
+
+  image_name_in=$1
+  # "_" -> "\u"
+  image_name=$(convert_underline $image_name_in)
+}
+
+#======================================== distinguish v1 or v2 ===================================  
+#image_list_v1=$single_node/$cluster-$id/image_list_v1
+#image_list_v2=$single_node/$cluster-$id/image_list_v2
+# Scan the regular files below $osd_data/current/ twice: paths matching the
+# first grep pattern go to $image_list_v1, paths matching the second one to
+# $image_list_v2.
+function get_image_list()
+{
+  local current_dir=$osd_data/current/
+
+  find $current_dir -type f | grep ".rbd__" > $image_list_v1
+  find $current_dir -type f | grep "rbd\\\\uid." > $image_list_v2
+}
+
+# Print the rbd image format of a header hobject: 1 when $1 matches a line of
+# $image_list_v1, 2 when it matches a line of $image_list_v2. When it is in
+# neither list nothing is printed and the function returns 1.
+# $1 is used as a grep pattern. Meant to be called in a command substitution:
+# the "exit" calls end that subshell.
+function get_image_format_by_hobject()
+{
+  local func="get_image_format_by_hobject"
+  if [ "$1"x = ""x ];then
+    exit
+  fi
+  local res1=`cat $image_list_v1|grep $1`
+  if [ "$res1"x != ""x ];then
+    echo 1
+    exit
+  fi
+  local res2=`cat $image_list_v2|grep  $1`
+  # The test used to be inverted ('='): it printed 2 only for objects absent
+  # from the v2 list and printed nothing for real v2 headers.
+  if [ "$res2"x != ""x ];then
+    echo 2
+    exit
+  fi
+  return 1
+}
+
+#======================================== image format v1 ========================================  
+# <image_name>.rbd includes 3 parts:
+# header + snap_count*snapshot + snap_count*snap_name
+# 
+# struct rbd_obj_header_ondisk {
+# 40   char text[40];
+# 24   char block_name[RBD_MAX_BLOCK_NAME_SIZE];
+# 4    char signature[4];
+# 8    char version[8];
+#      struct {
+# 1            __u8 order;
+# 1            __u8 crypt_type;
+# 1            __u8 comp_type;
+# 1            __u8 unused;
+#      } __attribute__((packed)) options;
+# 8    __le64 image_size;//hexdump -C s=80 n=8
+# 8    __le64 snap_seq;  //hexdump -C s=88 n=8
+# 4    __le32 snap_count;//hexdump -C s=96 n=4
+# 4    __le32 reserved;
+# 8    __le64 snap_names_len;//hexdump -C s=104 n=8
+#      struct rbd_obj_snap_ondisk snaps[0];
+# } __attribute__((packed));
+#
+# sizeof(rbd_obj_header_ondisk): 112
+#
+# struct rbd_obj_snap_ondisk {
+# 8    __le64 id;    //hexdump -C s=112+i*16 n=8   , i=[0, snap_count)
+# 8    __le64 image_size;//hexdump -C s=112+i*16+8 n=8, i=[0, snap_count)
+# } __attribute__((packed));
+# sizeof(rbd_obj_snap_ondisk): 16
+#
+# get snap_names from <image_name>.rbd
+# hexdump -e '10/1 "%_c"' -s $((112 + $snap_count*16)) -n $snap_names_len <image_name>.rbd
+# then split snap_names into array
+
+# Decode and print the metadata of a format 1 image from its
+# <image_name>.rbd header object.
+#
+# parameters:
+#   $1 path of the header object (must exist)
+#   $2 optional snapshot name; when given, only that snapshot is listed
+# Prints block_name, order, image_size and snap_seq, then a snapshot table
+# (ID, NAME, SIZE). Exits with a message when the object is missing or is not
+# reported as format 1 by get_image_format_by_hobject.
+function get_image_metadata_v1()
+{
+  local func="get_image_metadata_v1"
+  if [ "$1"x = ""x ];then
+    echo "$func: no image head object input"
+    exit
+  fi
+  local snap_name=
+  if [ "$2"x != ""x ];then
+    snap_name=$2
+  fi
+
+  if [ ! -e $1 ];then
+    echo "$func: $1 not exists"
+    exit
+  fi
+  local hobject_path=$1
+  local d_hobject_path=`dump_backslash $1`
+  local image_format=`get_image_format_by_hobject $d_hobject_path`
+  # quoted: the lookup can print nothing, and an unquoted empty value turned
+  # this test into a syntax error that let the object through
+  if [ "$image_format" != 1 ];then
+    echo "$func: image_format must be 1"
+    exit
+  fi
+
+  # decode rbd_obj_header_ondisk of <image_name>.rbd
+  # image_size: the 8 bytes at offset 80, taken from "hexdump -C" columns in
+  # reverse order and read as one hex number; snap_seq, snap_count and
+  # snap_names_len are decoded the same way at offsets 88, 96 and 104
+  local block_name=`hexdump -e '10/1 "%c"' -s 40 -n 24 $hobject_path`
+  local order=`hexdump -e '10/4 "%u"' -s 76 -n 1 $hobject_path`
+  local image_size=`hexdump -C -s 80 -n 8 $hobject_path|head -n 1|awk '{for (i=9; i>1; i--) {printf $i}}'`
+  image_size=$((16#$image_size))
+  local snap_seq=`hexdump -C -s 88 -n 8 $hobject_path|head -n 1|
+       awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'`
+  local snap_count=`hexdump -C -s 96 -n 4 $hobject_path|head -n 1|
+       awk '{num=""; for(i=5; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'`
+  local snap_names_len=`hexdump -C -s 104 -n 8 $hobject_path|head -n 1|
+       awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'`
+
+  echo -e "block_name:\t$block_name"
+  echo -e "order:\t\t$order"
+  echo -e "image_size:\t$image_size"
+  echo -e "snap_seq:\t$snap_seq"
+
+  # decode N rbd_obj_snap_ondisk of <image_name>.rbd
+  declare -a snap_ids
+  declare -a snap_names
+  declare -a snap_image_sizes
+  local size_header=112 #sizeof(rbd_obj_header_ondisk)
+  local size_snap=16 #sizeof(rbd_obj_snap_ondisk)
+  local offset=0
+  local id_off=0
+  local size_off=0
+  local i=
+  for ((i=0; i<$snap_count; i++))
+  do
+    # record i: 8 bytes of id followed by 8 bytes of image size
+    offset=$(($size_header + $i * $size_snap))
+    id_off=$offset
+    size_off=$(($offset + 8))
+    snap_ids[$i]=`hexdump -C -s $id_off -n 8 $hobject_path|head -n 1|
+       awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'`
+    snap_image_sizes[$i]=`hexdump -C -s $size_off -n 8 $hobject_path|head -n 1|
+       awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'`
+  done
+  # the snapshot names follow the snapshot records; they are split on the
+  # "\0" that hexdump's %_c prints between them
+  offset=$(($size_header + $snap_count * $size_snap))
+  snap_names=(`hexdump -e '10/1 "%_c"' -s $offset -n $snap_names_len $hobject_path|
+       awk -F "\\\\\\\\\\\\\\\\0" '{for(i=1; i<=NF; i++) {print $i" "} }'`);
+
+  echo -e "\t\tID\tNAME\t\tSIZE"
+  for ((i=0; i<$snap_count; i++))
+  do
+    # no name requested: list every snapshot
+    if [ "$snap_name"x = ""x ];then
+      echo -n -e "snapshot:\t"
+      echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}"
+      continue
+    fi
+    # name requested: print the match and stop
+    if [ "$snap_name"x = "${snap_names[$i]}"x ];then
+      echo -n -e "snapshot:\t"
+      echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}"
+      return
+    fi
+  done
+}
+
+#======================================== end image format v1 ========================================  
+
+#======================================== image format v2 ======================================== 
+
+# map_header, header_seq, header, key/value
+# eg. 
+# map_header           _HOBJTOSEQ_:rbd%uheader%e139a6b8b4567...head.2.68E826B6
+# meta_header_seq      17426
+# header:              _USER_0000000000017426_USER_:object_prefix
+#                      _USER_0000000000017426_USER_:order
+#                      _USER_0000000000017426_USER_:size
+#                      _USER_0000000000017426_USER_:snap_seq
+# key/value            ceph-kvstore-tool /storepath get _USER_0000000000017426_USER_ (object_prefix|order|size|snap_seq)
+
+# decode image id from image_id_hobject
+# The first 4 bytes of the object give the string length, the string itself
+# follows; the string is printed. Leaves silently when $1 is empty or does
+# not exist.
+function get_image_id()
+{
+  local func="get_image_id"
+  [ -z "$1" ] && exit
+
+  local image_id_hobject=$1 #from admin node's database
+  #echo "$func: $image_id_hobject not exists"
+  [ ! -e $image_id_hobject ] && exit
+
+  # get len of string
+  local n=$(hexdump -e '10/4 "%u"' -s 0 -n 4 $image_id_hobject)
+  # get string
+  hexdump -e '10/1 "%c"' -s 4 -n $n $image_id_hobject
+}
+
+#find image_id omap entry in omaplist
+map_header_prefix=
+map_header_key=
+# $1: image_id. Fills the global map_header_prefix from get_map_header_prefix
+# and the global map_header_key from get_map_header_key "header%e<image_id>".
+# Exits with a message when the image id is missing or no key is found.
+function get_map_header()
+{
+  local func="get_map_header"
+  local image_id=$1
+  if [ -z "$image_id" ];then
+    echo "$func: no image_id input"
+    exit;
+  fi
+
+  map_header_prefix=$(get_map_header_prefix)
+  local keyword="header%e${image_id}"
+  map_header_key=$(get_map_header_key $keyword)
+  if [ -z "$map_header_key" ];then
+    echo "$func: map_header_key is NULL(not in omaplist)"
+    exit
+  fi
+}
+
+#get meta header seq from map_header
+meta_header_seq=
+# $1: prefix, $2: key. Stores what get_header_seq prints for them in the
+# global meta_header_seq; exits with a message when either one is empty.
+function get_meta_header_seq() 
+{
+  local func="get_meta_header_seq"
+  if [ -z "$1" ];then
+    echo "$func: no prefix input"
+    exit;
+  fi
+  if [ -z "$2" ];then
+    echo "$func: no key input"
+    exit;
+  fi
+
+  local prefix=$1 key=$2
+  meta_header_seq=$(get_header_seq $prefix $key)
+}
+
+# get image metadata : object_prefix, order, image_size, snap_seq
+object_prefix=
+order=
+image_size=
+snap_seq=
+# $1: meta header seq, $2: optional snapshot name handed on to list_snaps_v2.
+# Fills the four globals above through get_header_kv, prints them and then
+# lists the snapshots.
+function get_image_metadata_v2()
+{
+  local func="get_image_metadata_v2"
+  if [ -z "$1" ];then
+    echo "$func: no meta_header_seq input"
+    exit;
+  fi
+
+  # the header key embeds the sequence zero-padded to 16 digits
+  local meta_header_seq=$(printf "%016d" $1)
+  #echo "$func: meta_header_seq = "$meta_header_seq
+  local ghobject_key="_USER_${meta_header_seq}_USER_"
+  local prefix=$ghobject_key
+
+  object_prefix=$(get_header_kv $prefix object_prefix string)
+  #object_prefix="rbd_data.$image_id"
+  order=$(get_header_kv $prefix order int)
+  image_size=$(get_header_kv $prefix size int)
+  snap_seq=$(get_header_kv $prefix snap_seq int)
+
+  echo -e "object_prefix:\t$object_prefix"
+  echo -e "order:\t\t$order"
+  echo -e "image_size:\t$image_size"
+  echo -e "snap_seq:\t$snap_seq"
+
+  # list snapshot
+  list_snaps_v2 $1 $2
+}
+
+# struct cls_rbd_snap {
+#      snapid_t id;
+#      string name;
+#      uint64_t image_size;
+#      uint64_t features;
+#      uint8_t protection_status;
+#      cls_rbd_parent parent;
+# }
+# decode cls_rbd_snap
+# 1    u8      struct_v
+# 1    u8      struct_compat
+# 4    u32     struct_len
+# 8    u64     snapid_t id //s=6 n=8
+# 4    u32     len of name //s=14 n=4
+# len  char    name        //s=18 n=len
+# 8    u64     image_size
+# 8    u64     features
+# ......
+#
+# Print the snapshot table (ID, NAME, SIZE) of a format 2 image.
+#
+# parameters:
+#   $1 meta header seq of the image
+#   $2 optional snapshot name; it is honoured only when exactly two arguments
+#      are passed, and then only the matching snapshot is printed
+# Reads the globals omap_list and omap_path.
+function list_snaps_v2()
+{
+  local func="list_snaps_v2"
+  if [ "$1"x = ""x ];then
+    exit
+  fi
+  local sname=
+  if [ $# -eq 2 ];then
+    sname=$2
+  fi
+  local meta_header_seq=`printf "%016d" $1`
+  local prefix="_USER_"$meta_header_seq"_USER_"
+  # collect the part after "snapshot_" of every snapshot key listed under this
+  # prefix in $omap_list, in reverse sort order
+  local keys=(`awk -F ":" '/snapshot_/ && $1 == "'"$prefix"'" {if ($2 == "") exit; split($2, arr, "_"); 
+       print arr[2];}' $omap_list|sort -r`)
+  echo -e "\t\tID\tNAME\t\tSIZE"
+  for key in ${keys[@]}
+  do
+    key="snapshot_$key"
+    # arr: the second ":"-separated field of each output line, split into words.
+    # NOTE(review): the code below treats every element as one byte of the
+    # encoded cls_rbd_snap written as two hex digits -- confirm against the
+    # ceph-kvstore-tool output format.
+    local arr=(`ceph-kvstore-tool $omap_path get $prefix $key|awk -F ":" '{print $2}'`);
+    # get snap_name
+    # name length: bytes 14..17, joined highest index first (little endian)
+    tmp=
+    for ((i=17; i>13; i--))
+    do
+      tmp="$tmp${arr[$i]}"
+    done
+    local len=$((16#$tmp))
+    local snap_name=
+    # the name itself: len bytes starting at index 18
+    for ((i=18; i<$((18+$len)); i++))
+    do
+      # convert ascii to char
+      local char=`echo -e "\x${arr[$i]}"` 
+      snap_name="$snap_name$char"
+    done
+    # get snap_id (little endian)
+    # bytes 6..13
+    local tmp=
+    for ((i=13; i>5; i--))
+    do
+      tmp="$tmp${arr[$i]}" 
+    done
+    local snap_id=$((16#$tmp))
+    # get image_size of current snap (little endian)
+    # the 8 bytes that follow the name: indexes 18+len .. 25+len
+    tmp=
+    for ((i=$((25+$len)); i>$((17+$len)); i--))
+    do
+      tmp="$tmp${arr[$i]}"
+    done
+    local image_size=$((16#$tmp))
+    # no name requested: print every snapshot
+    if [ "$sname"x = ""x ];then
+      echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size" 
+      continue
+    fi
+    # name requested: print the match and stop
+    if [ "$sname"x = "$snap_name"x ];then
+      echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size" 
+      return
+    fi
+  done
+}
+
+#======================================== end image format v2 ========================================  
diff --git a/src/tools/rbd_recover_tool/osd_job b/src/tools/rbd_recover_tool/osd_job
new file mode 100755 (executable)
index 0000000..f3e2ff3
--- /dev/null
@@ -0,0 +1,170 @@
+#!/bin/bash
+# file: osd_job
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+
+. $my_dir/common_h
+. $my_dir/metadata_h
+. $my_dir/epoch_h
+
+function check_ceph_osd()
+{
+  local func="check_ceph_osd"
+  local host=`hostname`
+  # if ceph-osd service is still running, except flush-journal
+  if [ "`ps aux|grep ceph-osd|grep -v flush-journal|grep -v grep`"x != ""x ];then
+    echo "[$host]: $func: ceph-osd is running..., stop it"
+    exit 
+  fi
+}
+
+function cat_pg_epoch()
+{
+  local func="cat_pg_epoch" 
+  init_env_osd $1
+  if [ -e $node_pg_epoch ];then
+    cat $node_pg_epoch
+  fi
+} 
+
+function cat_image_v1()
+{
+  local func="cat_image_v1" 
+  init_env_osd $1
+  if [ -e $image_v1 ];then
+    cat $image_v1
+  fi
+} 
+
+function cat_image_v2()
+{
+  local func="cat_image_v2" 
+  init_env_osd $1
+  if [ -e $image_v2 ];then
+    cat $image_v2
+  fi
+} 
+
+function flush_osd_journal()
+{
+  local func="flush_osd_journal"
+  init_env_osd $1
+  local osd_data_path=$osd_data
+  local osd_journal_path=$osd_data/journal 
+  local whoami_path=$osd_data/whoami
+  local host=`hostname`
+  if [ ! -e $whoami_path ];then
+    echo "[$host]: $func: $whoami_path not exists"
+    exit
+  fi
+  local whoami=`cat $whoami_path`
+  echo "[$host]: $func ..."
+  ceph-osd -i $whoami --osd-data $osd_data_path --osd-journal $osd_journal_path --flush-journal >/dev/null
+  if [ $? -ne 0 ];then
+    echo "[$host]: $func: flush osd journal failed"
+    exit
+  fi
+}
+
+function do_omap_list()
+{
+  local func="do_omap_list"
+  init_env_osd $1
+  local host=`hostname`
+  echo "[$host]: $func ..."
+  get_omap_list
+}
+
+# get all pgs epoch 
+function do_pg_epoch()
+{
+  local func="do_pg_epoch"
+  init_env_osd $1
+  local node=`hostname`
+  get_pgid_list
+  >$node_pg_epoch
+  local pgid=
+  local data_path=
+  local host=`hostname`
+  echo "[$host]: $func ..."
+  while read line
+  do
+  {
+    pgid=`echo $line|awk '{print $1}'`
+    data_path=`echo $line|awk '{print $2}'`
+    get_pg_epoch $pgid
+    echo -e "$node $pgid $pg_epoch $data_path" >>$node_pg_epoch
+  } 
+  done < $pgid_list
+}
+
+# get a list of the images on this osd node; the pg epoch may not be the latest, the admin node will sort that out
+function do_image_list()
+{
+  local func="do_image_list"
+  init_env_osd $1
+  get_image_list   
+  local node=`hostname`
+  >$image_v1
+  >$image_v2
+  local host=`hostname`
+  echo "[$host]: $func ..."
+  for line in `cat $image_list_v1`
+  do
+    pgid=`get_pgid $line`
+    get_pg_epoch $pgid
+    echo "$node $line $pg_epoch" >> $image_v1
+  done
+  for line in `cat $image_list_v2`
+  do
+    pgid=`get_pgid $line`
+    get_pg_epoch $pgid
+    echo "$node $line $pg_epoch" >> $image_v2
+  done
+}
+
+function do_image_id()
+{
+  # Thin wrapper: initialise the osd environment from $1, then call get_image_id with $2.
+  local func="do_image_id"
+  init_env_osd $1
+  get_image_id $2
+}
+
+function do_image_metadata_v1()
+{
+  # Thin wrapper around get_image_metadata_v1.
+  #   $1: passed to init_env_osd
+  #   $2: image header hobject
+  #   $3: snapshot name (may be empty)
+  local func="do_image_metadata_v1"
+  init_env_osd $1
+  local image_header_hobject=$2
+  local snap_name=$3
+  get_image_metadata_v1 $image_header_hobject $snap_name
+}
+
+function do_image_metadata_v2()
+{
+  # Resolve the meta header sequence of a format-2 image, then print its metadata.
+  #   $1: passed to init_env_osd
+  #   $2: image id
+  #   $3: image header hobject (stored in a local, not passed on explicitly)
+  #   $4: snapshot name (may be empty)
+  local func="do_image_metadata_v2"
+  init_env_osd $1
+  local image_id=$2
+  local image_header_hobject=$3
+  local snap_name=$4
+  # $map_header_prefix/$map_header_key are read right after get_map_header,
+  # $meta_header_seq right after get_meta_header_seq; presumably set by those calls
+  get_map_header $image_id 
+  get_meta_header_seq $map_header_prefix $map_header_key
+  get_image_metadata_v2 $meta_header_seq $snap_name
+}
+
+check_ceph_osd
+$*
diff --git a/src/tools/rbd_recover_tool/rbd-recover-tool b/src/tools/rbd_recover_tool/rbd-recover-tool
new file mode 100755 (executable)
index 0000000..b24992d
--- /dev/null
@@ -0,0 +1,327 @@
+#!/bin/bash
+# file: rbd-recover-tool
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+
+# rbd-recover-tool is an offline recover tool for rbd image in replicated pool
+# when ceph cluster is stopped.
+# it is a simple disaster recovery policy, just for urgent conditions
+
+my_dir=$(dirname "$0")
+
+. $my_dir/common_h
+. $my_dir/metadata_h
+. $my_dir/epoch_h
+. $my_dir/database_h
+
+#scp files from admin node to osd node
+file1=common_h
+file2=metadata_h
+file3=epoch_h
+file4=osd_job
+
+#------------ admin node's action -------------
+
+function scp_file()
+{
+  local func="scp_file"
+  file=$1
+  if [ "$1"x = ""x ];then
+    echo "$func: not file input"
+    exit
+  fi
+  for host in `cat $osd_host`
+  do
+  {
+    echo "$func: $host"
+    scp $ssh_option $file $host:$job_path  1>/dev/null
+  } &
+  done
+}
+
+function scp_files()
+{
+  local func="scp_files"
+  for host in `cat $osd_host`
+  do
+  {
+    echo "$func: $host"
+    scp $ssh_option $file1 $host:$job_path
+    scp $ssh_option $file2 $host:$job_path
+    scp $ssh_option $file3 $host:$job_path
+    scp $ssh_option $file4 $host:$job_path
+  } &
+  done
+  wait
+  echo "$func: finish"
+}
+
+function scatter_node_jobs()
+{
+  local func="scatter_node_jobs"
+  local host=
+  local data_path=
+  echo "$func: flush osd journal & generate infos: omap, pg, image metadata ..."
+
+  trap 'echo $func failed; exit' INT HUP
+  while read line
+  do
+  {
+    host=`echo $line|awk '{print $1}'`
+    data_path=`echo $line|awk '{print $2}'`
+    check_osd_process $host
+
+    cmd="mkdir -p $job_path"
+    ssh $ssh_option $host $cmd
+    scp $ssh_option $file1 $host:$job_path  >/dev/null
+    scp $ssh_option $file2 $host:$job_path  >/dev/null
+    scp $ssh_option $file3 $host:$job_path  >/dev/null
+    scp $ssh_option $file4 $host:$job_path  >/dev/null
+
+    cmd="bash $job_path/osd_job flush_osd_journal $data_path;"
+    cmd="$cmd $job_path/osd_job do_omap_list $data_path;"
+    cmd="$cmd bash $job_path/osd_job do_pg_epoch $data_path;"
+    cmd="$cmd bash $job_path/osd_job do_image_list $data_path;"
+
+    ssh $ssh_option $host $cmd </dev/null
+  } &
+  done < $osd_host_path
+  wait
+  echo "$func: finish"
+}
+
+function gather_node_infos()
+{
+  # Collect the per-node results into $pg_coll, $image_coll_v1 and
+  # $image_coll_v2 by running the osd_job cat_* commands on every
+  # "<host> <data_path>" line of $osd_host_path.
+  local func="gather_node_infos"
+  echo "$func ..."
+  # start from empty collection files
+  >$pg_coll
+  >$image_coll_v1
+  >$image_coll_v2
+  trap 'echo $func failed; exit' INT HUP
+  while read line
+  do
+  {
+    host=`echo $line|awk '{print $1}'`
+    data_path=`echo $line|awk '{print $2}'`
+    echo "$func: $host"
+    check_osd_process $host
+
+    # NOTE(review): all background jobs append to the same three files at
+    # the same time; confirm that output of different hosts cannot interleave
+    #pg epoch
+    cmd1="bash $job_path/osd_job cat_pg_epoch $data_path"
+    ssh $ssh_option $host $cmd1 >> $pg_coll
+    #image v1
+    cmd2="bash $job_path/osd_job cat_image_v1 $data_path"
+    ssh $ssh_option $host $cmd2 >> $image_coll_v1
+    #image v2
+    cmd3="bash $job_path/osd_job cat_image_v2 $data_path"
+    ssh $ssh_option $host $cmd3 >> $image_coll_v2
+  } &
+  done < $osd_host_path
+  # block until every per-host job has ended
+  wait
+  echo "$func: finish"
+}
+
+function scatter_gather()
+{
+  local func="scatter_gather"
+  if [ ! -s $osd_host ];then
+    echo "$func: no osd_host input"
+    exit
+  fi
+  if [ ! -s $mon_host ];then
+    echo "$func: no mon_host input"
+    exit
+  fi
+  scatter_node_jobs
+  gather_node_infos
+}
+
+
+#------------- operations --------------
+
+# "database" sub-command: run the scatter/gather phase, then gen_database
+function database()
+{
+  scatter_gather
+  gen_database
+}
+
+# "list" sub-command: delegates to list_images
+function list()
+{
+  list_images
+}
+
+# "lookup" sub-command: delegates to lookup_image
+#   $1: pool id, $2: image name, $3: snapshot name (may be empty)
+function lookup()
+{
+  lookup_image $1 $2 $3
+}
+
+# "recover" sub-command: delegates to recover_image
+#   $1: pool id, $2: image name, $3: snapshot name or marker, $4: output directory (may be empty)
+function recover()
+{
+  recover_image $1 $2 $3 $4
+}
+
+#------------- helper -------------
+
+# print the help text for all sub-commands of rbd-recover-tool to stdout
+function usage()
+{
+  local cmd_name="rbd-recover-tool"
+  echo 
+  echo "$cmd_name is used to recover rbd image of replicated pool, 
+       when all ceph services are stopped"
+  echo "Usage:"
+  echo "$cmd_name database
+                       gather pg info, object info, image metadata, 
+                       and epoch info from all osd nodes,
+                       this will cosume a long time, just be patient, 
+                       especially when scale up to 1000+ osds"
+  echo "$cmd_name list
+                       list all rbd images of all replicated pools, 
+                       before to lookup & recover"
+  echo "$cmd_name lookup  <pool_id>/<image_name>[@[<snap_name>]]
+                       show image metadata: image format, rbd id, size, order, snapseq
+                       In addtion, for image with snapshots, 
+                       this will list all snapshot infomations"
+  echo "$cmd_name recover <pool_id>/<image_name>[@[<snap_name>]] [</path/to/store/image>]
+                       all snapshots share one image head, to economize disk space
+                       so there is only one snapshot at any time,
+                       image is saved at </path/to/store/image>/pool_<pool_id>/image_name/image_name
+                       cat <path/to/store/image>/pool_<pool_id>/image_name/@CURRENT,
+                       will show snapid
+                       recover to raw image/nosnap/head: <image_name>
+                       rollback to image head:           <image_name>@
+                       rollback to image snap:           <image_name>@<snap_name>
+                       recover steps:
+                       1. recover image nosnap (only one time)
+                       2. rollback to image snap"
+}
+
+function get_path()
+{
+  local func="get_path"
+  if [ $# -lt 1 ];then
+    return
+  fi
+  if [[ $1 =~ // ]];then
+    return # "/path//to" is invalid
+  fi
+  local parent=`dirname $1`
+  local name=`basename $1`
+  if [ "$parent"x = "/"x ];then
+    echo "$parent$name"
+  else
+    echo -n "$parent/$name"
+  fi
+}
+
+function admin_cmd()
+{
+  local func="admin_cmd"
+  if [ $# -lt 1 ];then
+    usage
+    exit
+  fi
+  if [ "$1"x = "-h"x ] || [ "$1"x = "--help"x ];then
+    usage
+    exit
+  fi
+  
+  if [ "$1"x = "database"x ];then
+    if [ $# -gt 1 ];then
+      usage
+      exit
+    fi
+    # remove osd_host to refresh osd_host and osd_host_mapping
+    rm -f $osd_host
+    init_env_admin
+    database
+  elif [ "$1"x = "list"x ];then
+    if [ $# -gt 1 ];then
+      usage
+      exit
+    fi
+    init_env_admin
+    list
+  elif [ "$1"x = "lookup"x ];then
+    if [ $# -gt 2 ];then
+      usage
+      exit
+    fi
+    local pool_id=-1
+    local image_name=
+    local snap_name=
+    if [[ $2 =~  ^([^@/]+)/([^@/]+)$ ]];then
+      pool_id="${BASH_REMATCH[1]}"
+      image_name="${BASH_REMATCH[2]}"
+    elif [[ $2 =~  ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
+      pool_id="${BASH_REMATCH[1]}"
+      image_name="${BASH_REMATCH[2]}"
+      snap_name="${BASH_REMATCH[3]}"
+    else
+      echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
+      exit
+    fi
+    init_env_admin
+    lookup $pool_id $image_name $snap_name
+  elif [ "$1"x = "recover"x ];then
+    if [ $# -lt 2 ] || [ $# -gt 3 ];then
+      usage
+      exit
+    fi
+    local pool_id=-1
+    local image_name=
+    local snap_name=@
+    local image_dir=
+    if [[ $2 =~  ^([^@/]+)/([^@/]+)$ ]];then
+      pool_id="${BASH_REMATCH[1]}"
+      image_name="${BASH_REMATCH[2]}"
+    elif [[ $2 =~  ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
+      pool_id="${BASH_REMATCH[1]}"
+      image_name="${BASH_REMATCH[2]}"
+      snap_name="${BASH_REMATCH[3]}"
+      if [ "$snap_name"x = ""x ];then
+        snap_name=@@
+      fi
+    else
+      echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
+      exit
+    fi
+    if [ $# = 3 ];then
+      image_dir=`get_path $3`
+      if [ "image_dir"x = ""x ];then
+        echo "$3 invalid"
+        exit
+      fi
+    fi
+    init_env_admin
+    recover $pool_id $image_name $snap_name $image_dir
+  elif [ "$1"x = "scp_files"x ];then
+    if [ $# -gt 1 ];then
+      exit
+    fi
+    admin_parse_osd
+    scp_files
+  elif [ "$1"x = "scp_file"x ];then
+    if [ $# -gt 2 ];then
+      exit
+    fi
+    admin_parse_osd
+    scp_file $2
+  else
+    echo "$func: $1: command not found"
+  fi
+}
+
+admin_cmd $*
diff --git a/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh b/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh
new file mode 100755 (executable)
index 0000000..80b0a8c
--- /dev/null
@@ -0,0 +1,542 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+
+# unit test case for rbd-recover-tool
+
+#prepare:
+# - write config files: config/osd_host, config/mon_host, config/storage_path, config/mds_host if exist mds
+#step 1. rbd export all images as you need
+#step 2. stop all ceph services
+#step 3. use rbd-recover-tool to recover all images
+#step 4. compare md5sum of recover image with that of export image who has the same image name
+
+ssh_opt="-o ConnectTimeout=1"
+my_dir=$(dirname "$0")
+tool_dir=$my_dir
+
+#storage_path=$my_dir/config/storage_path
+mon_host=$my_dir/config/mon_host
+osd_host=$my_dir/config/osd_host
+mds_host=$my_dir/config/mds_host
+
+test_dir= # `cat $storage_path`
+export_dir= #$test_dir/export
+recover_dir= #$test_dir/recover
+image_names= #$test_dir/image_names
+online_images= #$test_dir/online_images, all images on ceph rbd pool
+gen_db= #$test_dir/gen_db, label database if exist
+pool=rbd
+pool_id=2
+
+function get_pool_id()
+{
+  local pool_id_file=/tmp/pool_id_file.$$$$
+  ceph osd pool stats $pool|head -n 1|awk '{print $4}' >$pool_id_file
+  if [ $? -ne 0 ];then
+    echo "$func: get pool id failed: pool = $pool"
+    rm -f $pool_id_file
+    exit
+  fi
+  pool_id=`cat $pool_id_file`
+  echo "$func: pool_id = $pool_id"
+  rm -f $pool_id_file
+}
+
+function init()
+{
+  local func="init"
+  if [ $# -eq 0 ];then
+    echo "$func: must input <path> to storage images, enough disk space is good"
+    exit
+  fi
+  if [ ! -s $osd_host ];then
+    echo "$func: config/osd_host not exists or empty"
+    exit
+  fi
+  if [ ! -s $mon_host ];then
+    echo "$func: config/mon_host not exists or empty"
+    exit
+  fi
+  if [ ! -e $mds_host ];then
+    echo "$func: config/mds_host not exists"
+    exit
+  fi
+  test_dir=$1
+  export_dir=$test_dir/export
+  recover_dir=$test_dir/recover
+  image_names=$test_dir/image_names
+  online_images=$test_dir/online_images
+  gen_db=$test_dir/gen_db
+
+  trap 'echo "ceph cluster is stopped ..."; exit;' INT
+  ceph -s >/dev/null
+  get_pool_id
+
+  mkdir -p $test_dir
+  mkdir -p $export_dir
+  mkdir -p $recover_dir
+  rm -rf $export_dir/*
+  rm -rf $recover_dir/*
+}
+
+function do_gen_database()
+{
+  local func="do_gen_database"
+  if [ -s $gen_db ] && [ `cat $gen_db` = 1 ];then
+    echo "$func: database already existed"
+    exit
+  fi
+  bash $tool_dir/rbd-recover-tool database
+  echo 1 >$gen_db 
+}
+
+#check if all ceph processes are stopped
+function check_ceph_service()
+{
+  local func="check_ceph_service"
+  local res=`cat $osd_host $mon_host $mds_host|sort -u|tr -d [:blank:]|xargs -n 1 -I @ ssh $ssh_opt @ "ps aux|grep -E \"(ceph-osd|ceph-mon|ceph-mds)\"|grep -v grep"`
+  if [ "$res"x != ""x ];then
+    echo "$func: NOT all ceph services are stopped"
+    return 1
+    exit
+  fi
+  echo "$func: all ceph services are stopped"
+  return 0
+}
+
+function stop_ceph()
+{
+  local func="stop_ceph"
+  #cat osd_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-osd" 
+  while read osd
+  do
+  {
+    osd=`echo $osd|tr -d [:blank:]`
+    if [ "$osd"x = ""x ];then
+      continue
+    fi
+    #ssh $ssh_opt $osd "killall ceph-osd ceph-mon ceph-mds" </dev/null
+    ssh $ssh_opt $osd "killall ceph-osd" </dev/null
+  } &
+  done < $osd_host
+  wait
+  echo "waiting kill all osd ..."
+  sleep 1
+  #cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon ceph-osd ceph-mds" 
+  cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon" 
+  #cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds ceph-mon ceph-osd" 
+  cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds" 
+}
+
+function create_image()
+{
+  local func="create_image"
+  if [ ${#} -lt 3 ];then
+    echo "create_image: parameters: <image_name> <size> <image_format>"
+    exit
+  fi
+  local image_name=$1
+  local size=$2
+  local image_format=$3
+  if [ $image_format -lt 1 ] || [ $image_format -gt 2 ];then
+    echo "$func: image_format must be 1 or 2"
+    exit
+  fi
+  local res=`rbd list|grep -E "^$1$"` 
+  echo "$func $image_name ..."
+  if [ "$res"x = ""x ];then
+    rbd -p $pool create $image_name --size $size --image_format $image_format
+  else
+    if [ $image_format -eq 2 ];then
+      rbd snap ls $image_name|tail -n +2|awk '{print $2}'|xargs -n 1 -I % rbd snap unprotect $image_name@%
+    fi
+    rbd snap purge $image_name
+    #rbd rm $image_name
+    rbd -p $pool resize --allow-shrink --size $size $image_name
+  fi
+}
+
+function export_image()
+{
+  local func="export_image"
+
+  if [ $# -lt 2 ];then
+    echo "$func: parameters: <image_name> <image_format> [<image_size>]"
+    exit
+  fi
+
+  local image_name=$1
+  local format=$(($2)) 
+  local size=$(($3)) #MB
+  
+  if [ $format -ne 1 ] && [ $format -ne 2 ];then
+    echo "$func: image format must be 1 or 2"
+    exit
+  fi
+
+  if [ $size -eq 0 ];then
+    size=24 #MB
+    echo "$func: size = $size"
+  fi
+  local mnt=/rbdfuse 
+
+  mount |grep "rbd-fuse on /rbdfuse" &>/dev/null
+  if [ $? -ne 0 ];then
+    rbd-fuse $mnt
+  fi
+    
+  create_image $image_name $size $format
+  dd conv=notrunc if=/dev/urandom of=$mnt/$image_name bs=4M count=$(($size/4))
+  
+  local export_image_dir=$export_dir/pool_$pool_id/$image_name
+  mkdir -p $export_image_dir
+  local export_md5_nosnap=$export_image_dir/@md5_nosnap
+  >$export_md5_nosnap
+  local export_image_path=$export_image_dir/$image_name
+  rm -f $export_image_path
+
+  rbd export $pool/$image_name $export_image_path
+  md5sum $export_image_path |awk '{print $1}' >$export_md5_nosnap 
+}
+
+function recover_image()
+{
+  local func="recover_snapshots"
+  if [ $# -lt 1 ];then
+    echo "$func: parameters: <image_name>"
+    exit
+  fi
+
+  local image_name=$1
+  #pool_id=29
+
+  local recover_image_dir=$recover_dir/pool_$pool_id/$image_name
+  mkdir -p $recover_image_dir
+  local recover_md5_nosnap=$recover_image_dir/@md5_nosnap
+  >$recover_md5_nosnap
+  local snapshot=
+  
+  bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir
+  md5sum $recover_image_dir/$image_name|awk '{print $1}' >$recover_md5_nosnap
+}
+
+function make_snapshot()
+{
+  # Overwrite part of an image with random data, snapshot it, export the
+  # image and append the md5sum of the export to <export_image_dir>/@md5.
+  #   $1: file to write to (the image below the rbd-fuse mount)
+  #   $2: dd seek offset, in 1M blocks
+  #   $3: number of 1M blocks to write
+  #   $4: snapshot name
+  #   $5: directory the export and @md5 go to
+  # NOTE: $image_name is read but never assigned here, so it has to come
+  # from the calling function's scope.
+  local func="make_snapshot"
+  if [ $# -lt 5 ];then
+    echo "$func: parameters: <ofile> <seek> <count> <snap> <export_image_dir>"
+    exit
+  fi
+  local ofile=$1
+  local seek=$(($2))
+  local count=$(($3))
+  local snap=$4
+  local export_image_dir=$5
+
+  if [ $seek -lt 0 ];then
+    echo "$func: seek can not be minus"
+    exit
+  fi
+
+  if [ $count -lt 1 ];then
+    echo "$func: count must great than zero"
+    exit
+  fi
+
+  echo "[$snap] $func ..."
+  echo "$1 $2 $3 $4"
+  # nothing to do when a snapshot line matching $snap already exists
+  rbd snap ls $image_name|grep $snap;
+  
+  local res=$?
+  if [ $res -eq 0 ];then
+    return $res
+  fi
+
+  dd conv=notrunc if=/dev/urandom of=$ofile bs=1M count=$count seek=$seek 2>/dev/null
+  snapshot=$image_name@$snap 
+  rbd snap create $snapshot
+  rm -f $export_image_dir/$snapshot
+  # the image head is exported right after the snapshot was taken
+  rbd export $pool/$image_name $export_image_dir/$snapshot
+  # run md5sum inside the directory so @md5 records the bare file name
+  pushd $export_image_dir >/dev/null
+  md5sum $snapshot >> @md5
+  popd >/dev/null
+}
+
+function recover_snapshots()
+{
+  local func="recover_snapshots"
+  if [ $# -lt 1 ];then
+    echo "$func: parameters: <image_name>"
+    exit
+  fi
+
+  local image_name=$1
+  #pool_id=29
+
+  local recover_image_dir=$recover_dir/pool_$pool_id/$image_name
+  mkdir -p $recover_image_dir
+  local recover_md5=$recover_image_dir/@md5
+  >$recover_md5
+  local snapshot=
+
+  
+  # recover head
+  bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir
+
+  # recover snapshots
+  for((i=1; i<10; i++))
+  do
+    snapshot=snap$i
+    bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name@$snapshot $recover_dir
+    pushd $recover_image_dir >/dev/null
+    local chksum=`md5sum $image_name|awk '{print $1}'` 
+    echo "$chksum  $image_name@$snapshot" >>@md5
+    popd >/dev/null
+  done
+}
+
+function export_snapshots()
+{
+  # Create an image and take nine snapshots of it (snap1..snap9) through
+  # make_snapshot, which exports each state and records its md5sum in
+  # $export_dir/pool_$pool_id/<image_name>/@md5.
+  #   $1: image name   $2: image format (1 or 2)   $3: size in MB (default 24)
+  local func="export_snapshots"
+
+  if [ $# -lt 2 ];then
+    echo "$func: parameters: <image_name> <image_format> [<image_size>]"
+    exit
+  fi
+
+  # make_snapshot reads $image_name from this scope
+  local image_name=$1
+  local format=$(($2)) 
+  local size=$(($3)) #MB
+  
+  if [ $format -ne 1 ] && [ $format -ne 2 ];then
+    echo "$func: image format must be 1 or 2"
+    exit
+  fi
+
+  # a missing third argument evaluates to 0 above
+  if [ $size -eq 0 ];then
+    size=24 #MB
+    echo "$func: size = $size"
+  fi
+  local mnt=/rbdfuse 
+
+  # mount rbd-fuse unless the mount table already shows it
+  mount |grep "rbd-fuse on /rbdfuse" &>/dev/null
+  if [ $? -ne 0 ];then
+    rbd-fuse $mnt
+  fi
+    
+  create_image $image_name $size $format
+  
+  local export_image_dir=$export_dir/pool_$pool_id/$image_name
+  mkdir -p $export_image_dir
+  local export_md5=$export_image_dir/@md5
+  # start from an empty checksum file
+  >$export_md5
+
+  # create 9 snapshots
+  # image = {object0, object1, object2, object3, object4, object5, ...}
+  #
+  # snap1 : init/write all objects 
+  # snap2 : write object0
+  # snap3 : write object1
+  # snap4 : write object2
+  # snap5 : write object3
+  # snap6 : write object4
+  # snap7 : write object5
+  # snap8 : write object0
+  # snap9 : write object3
+
+  # arguments: <file> <seek in MB> <count in MB> <snap name> <export dir>
+  make_snapshot $mnt/$image_name 0 $size snap1 $export_image_dir
+  make_snapshot $mnt/$image_name 0  1    snap2 $export_image_dir
+  make_snapshot $mnt/$image_name 4  1    snap3 $export_image_dir
+  make_snapshot $mnt/$image_name 8  1    snap4 $export_image_dir
+  make_snapshot $mnt/$image_name 12 1    snap5 $export_image_dir
+  make_snapshot $mnt/$image_name 16 1    snap6 $export_image_dir
+  make_snapshot $mnt/$image_name 20 1    snap7 $export_image_dir
+  make_snapshot $mnt/$image_name 1  1    snap8 $export_image_dir
+  make_snapshot $mnt/$image_name 13 1    snap9 $export_image_dir
+}
+
+function check_recover_nosnap()
+{
+  local func="check_recover_nosnap"
+  if [ $# -lt 3 ];then
+    echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>"
+  fi
+  local export_md5=$1
+  local recover_md5=$2
+  local image_name=$3
+
+  local ifpassed="FAILED"
+  echo "================ < $image_name nosnap > ================" 
+
+  local export_md5sum=`cat $export_md5` 
+  local recover_md5sum=`cat $recover_md5` 
+
+  if [ "$export_md5sum"x != ""x ] && [ "$export_md5sum"x = "$recover_md5sum"x ];then
+    ifpassed="PASSED"
+  fi
+  echo "export:  $export_md5sum"
+  echo "recover: $recover_md5sum $ifpassed"
+}
+
+function check_recover_snapshots()
+{
+  local func="check_recover_snapshots"
+  if [ $# -lt 3 ];then
+    echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>"
+  fi
+  local export_md5=$1
+  local recover_md5=$2
+  local image_name=$3
+
+  local ifpassed="FAILED"
+  echo "================ < $image_name snapshots > ================" 
+
+  OIFS=$IFS
+  IFS=$'\n'
+  local export_md5s=(`cat $export_md5`)
+  local recover_md5s=(`cat $recover_md5`)
+  for((i=0; i<9; i++))
+  do
+    OOIFS=$IFS
+    IFS=$'  '
+    local x=$(($i+1))
+    snapshot=snap$x
+
+    local export_arr=(`echo ${export_md5s[$i]}`)
+    local recover_arr=(`echo ${recover_md5s[$i]}`)
+    echo "export:  ${export_md5s[$i]}"
+    if [ "${export_arr[1]}"x != ""x ] && [ "${export_arr[1]}"x = "${recover_arr[1]}"x ];then
+      ifpassed="PASSED"
+    fi
+    echo "recover: ${recover_md5s[$i]} $ifpassed"
+    IFS=$OOIFS
+  done
+  IFS=$OIFS
+}
+
+# step 1: export image, snapshot
+function do_export_nosnap()
+{
+  export_image image_v1_nosnap 1
+  export_image image_v2_nosnap 2
+}
+
+function do_export_snap()
+{
+  export_snapshots  image_v1_snap 1
+  export_snapshots  image_v2_snap 2
+}
+
+# step 2: stop ceph cluster and gen database
+function stop_cluster_gen_database()
+{
+  trap 'echo stop ceph cluster failed; exit;' INT HUP
+  stop_ceph 
+  sleep 2
+  check_ceph_service
+  local res=$?
+  while [ $res -ne 0 ]
+  do
+    stop_ceph
+    sleep 2
+    check_ceph_service
+    res=$?
+  done
+
+  echo 0 >$gen_db
+  do_gen_database
+}
+
+# step 3: recover image,snapshot
+function do_recover_nosnap()
+{
+  recover_image image_v1_nosnap
+  recover_image image_v2_nosnap
+}
+
+function do_recover_snap()
+{
+  recover_snapshots image_v1_snap
+  recover_snapshots image_v2_snap
+}
+
+# step 4: check md5sum pair<export_md5sum, recover_md5sum>
+function do_check_recover_nosnap()
+{
+  local image1=image_v1_nosnap
+  local image2=image_v2_nosnap
+
+  local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5_nosnap
+  local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5_nosnap
+  local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5_nosnap
+  local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5_nosnap
+
+  check_recover_nosnap $export_md5_1 $recover_md5_1 $image1 
+  check_recover_nosnap $export_md5_2 $recover_md5_2 $image2
+}
+
+function do_check_recover_snap()
+{
+  local image1=image_v1_snap
+  local image2=image_v2_snap
+
+  local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5
+  local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5
+  local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5
+  local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5
+
+  check_recover_snapshots $export_md5_1 $recover_md5_1 $image1 
+  check_recover_snapshots $export_md5_2 $recover_md5_2 $image2
+}
+
+# test case 1: images without snapshots only
+# (export -> stop cluster and build database -> recover -> compare checksums)
+function test_case_1()
+{
+  do_export_nosnap
+  stop_cluster_gen_database
+  do_recover_nosnap
+  do_check_recover_nosnap
+}
+
+# test case 2: images with snapshots only
+function test_case_2()
+{
+  do_export_snap
+  stop_cluster_gen_database
+  do_recover_snap
+  do_check_recover_snap
+}
+
+# test case 3: both kinds of images in one run; the cluster is stopped once,
+# after all exports are done
+function test_case_3()
+{
+  do_export_nosnap
+  do_export_snap
+
+  stop_cluster_gen_database
+
+  do_recover_nosnap
+  do_recover_snap
+
+  do_check_recover_nosnap
+  do_check_recover_snap
+}
+
+
+# entry point: the command-line arguments go to init; only test_case_3 is run
+init $*
+test_case_3