--- /dev/null
+#!/bin/bash
+# file: osd_job
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+
+. $my_dir/common_h
+. $my_dir/metadata_h
+. $my_dir/epoch_h
+
+function check_ceph_osd()
+{
+ local func="check_ceph_osd"
+ local host=`hostname`
+ # if ceph-osd service is still running, except flush-journal
+ if [ "`ps aux|grep ceph-osd|grep -v flush-journal|grep -v grep`"x != ""x ];then
+ echo "[$host]: $func: ceph-osd is running..., stop it"
+ exit
+ fi
+}
+
+function cat_pg_epoch()
+{
+ local func="cat_pg_epoch"
+ init_env_osd $1
+ if [ -e $node_pg_epoch ];then
+ cat $node_pg_epoch
+ fi
+}
+
+function cat_image_v1()
+{
+ local func="cat_image_v1"
+ init_env_osd $1
+ if [ -e $image_v1 ];then
+ cat $image_v1
+ fi
+}
+
+function cat_image_v2()
+{
+ local func="cat_image_v2"
+ init_env_osd $1
+ if [ -e $image_v2 ];then
+ cat $image_v2
+ fi
+}
+
+function flush_osd_journal()
+{
+ local func="flush_osd_journal"
+ init_env_osd $1
+ local osd_data_path=$osd_data
+ local osd_journal_path=$osd_data/journal
+ local whoami_path=$osd_data/whoami
+ local host=`hostname`
+ if [ ! -e $whoami_path ];then
+ echo "[$host]: $func: $whoami_path not exists"
+ exit
+ fi
+ local whoami=`cat $whoami_path`
+ echo "[$host]: $func ..."
+ ceph-osd -i $whoami --osd-data $osd_data_path --osd-journal $osd_journal_path --flush-journal >/dev/null
+ if [ $? -ne 0 ];then
+ echo "[$host]: $func: flush osd journal failed"
+ exit
+ fi
+}
+
+function do_omap_list()
+{
+ local func="do_omap_list"
+ init_env_osd $1
+ local host=`hostname`
+ echo "[$host]: $func ..."
+ get_omap_list
+}
+
+# get all pgs epoch
+function do_pg_epoch()
+{
+ local func="do_pg_epoch"
+ init_env_osd $1
+ local node=`hostname`
+ get_pgid_list
+ >$node_pg_epoch
+ local pgid=
+ local data_path=
+ local host=`hostname`
+ echo "[$host]: $func ..."
+ while read line
+ do
+ {
+ pgid=`echo $line|awk '{print $1}'`
+ data_path=`echo $line|awk '{print $2}'`
+ get_pg_epoch $pgid
+ echo -e "$node $pgid $pg_epoch $data_path" >>$node_pg_epoch
+ }
+ done < $pgid_list
+}
+
+# get an list of image in this osd node, pg epoch maybe not the latest, the admin node will do distinguish
+function do_image_list()
+{
+ local func="do_image_list"
+ init_env_osd $1
+ get_image_list
+ local node=`hostname`
+ >$image_v1
+ >$image_v2
+ local host=`hostname`
+ echo "[$host]: $func ..."
+ for line in `cat $image_list_v1`
+ do
+ pgid=`get_pgid $line`
+ get_pg_epoch $pgid
+ echo "$node $line $pg_epoch" >> $image_v1
+ done
+ for line in `cat $image_list_v2`
+ do
+ pgid=`get_pgid $line`
+ get_pg_epoch $pgid
+ echo "$node $line $pg_epoch" >> $image_v2
+ done
+}
+
+function do_image_id()
+{
+ local func="do_image_id"
+ init_env_osd $1
+ get_image_id $2
+}
+
+function do_image_metadata_v1()
+{
+ local func="do_image_metadata_v1"
+ init_env_osd $1
+ local image_header_hobject=$2
+ local snap_name=$3
+ get_image_metadata_v1 $image_header_hobject $snap_name
+}
+
+function do_image_metadata_v2()
+{
+ local func="do_image_metadata_v2"
+ init_env_osd $1
+ local image_id=$2
+ local image_header_hobject=$3
+ local snap_name=$4
+ get_map_header $image_id
+ get_meta_header_seq $map_header_prefix $map_header_key
+ get_image_metadata_v2 $meta_header_seq $snap_name
+}
+
+check_ceph_osd
+$*
--- /dev/null
+#!/bin/bash
+# file: rbd-recover-tool
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+# rbd-recover-tool is an offline recover tool for rbd image in replicated pool
+# when ceph cluster is stopped.
+# it is a simple disater recovery policy, just for urgent condition
+
+my_dir=$(dirname "$0")
+
+. $my_dir/common_h
+. $my_dir/metadata_h
+. $my_dir/epoch_h
+. $my_dir/database_h
+
+#scp files from admin node to osd node
+file1=common_h
+file2=metadata_h
+file3=epoch_h
+file4=osd_job
+
+#------------ admin node's action -------------
+
+function scp_file()
+{
+ local func="scp_file"
+ file=$1
+ if [ "$1"x = ""x ];then
+ echo "$func: not file input"
+ exit
+ fi
+ for host in `cat $osd_host`
+ do
+ {
+ echo "$func: $host"
+ scp $ssh_option $file $host:$job_path 1>/dev/null
+ } &
+ done
+}
+
+function scp_files()
+{
+ local func="scp_files"
+ for host in `cat $osd_host`
+ do
+ {
+ echo "$func: $host"
+ scp $ssh_option $file1 $host:$job_path
+ scp $ssh_option $file2 $host:$job_path
+ scp $ssh_option $file3 $host:$job_path
+ scp $ssh_option $file4 $host:$job_path
+ } &
+ done
+ wait
+ echo "$func: finish"
+}
+
+function scatter_node_jobs()
+{
+ local func="scatter_node_jobs"
+ local host=
+ local data_path=
+ echo "$func: flush osd journal & generate infos: omap, pg, image metadata ..."
+
+ trap 'echo $func failed; exit' INT HUP
+ while read line
+ do
+ {
+ host=`echo $line|awk '{print $1}'`
+ data_path=`echo $line|awk '{print $2}'`
+ check_osd_process $host
+
+ cmd="mkdir -p $job_path"
+ ssh $ssh_option $host $cmd
+ scp $ssh_option $file1 $host:$job_path >/dev/null
+ scp $ssh_option $file2 $host:$job_path >/dev/null
+ scp $ssh_option $file3 $host:$job_path >/dev/null
+ scp $ssh_option $file4 $host:$job_path >/dev/null
+
+ cmd="bash $job_path/osd_job flush_osd_journal $data_path;"
+ cmd="$cmd $job_path/osd_job do_omap_list $data_path;"
+ cmd="$cmd bash $job_path/osd_job do_pg_epoch $data_path;"
+ cmd="$cmd bash $job_path/osd_job do_image_list $data_path;"
+
+ ssh $ssh_option $host $cmd </dev/null
+ } &
+ done < $osd_host_path
+ wait
+ echo "$func: finish"
+}
+
+function gather_node_infos()
+{
+ local func="gather_node_infos"
+ echo "$func ..."
+ >$pg_coll
+ >$image_coll_v1
+ >$image_coll_v2
+ trap 'echo $func failed; exit' INT HUP
+ while read line
+ do
+ {
+ host=`echo $line|awk '{print $1}'`
+ data_path=`echo $line|awk '{print $2}'`
+ echo "$func: $host"
+ check_osd_process $host
+
+ #pg epoch
+ cmd1="bash $job_path/osd_job cat_pg_epoch $data_path"
+ ssh $ssh_option $host $cmd1 >> $pg_coll
+ #image v1
+ cmd2="bash $job_path/osd_job cat_image_v1 $data_path"
+ ssh $ssh_option $host $cmd2 >> $image_coll_v1
+ #image v2
+ cmd3="bash $job_path/osd_job cat_image_v2 $data_path"
+ ssh $ssh_option $host $cmd3 >> $image_coll_v2
+ } &
+ done < $osd_host_path
+ wait
+ echo "$func: finish"
+}
+
+function scatter_gather()
+{
+ local func="scatter_gather"
+ if [ ! -s $osd_host ];then
+ echo "$func: no osd_host input"
+ exit
+ fi
+ if [ ! -s $mon_host ];then
+ echo "$func: no mon_host input"
+ exit
+ fi
+ scatter_node_jobs
+ gather_node_infos
+}
+
+
+#------------- operations --------------
+
+function database()
+{
+ scatter_gather
+ gen_database
+}
+
+function list()
+{
+ list_images
+}
+
+function lookup()
+{
+ lookup_image $1 $2 $3
+}
+
+function recover()
+{
+ recover_image $1 $2 $3 $4
+}
+
+#------------- helper -------------
+
+function usage()
+{
+ local cmd_name="rbd-recover-tool"
+ echo
+ echo "$cmd_name is used to recover rbd image of replicated pool,
+ when all ceph services are stopped"
+ echo "Usage:"
+ echo "$cmd_name database
+ gather pg info, object info, image metadata,
+ and epoch info from all osd nodes,
+ this will cosume a long time, just be patient,
+ especially when scale up to 1000+ osds"
+ echo "$cmd_name list
+ list all rbd images of all replicated pools,
+ before to lookup & recover"
+ echo "$cmd_name lookup <pool_id>/<image_name>[@[<snap_name>]]
+ show image metadata: image format, rbd id, size, order, snapseq
+ In addtion, for image with snapshots,
+ this will list all snapshot infomations"
+ echo "$cmd_name recover <pool_id>/<image_name>[@[<snap_name>]] [</path/to/store/image>]
+ all snapshots share one image head, to economize disk space
+ so there is only one snapshot at any time,
+ image is saved at </path/to/store/image>/pool_<pool_id>/image_name/image_name
+ cat <path/to/store/image>/pool_<pool_id>/image_name/@CURRENT,
+ will show snapid
+ recover to raw image/nosnap/head: <image_name>
+ rollback to image head: <image_name>@
+ rollback to image snap: <image_name>@<snap_name>
+ recover steps:
+ 1. recover image nosnap (only one time)
+ 2. rollback to image snap"
+}
+
+function get_path()
+{
+ local func="get_path"
+ if [ $# -lt 1 ];then
+ return
+ fi
+ if [[ $1 =~ // ]];then
+ return # "/path//to" is invalid
+ fi
+ local parent=`dirname $1`
+ local name=`basename $1`
+ if [ "$parent"x = "/"x ];then
+ echo "$parent$name"
+ else
+ echo -n "$parent/$name"
+ fi
+}
+
+function admin_cmd()
+{
+ local func="admin_cmd"
+ if [ $# -lt 1 ];then
+ usage
+ exit
+ fi
+ if [ "$1"x = "-h"x ] || [ "$1"x = "--help"x ];then
+ usage
+ exit
+ fi
+
+ if [ "$1"x = "database"x ];then
+ if [ $# -gt 1 ];then
+ usage
+ exit
+ fi
+ # remove osd_host to refresh osd_host and osd_host_mapping
+ rm -f $osd_host
+ init_env_admin
+ database
+ elif [ "$1"x = "list"x ];then
+ if [ $# -gt 1 ];then
+ usage
+ exit
+ fi
+ init_env_admin
+ list
+ elif [ "$1"x = "lookup"x ];then
+ if [ $# -gt 2 ];then
+ usage
+ exit
+ fi
+ local pool_id=-1
+ local image_name=
+ local snap_name=
+ if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ snap_name="${BASH_REMATCH[3]}"
+ else
+ echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
+ exit
+ fi
+ init_env_admin
+ lookup $pool_id $image_name $snap_name
+ elif [ "$1"x = "recover"x ];then
+ if [ $# -lt 2 ] || [ $# -gt 3 ];then
+ usage
+ exit
+ fi
+ local pool_id=-1
+ local image_name=
+ local snap_name=@
+ local image_dir=
+ if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ snap_name="${BASH_REMATCH[3]}"
+ if [ "$snap_name"x = ""x ];then
+ snap_name=@@
+ fi
+ else
+ echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
+ exit
+ fi
+ if [ $# = 3 ];then
+ image_dir=`get_path $3`
+ if [ "image_dir"x = ""x ];then
+ echo "$3 invalid"
+ exit
+ fi
+ fi
+ init_env_admin
+ recover $pool_id $image_name $snap_name $image_dir
+ elif [ "$1"x = "scp_files"x ];then
+ if [ $# -gt 1 ];then
+ exit
+ fi
+ admin_parse_osd
+ scp_files
+ elif [ "$1"x = "scp_file"x ];then
+ if [ $# -gt 2 ];then
+ exit
+ fi
+ admin_parse_osd
+ scp_file $2
+ else
+ echo "$func: $1: command not found"
+ fi
+}
+
+admin_cmd $*