]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
obsync: escape obj names between S3 and local FS
author Colin Patrick McCabe <cmccabe@alumni.cmu.edu>
Thu, 7 Apr 2011 22:16:15 +0000 (15:16 -0700)
committer Colin Patrick McCabe <cmccabe@alumni.cmu.edu>
Thu, 7 Apr 2011 23:09:59 +0000 (16:09 -0700)
Signed-off-by: Colin McCabe <colin.mccabe@dreamhost.com>
src/obsync/obsync.py
src/obsync/test-obsync.py

index bee84039d1283984426edabf2d4e6189cf1d06d0..56a13f3aa6f4caff030e01042586449bec16da99 100755 (executable)
@@ -26,6 +26,7 @@ import errno
 import hashlib
 import mimetypes
 import os
+import re
 import shutil
 import string
 import sys
@@ -34,6 +35,12 @@ import traceback
 
 global opts
 
+class LocalFileIsAcl(Exception):
+    """Signals that a local file name denotes an ACL side file ($acl)."""
+
+class InvalidLocalName(Exception):
+    """Signals a local file name with an untranslatable dollar escape."""
+
 ###### Helper functions #######
 def mkdir_p(path):
     try:
@@ -80,6 +87,54 @@ def getenv(a, b):
     else:
         return None
 
+# Escaping functions.
+#
+# Valid names for local files are a little different than valid object
+# names for S3. So these functions are needed to translate.
+#
+# Basically, in local names, every sequence starting with a dollar sign is
+# reserved as a special escape sequence. If you want to create an S3 object
+# with a dollar sign in the name, the local file should have a double dollar
+# sign ($$).
+#
+# TODO: translate local files' control characters into escape sequences.
+# Most S3 clients (boto included) cannot handle control characters in S3 object
+# names.
+# TODO: check for invalid utf-8 in local file names. Ideally, escape it, but
+# if not, just reject the local file name. S3 object names must be valid
+# utf-8.
+#
+# ----------           -----------
+# In S3                Locally
+# ----------           -----------
+# foo/                 foo$slash
+#
+# $money               $$money
+#
+# obj-with-acl         obj-with-acl
+#                      .obj-with-acl$acl
+def s3_name_to_local_name(s3_name):
+    """Translate an S3 object name into its local file name.
+
+    Every dollar sign is doubled so it cannot be mistaken for the start of
+    an escape sequence, and a single trailing slash is spelled "$slash".
+    Slashes elsewhere in the name are left untouched.
+    """
+    escaped = s3_name.replace("$", "$$")
+    if escaped.endswith("/"):
+        return escaped[:-1] + "$slash"
+    return escaped
+
+def local_name_to_s3_name(local_name):
+    """Translate a local file name back into the S3 object name it stands for.
+
+    The name is scanned left to right, so that "$$" is always consumed as an
+    escaped dollar sign before the text after it is looked at. This keeps
+    "$$slash" (a literal "$slash" in S3) distinct from "$slash" (a slash),
+    and "$$acl" distinct from the "$acl" ACL marker.
+
+    Recognized escape sequences:
+        $$      -> $
+        $slash  -> /
+        $acl    -> marks the file as an ACL side file
+
+    Raises LocalFileIsAcl if the name contains the "$acl" marker, and
+    InvalidLocalName if a dollar sign anywhere in the name starts a sequence
+    that is not listed above (including a lone trailing dollar sign).
+    """
+    escape_re = re.compile(r'\$(\$|slash|acl)?')
+    # One entry per dollar-sign sequence; '' means nothing recognizable
+    # followed the dollar sign.
+    tokens = escape_re.findall(local_name)
+    # ACL side files are reported first so that callers can skip them.
+    if "acl" in tokens:
+        raise LocalFileIsAcl()
+    if "" in tokens:
+        raise InvalidLocalName("Local name contains a dollar sign escape \
+sequence we don't understand.")
+    # Only "$" and "slash" tokens remain at this point.
+    return escape_re.sub(
+        lambda m: "$" if m.group(1) == "$" else "/", local_name)
+
+def get_local_acl_file_name(local_name):
+    """Return the name of the ACL side file that belongs to local_name.
+
+    The side file name is the local name with a leading "." and a trailing
+    "$acl", e.g. "obj-with-acl" -> ".obj-with-acl$acl".
+
+    Raises LocalFileIsAcl if local_name is itself an ACL side file, i.e. it
+    contains a "$acl" marker that is not part of an escaped "$$".
+    """
+    # Scan dollar-sign sequences left to right so "$$acl" (an escaped dollar
+    # sign followed by the text "acl") is not mistaken for the ACL marker.
+    if "acl" in re.findall(r'\$(\$|slash|acl)?', local_name):
+        raise LocalFileIsAcl()
+    # NOTE(review): for names containing a directory part the "." lands in
+    # front of the first path component -- confirm that is intended.
+    return "." + local_name + "$acl"
+
 ###### NonexistentStore #######
 class NonexistentStore(Exception):
     pass
@@ -98,6 +153,8 @@ class Object(object):
         if (self.size != rhs.size):
             return False
         return True
+    def local_name(self):
+        """Return self.name translated by s3_name_to_local_name()."""
+        s3_name = self.name
+        return s3_name_to_local_name(s3_name)
     @staticmethod
     def from_file(obj_name, path):
         f = open(path)
@@ -247,9 +304,14 @@ class FileStoreIterator(object):
                 continue
             path = self.path + "/" + self.files[0]
             self.files = self.files[1:]
-            obj_name = path[len(self.base)+1:]
+            # Ignore non-files when iterating.
             if (not os.path.isfile(path)):
                 continue
+            try:
+                obj_name = local_name_to_s3_name(path[len(self.base)+1:])
+            except LocalFileIsAcl as e:
+                # ignore ACL side files when iterating
+                continue
             return Object.from_file(obj_name, path)
 
 class FileStoreLocalCopy(object):
@@ -274,11 +336,11 @@ class FileStore(Store):
     def __str__(self):
         return "file://" + self.base
     def make_local_copy(self, obj):
-        return FileStoreLocalCopy(self.base + "/" + obj.name)
+        return FileStoreLocalCopy(self.base + "/" + obj.local_name())
     def all_objects(self):
         return FileStoreIterator(self.base)
     def locate_object(self, obj):
-        path = self.base + "/" + obj.name
+        path = self.base + "/" + obj.local_name()
         found = os.path.isfile(path)
         if (opts.more_verbose):
             if (found):
@@ -294,7 +356,7 @@ class FileStore(Store):
         if (opts.dry_run):
             return
         s = local_copy.path
-        d = self.base + "/" + obj.name
+        d = self.base + "/" + obj.local_name()
         #print "s='" + s +"', d='" + d + "'"
         mkdir_p(os.path.dirname(d))
         shutil.copy(s, d)
index 91eb6daab45e4e9ac1a7c220f828fa6ed0d34d7b..8f013af3684eef4de651e1654b625e13abe72d1c 100755 (executable)
@@ -265,6 +265,18 @@ if (len(opts.buckets) >= 1):
     if (opts.verbose):
         print "successfully copied a directory with --follow-symlinks"
 
+    # test escaping
+    os.mkdir("%s/escape_dir1" % tdir)
+    f = open("%s/escape_dir1/$$foo" % tdir, 'w')
+    f.write("$foo")
+    f.close()
+    f = open("%s/escape_dir1/blarg$slash" % tdir, 'w')
+    f.write("blarg/")
+    f.close()
+    obsync_check("file://%s/escape_dir1" % tdir, opts.buckets[0], ["-d"])
+    obsync_check(opts.buckets[0], "file://%s/escape_dir2" % tdir, ["-c"])
+    compare_directories("%s/escape_dir1" % tdir, "%s/escape_dir2" % tdir)
+
 if (len(opts.buckets) >= 2):
     if (opts.verbose):
         print "copying dir1 to bucket0..."