From 556f06e8d1de1c46fe02c511b35b54a0009eb2f7 Mon Sep 17 00:00:00 2001 From: Colin Patrick McCabe Date: Thu, 7 Apr 2011 15:16:15 -0700 Subject: [PATCH] obsync: escape obj names between S3 and local FS Signed-off-by: Colin McCabe --- src/obsync/obsync.py | 70 ++++++++++++++++++++++++++++++++++++--- src/obsync/test-obsync.py | 12 +++++++ 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/src/obsync/obsync.py b/src/obsync/obsync.py index bee84039d1283..56a13f3aa6f4c 100755 --- a/src/obsync/obsync.py +++ b/src/obsync/obsync.py @@ -26,6 +26,7 @@ import errno import hashlib import mimetypes import os +import re import shutil import string import sys @@ -34,6 +35,12 @@ import traceback global opts +class LocalFileIsAcl(Exception): + pass + +class InvalidLocalName(Exception): + pass + ###### Helper functions ####### def mkdir_p(path): try: @@ -80,6 +87,54 @@ def getenv(a, b): else: return None +# Escaping functions. +# +# Valid names for local files are a little different than valid object +# names for S3. So these functions are needed to translate. +# +# Basically, in local names, every sequence starting with a dollar sign is +# reserved as a special escape sequence. If you want to create an S3 object +# with a dollar sign in the name, the local file should have a double dollar +# sign ($$). +# +# TODO: translate local files' control characters into escape sequences. +# Most S3 clients (boto included) cannot handle control characters in S3 object +# names. +# TODO: check for invalid utf-8 in local file names. Ideally, escape it, but +# if not, just reject the local file name. S3 object names must be valid +# utf-8. 
+#
+#    ----------      -----------
+#    In S3           Locally
+#    ----------      -----------
+#    foo/            foo$slash
+#
+#    $money          $$money
+#
+#    obj-with-acl    obj-with-acl
+#                    .obj-with-acl$acl
+def s3_name_to_local_name(s3_name):
+    s3_name = re.sub(r'\$', "$$", s3_name)  # escape every '$' as '$$'
+    if (s3_name[-1:] == "/"):
+        s3_name = s3_name[:-1] + "$slash"
+    return s3_name
+
+def local_name_to_s3_name(local_name):
+    # Translate an escaped local file name back to its S3 object name.
+    if local_name.find(r'$acl') != -1:
+        raise LocalFileIsAcl()
+    mre = re.compile(r'\$(\$|slash)')  # the only escapes we understand
+    if "$" in mre.sub("", local_name):  # leftover '$' => unknown escape
+        raise InvalidLocalName("Local name contains a dollar sign escape \
+sequence we don't understand.")
+    # single left-to-right pass, so '$$slash' decodes to '$slash', not '$/'
+    return mre.sub(lambda m: "$" if m.group(1) == "$" else "/", local_name)
+
+def get_local_acl_file_name(local_name):
+    if local_name.find('$acl') != -1:  # str.find() returns -1 when absent
+        raise LocalFileIsAcl()
+    return "." + local_name + "$acl"
+
 ###### NonexistentStore #######
 class NonexistentStore(Exception):
     pass
@@ -98,6 +153,8 @@ class Object(object):
         if (self.size != rhs.size):
             return False
         return True
+    def local_name(self):
+        return s3_name_to_local_name(self.name)
     @staticmethod
     def from_file(obj_name, path):
         f = open(path)
@@ -247,9 +304,14 @@ class FileStoreIterator(object):
                 continue
             path = self.path + "/" + self.files[0]
             self.files = self.files[1:]
-            obj_name = path[len(self.base)+1:]
+            # Ignore non-files when iterating.
if (not os.path.isfile(path)): continue + try: + obj_name = local_name_to_s3_name(path[len(self.base)+1:]) + except LocalFileIsAcl as e: + # ignore ACL side files when iterating + continue return Object.from_file(obj_name, path) class FileStoreLocalCopy(object): @@ -274,11 +336,11 @@ class FileStore(Store): def __str__(self): return "file://" + self.base def make_local_copy(self, obj): - return FileStoreLocalCopy(self.base + "/" + obj.name) + return FileStoreLocalCopy(self.base + "/" + obj.local_name()) def all_objects(self): return FileStoreIterator(self.base) def locate_object(self, obj): - path = self.base + "/" + obj.name + path = self.base + "/" + obj.local_name() found = os.path.isfile(path) if (opts.more_verbose): if (found): @@ -294,7 +356,7 @@ class FileStore(Store): if (opts.dry_run): return s = local_copy.path - d = self.base + "/" + obj.name + d = self.base + "/" + obj.local_name() #print "s='" + s +"', d='" + d + "'" mkdir_p(os.path.dirname(d)) shutil.copy(s, d) diff --git a/src/obsync/test-obsync.py b/src/obsync/test-obsync.py index 91eb6daab45e4..8f013af3684ee 100755 --- a/src/obsync/test-obsync.py +++ b/src/obsync/test-obsync.py @@ -265,6 +265,18 @@ if (len(opts.buckets) >= 1): if (opts.verbose): print "successfully copied a directory with --follow-symlinks" + # test escaping + os.mkdir("%s/escape_dir1" % tdir) + f = open("%s/escape_dir1/$$foo" % tdir, 'w') + f.write("$foo") + f.close() + f = open("%s/escape_dir1/blarg$slash" % tdir, 'w') + f.write("blarg/") + f.close() + obsync_check("file://%s/escape_dir1" % tdir, opts.buckets[0], ["-d"]) + obsync_check(opts.buckets[0], "file://%s/escape_dir2" % tdir, ["-c"]) + compare_directories("%s/escape_dir1" % tdir, "%s/escape_dir2" % tdir) + if (len(opts.buckets) >= 2): if (opts.verbose): print "copying dir1 to bucket0..." -- 2.39.5