From 256bd62d215f03554aaa4e183624b33ca734337c Mon Sep 17 00:00:00 2001 From: caleb miles Date: Thu, 27 Sep 2012 19:28:35 -0400 Subject: [PATCH] rgw: Implement POST object to publicly writeable buckets. The S3 API supports object upload using POST requests, for browser based uploads, the specification is described at: http://docs.amazonwebservices.com/AmazonS3/latest/API/RESTObjectPOST.html In response to feature request 771: http://tracker.newdream.net/issues/771 Signed-off-by: caleb miles Signed-off-by: Yehuda Sadeh --- src/rgw/rgw_op.cc | 112 ++++++++++++++++++- src/rgw/rgw_op.h | 47 ++++++++ src/rgw/rgw_rest.cc | 104 ++++++++++++++++++ src/rgw/rgw_rest.h | 18 ++++ src/rgw/rgw_rest_s3.cc | 237 ++++++++++++++++++++++++++++++++++++++++- src/rgw/rgw_rest_s3.h | 10 ++ 6 files changed, 525 insertions(+), 3 deletions(-) diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index bf4de3f2ea842..6e1d3c7759b19 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -283,7 +283,7 @@ static int read_policy(RGWRados *store, struct req_state *s, RGWBucketInfo& buck * only_bucket: If true, reads the bucket ACL rather than the object ACL. * Returns: 0 on success, -ERR# otherwise. */ -static int build_policies(RGWRados *store, struct req_state *s, bool only_bucket, bool prefetch_data) +int rgw_build_policies(RGWRados *store, struct req_state *s, bool only_bucket, bool prefetch_data) { int ret = 0; string obj_str; @@ -866,6 +866,18 @@ int RGWPutObj::verify_permission() return 0; } +int RGWPostObj::verify_permission() +{ + // read in the data from the POST form + ret = get_params(); + if (ret < 0) + return -EINVAL; + + /* we'll handle permissions later in the process, as user needs to attach policy */ + + return 0; +} + class RGWPutObjProcessor_Plain : public RGWPutObjProcessor { bufferlist data; @@ -1276,6 +1288,102 @@ done: (ceph_clock_now(s->cct) - s->time)); } + +RGWPutObjProcessor *RGWPostObj::select_processor() +{ + RGWPutObjProcessor *processor; + + if (s->content_length <= RGW_MAX_CHUNK_SIZE) + processor = new RGWPutObjProcessor_Plain(); + else + processor = new RGWPutObjProcessor_Atomic(); + + return processor; +} + +void RGWPostObj::dispose_processor(RGWPutObjProcessor *processor) +{ + delete processor; +} + +void RGWPostObj::execute() +{ + RGWPutObjProcessor *processor = NULL; + char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; + unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE]; + MD5 hash; + bufferlist bl, aclbl; + map attrs; + int len; + + ret = verify_params(); + if (ret < 0) + goto done; + + if (!verify_bucket_permission(s, RGW_PERM_WRITE)) { + ret = -EACCES; + goto done; + } + + processor = select_processor(); + + ret = processor->prepare(store, s); + if (ret < 0) + goto done; + + while (data_pending) { + bufferlist data; + len = get_data(data); + + if (len < 0) { + ret = len; + goto done; + } + + if (!len) + break; + + void *handle; + const unsigned char *data_ptr = (const unsigned char *)data.c_str(); + + ret = processor->handle_data(data, ofs, &handle); + if (ret < 0) + goto done; + + hash.Update(data_ptr, len); + + ret = processor->throttle_data(handle); + if (ret < 0) + goto done; + + ofs += len; + } + + s->obj_size = ofs; + + hash.Final(m); + buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5); + + policy.encode(aclbl); + etag = calc_md5; + + bl.append(etag.c_str(), etag.size() + 1); + attrs[RGW_ATTR_ETAG] = bl; + attrs[RGW_ATTR_ACL] = aclbl; + + if (form_param.count("Content-Type")) { + bl.clear(); + bl.append(form_param["Content-Type"].c_str(), form_param["Content-Type"].size() + 1); + attrs[RGW_ATTR_CONTENT_TYPE] = bl; + } + + ret = processor->complete(etag, attrs); + +done: + dispose_processor(processor); +} + + int RGWPutMetadata::verify_permission() { if (!verify_object_permission(s, RGW_PERM_WRITE)) @@ -2065,7 +2173,7 @@ int RGWHandler::init(RGWRados *_store, struct req_state *_s, RGWClientIO *cio) int RGWHandler::do_read_permissions(RGWOp *op, bool only_bucket) { - int ret = build_policies(store, s, only_bucket, op->prefetch_data()); + int ret = rgw_build_policies(store, s, only_bucket, op->prefetch_data()); if (ret < 0) { ldout(s->cct, 10) << "read_permissions on " << s->bucket << ":" <object_str << " only_bucket=" << only_bucket << " ret=" << ret << dendl; diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index 463765a7a230b..3d4d8ed4e01eb 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -10,6 +10,7 @@ #define CEPH_RGW_OP_H #include +#include #include "rgw_common.h" #include "rgw_rados.h" @@ -22,6 +23,8 @@ struct req_state; class RGWHandler; void rgw_get_request_metadata(struct req_state *s, map& attrs); +int rgw_build_policies(RGWRados *store, struct req_state *s, bool only_bucket, bool prefetch_data); + /** * Provide the base class for all ops. @@ -302,6 +305,50 @@ public: virtual const char *name() { return "put_obj"; } }; +class RGWPostObj : public RGWOp { + + friend class RGWPutObjProcessor; + +protected: + int ret; + int len; + off_t ofs; + const char *supplied_md5_b64; + const char *supplied_etag; + string etag; + string boundary; + bool data_pending; + RGWAccessControlPolicy policy; + map form_param; + +public: + RGWPostObj() {} + + virtual void init(RGWRados *store, struct req_state *s, RGWHandler *h) { + RGWOp::init(store, s, h); + ret = 0; + len = 0; + ofs = 0; + supplied_md5_b64 = NULL; + supplied_etag = NULL; + etag = ""; + boundary = ""; + data_pending = false; + policy.set_ctx(s->cct); + } + + int verify_permission(); + void execute(); + + RGWPutObjProcessor *select_processor(); + void dispose_processor(RGWPutObjProcessor *processor); + + virtual int get_params() = 0; + virtual int get_data(bufferlist& bl) = 0; + virtual void send_response() = 0; + virtual const char *name() { return "post_obj"; } +}; + class RGWPutMetadata : public RGWOp { protected: int ret; diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc index 3612a9e67521d..91a532e962595 100644 --- a/src/rgw/rgw_rest.cc +++ b/src/rgw/rgw_rest.cc @@ -162,6 +162,55 @@ void dump_etag(struct req_state *s, const char *etag) } } +void dump_pair(struct req_state *s, const char *key, const char *value) +{ + if ( (strlen(key) > 0) && (strlen(value) > 0)) + s->cio->print("%s: %s\n", key, value); +} + +void dump_bucket_from_state(struct req_state *s) +{ + if (!s->bucket_name_str.empty()) + s->cio->print("Bucket: \"%s\"\n", s->bucket_name_str.c_str()); +} + +void dump_object_from_state(struct req_state *s) +{ + if (!s->object_str.empty()) + s->cio->print("Key: \"%s\"\n", s->object_str.c_str()); +} + +void dump_uri_from_state(struct req_state *s) +{ + if (strcmp(s->request_uri.c_str(), "/") == 0) { + + string location = "http://"; + location += s->env->get("SERVER_NAME"); + if (!location.empty()) { + location += "/"; + if (!s->bucket_name_str.empty()) { + location += s->bucket_name_str; + location += "/"; + if (!s->object_str.empty()) { + location += s->object_str; + s->cio->print("Location: %s\n", location.c_str()); + } + } + } + } + else { + s->cio->print("Location: \"%s\"\n", s->request_uri.c_str()); + } +} + +void dump_redirect(struct req_state *s, const char *url) +{ + if (strlen(url) > 0) { + dump_errno(s, 301); + s->cio->print("Location: %s\n", url); + } +} + void dump_last_modified(struct req_state *s, time_t t) { @@ -501,6 +550,61 @@ int RGWPutObj_ObjStore::get_data(bufferlist& bl) return len; } +int RGWPostObj_ObjStore::verify_params() +{ + /* check that we have enough memory to store the object + note that this test isn't exact and may fail unintentionally + for large requests is */ + if ((unsigned long long)content_length > RGW_MAX_PUT_SIZE) + return -ERR_TOO_LARGE; + + return 0; +} + +int RGWPostObj_ObjStore::get_data(bufferlist& bl) +{ + size_t cl = 0; + + // try and prevent a partial read of the boundary + if (content_length - ofs > RGW_MAX_CHUNK_SIZE) + cl = RGW_MAX_CHUNK_SIZE; + else if (content_length - ofs > 0) + cl = (content_length - ofs); + else + cl = RGW_MAX_CHUNK_SIZE; + + bufferptr bp(cl); + int r = s->cio->read(bp.c_str(), cl, &len); + if (r < 0) + return r; + + // resize our buffer pointer to avoid appending garbage + bp.set_length(len); + + /* if we are at the boundary there will be two leading + newlines that we don't want */ + int start = len - boundary.size() -2; + if (start > 0) { + // read in what might be a boundary + string test_boundary; + for (int i = start; i < len -2 && i > 0 ; i++) { + test_boundary += bp.c_str()[i]; + } + + if (strcmp(test_boundary.c_str(), boundary.c_str()) == 0 ) { + // kill off bothersome newlines + bp.set_length(start-2); + data_read = true; + } + } + + len = bp.length(); + bl.append(bp); + + + return len; +} + int RGWPutACLs_ObjStore::get_params() { size_t cl = 0; diff --git a/src/rgw/rgw_rest.h b/src/rgw/rgw_rest.h index 69056cb2a251b..56e2b3d1da7d7 100644 --- a/src/rgw/rgw_rest.h +++ b/src/rgw/rgw_rest.h @@ -106,6 +106,16 @@ public: int get_data(bufferlist& bl); }; +class RGWPostObj_ObjStore : public RGWPostObj +{ +public: + RGWPostObj_ObjStore() {} + ~RGWPostObj_ObjStore() {} + + virtual int verify_params(); + int get_data(bufferlist& bl); +}; + class RGWPutMetadata_ObjStore : public RGWPutMetadata { public: @@ -282,5 +292,13 @@ extern void dump_range(struct req_state *s, uint64_t ofs, uint64_t end, uint64_t extern void dump_continue(struct req_state *s); extern void list_all_buckets_end(struct req_state *s); extern void dump_time(struct req_state *s, const char *name, time_t *t); +extern void dump_bucket_from_state(struct req_state *s); +extern void dump_object_from_state(struct req_state *s); +extern void dump_uri_from_state(struct req_state *s); +extern void dump_redirect(struct req_state *s, const char *url); +extern void dump_pair(struct req_state *s, const char *key, const char *value); +extern bool is_valid_url(const char *url); + + #endif diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 995894b23976b..41887080143cb 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -3,6 +3,7 @@ #include "common/ceph_crypto.h" #include "common/Formatter.h" +#include "common/utf8.h" #include "rgw_rest.h" #include "rgw_rest_s3.h" @@ -16,6 +17,34 @@ using namespace ceph::crypto; +void dump_common_s3_headers(struct req_state *s, const char *etag, + size_t content_len, const char *conn_status) +{ + // how many elements do we expect to include in the response + unsigned int expected_var_len = 4; + map head_var; + + utime_t date = ceph_clock_now(s->cct); + if (!date.is_zero()) { + char buf[TIME_BUF_SIZE]; + date.sprintf(buf, TIME_BUF_SIZE); + head_var["date"] = buf; + } + + head_var["etag"] = etag; + head_var["conn_stat"] = conn_status; + head_var["server"] = s->env->get("HTTP_HOST"); + + // if we have all the variables we want go ahead and dump + if (head_var.size() == expected_var_len) { + dump_pair(s, "Date", head_var["date"].c_str()); + dump_etag(s, head_var["etag"].c_str()); + dump_content_length(s, content_len); + dump_pair(s, "Connection", head_var["conn_stat"].c_str()); + dump_pair(s, "Server", head_var["server"].c_str()); + } +} + void list_all_buckets_start(struct req_state *s) { s->formatter->open_array_section_in_ns("ListAllMyBucketsResult", @@ -332,6 +361,212 @@ void RGWPutObj_ObjStore_S3::send_response() end_header(s); } +int RGWPostObj_ObjStore_S3::get_form_head() +{ + char *buf; + size_t pos; + string temp_line; + string param; + string old_param; + string param_value; + + string whitespaces (" \t\f\v\n\r"); + + content_length = s->env->get_int("CONTENT_LENGTH", 0); + if (content_length == 0) + return -ENODATA; + + size_t start_receive = s->bytes_received; + + // get the part boundary + string content_string = s->env->get("CONTENT_TYPE"); + pos = content_string.find("boundary="); + if (pos == string::npos) + return -EINVAL; + + // create the boundary which marks the end of the request + boundary = "--"; + boundary += content_string.substr(pos+9); + boundary += "--"; + + // each part in the form begins with + string part_header = "Content-Disposition: form-data; name="; + + // quite possibly overkill on the size + buf = (char *)malloc(RGW_MAX_CHUNK_SIZE + 1); + + do { + // read a single line, exciting no? + CGI_GetLine(s, buf, RGW_MAX_CHUNK_SIZE); + temp_line = buf; + + pos = temp_line.find(part_header); + + if (pos != string::npos) { + // find the key contained in this part + pos = temp_line.find("name="); + param = temp_line.substr(pos+5); + old_param = param; + + // trim the key a little bit + param.erase(0,1); + param.erase(param.find("\"")); + + // make sure to stop before reading actual data + if (strncmp(param.c_str(), "file",4) == 0) { + data_pending = true; + + // look for a supplied filename + pos = old_param.find("filename="); + + if (pos != string::npos) { + string temp_name = old_param.substr(pos+10); + + // clean up the trailing quotation mark + temp_name.erase(temp_name.find("\"")); + supplied_filename = temp_name; + } + + // check if a key has actually been read correctly + pos = form_param["key"].find_last_not_of(whitespaces); + if (pos == string::npos) + form_param["key"] = supplied_filename; + + // read the next two lines which don't actually contain the data + CGI_GetLine(s, buf, RGW_MAX_CHUNK_SIZE); + param_value = buf; + + pos = param_value.find("Content-Type:"); + if (pos != string::npos) { + param_value = param_value.substr(pos + 14); + + // get rid of any trailing whitespace + pos = param_value.find_last_not_of(whitespaces); + if (pos != string::npos) + param_value.erase(pos+1); + + form_param["Content-Type"] = param_value; + } + + // this line will be blank + CGI_GetLine(s, buf, RGW_MAX_CHUNK_SIZE); + + break; + } + + // read out a boring blank line + CGI_GetLine(s, buf, RGW_MAX_CHUNK_SIZE); + + // now read the line we actually want + CGI_GetLine(s, buf, RGW_MAX_CHUNK_SIZE); + param_value = buf; + + // get rid of any trailing whitespace + pos = param_value.find_last_not_of(whitespaces); + if (pos != string::npos) + param_value.erase(pos+1); + + if (!param.empty() && !param_value.empty()) { + // store the parameter, value combination + form_param[param] = param_value; + } + } + else { + /* we may have read to the end of the request without coming across a file part + if so we want to error out because no one likes infinite loops */ + pos = temp_line.find(boundary); + if (pos != string::npos && !data_pending) { + free(buf); + return -ENODATA; //maybe there is a better error condition to use? + } + } + } while (!data_pending); + + header_length += (s->bytes_received - start_receive); + content_length -= header_length; + + free(buf); + return 0; +} + +int RGWPostObj_ObjStore_S3::get_params() +{ + // now get the beginning of the request, up until one line before the actual data + get_form_head(); + + string test_string; + + if (s->bucket_name_str.size() > 0 ) + test_string = s->bucket_name; + + // build policies for the specified bucket and load them into the state + if (s->bucket_name_str.size() == 0) { + if (form_param.count("bucket")) + s->bucket_name_str = form_param["bucket"]; + + ret = rgw_build_policies(store, s, true, false); + if (ret < 0) { + ldout(s->cct, 0) << "ERROR building policy, status: " << ret << dendl; + return ret; + } + } + + if (form_param.count("key")) { + s->object_str = form_param["key"]; + } else { + ret = -EINVAL; // could possibly use a better error condition + } + + // if we don't have an access policy build a policy for an anonymous user + RGWAccessControlPolicy_S3 s3policy(s->cct); + if (!form_param.count("Policy")) { + if (form_param.count("acl")) { + bool r = s3policy.create_canned(s->user.user_id, "", form_param["acl"]); + if (!r) + return -EINVAL; + + policy = s3policy; + } + } + + return ret; +} + +void RGWPostObj_ObjStore_S3::send_response() +{ + if (ret < 0) + set_req_state_err(s, ret); + + if (form_param.count("success_action_redirect")) { + const string& success_action_redirect = form_param["success_action_redirect"]; + if (check_utf8(success_action_redirect.c_str(), success_action_redirect.size())) { + dump_redirect(s, form_param["success_action_redirect"].c_str()); + end_header(s, "text/plain"); + return; + } + } + else if (form_param.count("success_action_status") && ret == 0) { + string status_string = form_param["success_action_status"]; + int status_int; + if ( !(istringstream(status_string) >> status_int) ) + status_int = 200; + + dump_errno(s, status_int); + } + else { + dump_errno(s); + if (ret < 0) + return; + } + + end_header(s, "text/plain"); + dump_common_s3_headers(s, etag.c_str(), 0, "close"); + dump_bucket_from_state(s); + dump_object_from_state(s); + dump_uri_from_state(s); +} + + void RGWDeleteObj_ObjStore_S3::send_response() { int r = ret; @@ -743,7 +978,7 @@ RGWOp *RGWHandler_ObjStore_Bucket_S3::op_post() return new RGWDeleteMultiObj_ObjStore_S3; } - return NULL; + return new RGWPostObj_ObjStore_S3; } RGWOp *RGWHandler_ObjStore_Obj_S3::get_obj_op(bool get_data) diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h index 5bdec1b0e4f4d..e06bd70cd44e9 100644 --- a/src/rgw/rgw_rest_s3.h +++ b/src/rgw/rgw_rest_s3.h @@ -81,6 +81,16 @@ public: void send_response(); }; +class RGWPostObj_ObjStore_S3 : public RGWPostObj_ObjStore { +public: + RGWPostObj_ObjStore_S3() {} + ~RGWPostObj_ObjStore_S3() {} + + int get_params(); + void send_response(); + int get_form_head(); +}; + class RGWDeleteObj_ObjStore_S3 : public RGWDeleteObj_ObjStore { public: RGWDeleteObj_ObjStore_S3() {} -- 2.39.5