upon multiple calls to `parse_query` with a failure on AST creation, it causes a... (branch fix_crash_upon_parsing, 57944/head)
author Gal Salomon <gal.salomon@gmail.com>
Sun, 9 Jun 2024 07:44:09 +0000 (10:44 +0300)
committer Gal Salomon <gal.salomon@gmail.com>
Thu, 10 Oct 2024 09:50:06 +0000 (12:50 +0300)
align with s3select API
add the JSON output format, otherwise it causes some s3tests to fail.
remove obsolete initialization; the init is done by the constructor.
update the s3select submodule

Signed-off-by: Gal Salomon <gal.salomon@gmail.com>
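
For context, the JSON output format added here is selected from the request body; the handle_aws_cli_parameters() hunk below scans for it with a plain substring search. A minimal standalone sketch of that detection, assuming output_tag holds "OutputSerialization" as in the AWS S3 Select request XML (the real constant and request plumbing live elsewhere in rgw_s3select.cc):

    #include <iostream>
    #include <string>

    enum class OutputFormat { CSV, JSON };

    // Stand-in for the scan done in handle_aws_cli_parameters(): switch the
    // output format to JSON when the OutputSerialization section of the
    // request selects JSON; otherwise keep the CSV default.
    static OutputFormat detect_output_format(const std::string& request_xml,
                                             const std::string& output_tag = "OutputSerialization")
    {
      if (request_xml.find(output_tag + "><JSON") != std::string::npos) {
        return OutputFormat::JSON;
      }
      return OutputFormat::CSV;
    }

    int main()
    {
      const std::string req =
        "<SelectObjectContentRequest>"
        "<Expression>select * from s3object;</Expression>"
        "<OutputSerialization><JSON/></OutputSerialization>"
        "</SelectObjectContentRequest>";
      std::cout << (detect_output_format(req) == OutputFormat::JSON ? "JSON" : "CSV") << "\n";
      return 0;
    }

In the actual handler the result is stored in m_outputFormat and later consulted by run_s3select_on_csv().
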
src/rgw/rgw_s3select.cc
src/rgw/rgw_s3select_private.h
src/s3select

diff --git a/src/rgw/rgw_s3select.cc b/src/rgw/rgw_s3select.cc
index 800d276a6aab1c75c4ade9ef8d73e21dbd771d2f..73b55ebbf20d8b42bd5bb57f446f9d7d96afca12 100644 (file)
@@ -287,6 +287,7 @@ RGWSelectObj_ObjStore_S3::RGWSelectObj_ObjStore_S3():
   m_object_size_for_processing(0),
   m_parquet_type(false),
   m_json_type(false),
+  m_outputFormat(OutputFormat::CSV),
   chunk_number(0),
   m_requested_range(0),
   m_scan_offset(1024),
@@ -426,7 +427,9 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_csv(const char* query, const char*
   } else if(m_header_info.compare("USE")==0) {
     csv.use_header_info=true;
   }
-
+  if (m_outputFormat == OutputFormat::JSON) {
+    csv.output_json_format = true;
+  }
   m_s3_csv_object.set_csv_query(&s3select_syntax, csv);
 
   m_s3_csv_object.set_external_system_functions(fp_s3select_continue,
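
For illustration only, a standalone sketch of how the format choice reaches the CSV definitions, using a stand-in csv_definitions struct; the real type (including the output_json_format flag set above) comes from the s3select submodule:

    #include <iostream>

    // Stand-in for s3select's csv_definitions; the real struct carries many
    // more knobs (column/row delimiters, header handling, and so on).
    struct csv_definitions {
      bool use_header_info = false;
      bool output_json_format = false;
    };

    enum class OutputFormat { CSV, JSON };

    // Mirrors run_s3select_on_csv(): the JSON flag is set only when the
    // request asked for JSON output, so CSV output stays the default.
    static csv_definitions make_csv_defs(OutputFormat fmt, bool use_header)
    {
      csv_definitions csv;
      csv.use_header_info = use_header;
      csv.output_json_format = (fmt == OutputFormat::JSON);
      return csv;
    }

    int main()
    {
      const auto csv = make_csv_defs(OutputFormat::JSON, /*use_header=*/true);
      std::cout << std::boolalpha << csv.output_json_format << "\n";  // true
      return 0;
    }
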
@@ -478,6 +481,7 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_parquet(const char* query)
   if (!m_s3_parquet_object.is_set()) {
     //parsing the SQL statement.
     s3select_syntax.parse_query(m_sql_query.c_str());
+    parquet_object::csv_definitions parquet;
 
   m_s3_parquet_object.set_external_system_functions(fp_s3select_continue,
                                                fp_s3select_result_format,
@@ -486,7 +490,7 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_parquet(const char* query)
 
     try {
       //at this stage the Parquet-processing requires for the meta-data that reside on Parquet object 
-      m_s3_parquet_object.set_parquet_object(std::string("s3object"), &s3select_syntax, &m_rgw_api);
+      m_s3_parquet_object.set_parquet_object(std::string("s3object"), &s3select_syntax, &m_rgw_api, parquet);
     } catch(base_s3select_exception& e) {
       ldpp_dout(this, 10) << "S3select: failed upon parquet-reader construction: " << e.what() << dendl;
       fp_result_header_format(m_aws_response_handler.get_sql_result());
@@ -618,6 +622,10 @@ int RGWSelectObj_ObjStore_S3::handle_aws_cli_parameters(std::string& sql_query)
     ldpp_dout(this, 10) << "s3select: engine is set to process Parquet objects" << dendl;
   }
 
+  if (m_s3select_query.find(output_tag+"><JSON") != std::string::npos) {
+    m_outputFormat = OutputFormat::JSON;
+  }
+
   extract_by_tag(m_s3select_query, "Expression", sql_query);
   extract_by_tag(m_s3select_query, "Enabled", m_enable_progress);
   size_t _qi = m_s3select_query.find("<" + input_tag + ">", 0);
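
extract_by_tag() above copies the text enclosed by <Tag>...</Tag> out of the request body into its third argument. A hypothetical stand-in with that behavior (the real helper is defined elsewhere in rgw_s3select.cc and its signature may differ):

    #include <iostream>
    #include <string>

    // Hypothetical stand-in for extract_by_tag(): copy the text enclosed by
    // <tag>...</tag> into 'result'; leave it untouched when the tag is absent.
    static int extract_by_tag(const std::string& input, const std::string& tag,
                              std::string& result)
    {
      const std::string open = "<" + tag + ">";
      const std::string close = "</" + tag + ">";
      const size_t begin = input.find(open);
      if (begin == std::string::npos) return -1;
      const size_t start = begin + open.size();
      const size_t end = input.find(close, start);
      if (end == std::string::npos) return -1;
      result = input.substr(start, end - start);
      return 0;
    }

    int main()
    {
      std::string sql;
      extract_by_tag("<Expression>select * from s3object;</Expression>",
                     "Expression", sql);
      std::cout << sql << "\n";  // select * from s3object;
      return 0;
    }
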
@@ -748,7 +756,6 @@ void RGWSelectObj_ObjStore_S3::execute(optional_yield y)
       op_ret = -ERR_INVALID_REQUEST;
       return;
     }
-    s3select_syntax.parse_query(m_sql_query.c_str());
     status = run_s3select_on_parquet(m_sql_query.c_str());
     if (status) {
       ldout(s->cct, 10) << "S3select: failed to process query <" << m_sql_query << "> on object " << s->object->get_name() << dendl;
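
The removed parse_query() call is the actual crash fix: the query was parsed here and then again inside run_s3select_on_parquet() (see the hunk above), and per the subject line a failure on AST creation followed by another parse could bring the process down. A minimal standalone sketch of the parse-once flow the change converges on; the parser and runner types below are stand-ins, not the s3select API:

    #include <iostream>
    #include <optional>
    #include <stdexcept>
    #include <string>

    // Stand-in parser: throws when AST creation fails, as a malformed query would.
    struct query_ast {
      explicit query_ast(const std::string& q) {
        if (q.find("select") == std::string::npos) {
          throw std::runtime_error("AST creation failed");
        }
      }
    };

    struct parquet_runner {
      std::optional<query_ast> ast;  // populated once, on the first successful parse

      bool is_set() const { return ast.has_value(); }

      // Mirrors the fixed flow: parse only when nothing is set yet, and report
      // the failure instead of leaving half-initialized state for a later call.
      int run(const std::string& query) {
        if (!is_set()) {
          try {
            ast.emplace(query);
          } catch (const std::exception& e) {
            std::cerr << "parse failed: " << e.what() << "\n";
            return -1;
          }
        }
        return 0;  // object processing would continue from here
      }
    };

    int main()
    {
      parquet_runner r;
      r.run("not a query");                                   // fails cleanly
      return r.run("select * from s3object;") == 0 ? 0 : 1;   // retry is safe
    }
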
diff --git a/src/rgw/rgw_s3select_private.h b/src/rgw/rgw_s3select_private.h
index 7beac4f4a5d881e60d6f93c5f7634875db6dfd5c..87d180acebc0f32f844c88d72b6ab952e6787d64 100644 (file)
@@ -241,6 +241,11 @@ private:
   const char* s3select_json_error = "InvalidJsonType";
 
 public:
+  enum class OutputFormat {
+    CSV,
+    JSON
+  };
+  OutputFormat m_outputFormat;
   unsigned int chunk_number;
   size_t m_requested_range;
   size_t m_scan_offset;
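
Read together with the constructor hunk in rgw_s3select.cc above, the new member is default-initialized to CSV in the initializer list. A tiny standalone sketch of that pattern (only the enum and member names mirror the patch; the class itself is illustrative):

    #include <iostream>

    class selector {
    public:
      enum class OutputFormat { CSV, JSON };
      OutputFormat m_outputFormat;

      // Mirrors RGWSelectObj_ObjStore_S3(): default to CSV so requests without
      // an <OutputSerialization><JSON...> section keep the existing behavior.
      selector() : m_outputFormat(OutputFormat::CSV) {}
    };

    int main()
    {
      const selector s;
      std::cout << (s.m_outputFormat == selector::OutputFormat::CSV) << "\n";  // 1
      return 0;
    }
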
diff --git a/src/s3select b/src/s3select
index f333ec82e6e8a3f7eb9ba1041d1442b2c7cd0f05..a6168396d246184ea4aadfb8ab3b9ab33848294c 160000 (submodule)
@@ -1 +1 @@
-Subproject commit f333ec82e6e8a3f7eb9ba1041d1442b2c7cd0f05
+Subproject commit a6168396d246184ea4aadfb8ab3b9ab33848294c