]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
diagnose zero segment while s3select 38892/head
authorgalsalomon66 <gal.salomon@gmail.com>
Sun, 10 Jan 2021 21:30:46 +0000 (23:30 +0200)
committergalsalomon66 <gal.salomon@gmail.com>
Fri, 29 Jan 2021 06:15:01 +0000 (08:15 +0200)
Signed-off-by: galsalomon66 <gal.salomon@gmail.com>
using s3-test with the fix fix_test_file_permission_add_attrib

Signed-off-by: galsalomon66 <gal.salomon@gmail.com>
update s3select{1ea37ae459f957786b9d198510637dfa12f83350} ; add log tracing zero segments locations

Signed-off-by: galsalomon66 <gal.salomon@gmail.com>
s3select submodule update

Signed-off-by: galsalomon66 <gal.salomon@gmail.com>
fix comments

Signed-off-by: galsalomon66 <gal.salomon@gmail.com>
updating the s3select new features; covering the gap between latest documentation and latest faetures update

Signed-off-by: galsalomon66 <gal.salomon@gmail.com>
add some more examples; fixes

Signed-off-by: galsalomon66 <gal.salomon@gmail.com>
add between expression to doc; typo

Signed-off-by: galsalomon66 <gal.salomon@gmail.com>
typo

Signed-off-by: galsalomon66 <gal.salomon@gmail.com>
skipping empty segments

Signed-off-by: galsalomon66 <gal.salomon@gmail.com>
skip empty segments to the next segments

Signed-off-by: galsalomon66 <gal.salomon@gmail.com>
doc/radosgw/s3select.rst
qa/tasks/s3tests.py
src/rgw/rgw_rest_s3.cc
src/s3select

index faee08e265e8b9ed73146846b66b3e59dc946ec7..4f89e5ca590a966ecce9be43df0b441a464477ea 100644 (file)
@@ -57,60 +57,159 @@ Error Handling
 
 
 
-
 Features Support
 ----------------
-
 .. _feature-table:
 
-  | Currently only part of `AWS select command <https://docs.aws.amazon.com/AmazonS3/latest/dev/s3-glacier-select-sql-reference-select.html>`_ is implemented, table bellow describes what is currently supported.
+  | Currently only part of `AWS select command <https://docs.aws.amazon.com/AmazonS3/latest/dev/s3-glacier-select-sql-reference-select.html>`_ is implemented, table below describes what is currently supported.
   | The following table describes the current implementation for s3-select functionalities:
 
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| Feature                         | Detailed        | Example                                                               |
+| Feature                         | Detailed        | Example  / Description                                                |
 +=================================+=================+=======================================================================+
-| Arithmetic operators            | ^ * / + - ( )   | select (int(_1)+int(_2))*int(_9) from stdin;                          |
+| Arithmetic operators            | ^ * % / + - ( ) | select (int(_1)+int(_2))*int(_9) from s3object;                       |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+|                                 | ``%`` modulo    | select count(*) from s3object where cast(_1 as int)%2 == 0;           |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+|                                 | ``^`` power-of  | select cast(2^10 as int) from s3object;                               |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| Compare operators               | > < >= <= == != | select _1,_2 from s3object where (int(_1)+int(_3))>int(_5);           |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| logical operator                | AND OR NOT      | select count(*) from s3object where not (int(1)>123 and int(_5)<200); |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| logical operator                | is null         | return true/false for null indication in expression                   |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| logical operator                | is not null     | return true/false for null indication in expression                   |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| logical operator and NULL       | unknown state   | review null-handle_ observe how logical operator result with null.    |
+|                                 |                 | the following query return **0**.                                     |
+|                                 |                 | ``select count(*) from s3object where null and (3>2);``               |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| Arithmetic operator with NULL   | unknown state   | review null-handle_ observe the results of binary operations with NULL|
+|                                 |                 | the following query return **0**.                                     |
+|                                 |                 | ``select count(*) from s3object where (null+1) and (3>2);``           |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| compare with NULL               | unknown state   | review null-handle_ observe results of compare operations with NULL   | 
+|                                 |                 | the following query return **0**.                                     |
+|                                 |                 | ``select count(*) from s3object where (null*1.5) != 3;``              |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| missing column                  | unknown state   | select count(*) from s3object where _1 is null;                       | 
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| projection column               | similar to      | select case                                                           | 
+|                                 | if/then/else    |   when (1+1==(2+1)*3) then 'case_1'                                   |
+|                                 |                 |   when ((4*3)==(12)) then 'case_2' else 'case_else' end,              |
+|                                 |                 |   age*2 from s3object;                                                | 
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| logical operator                | ``coalesce`` :: return first non-null argumnet                                          | 
+|                                 |                   select coalesce(nullif(5,5),nullif(1,1.0),age+12) from s3object;      |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| logical operator                | ``nullif`` :: return null in case both arguments are equal, or else the first one       |
+|                                 |            nullif(1,1)=NULL nullif(null,1)=NULL nullif(2,1)=2                           |
+|                                 |                     select nullif(cast(_1 as int),cast(_2 as int)) from s3object;       |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| logical operator                | ``{expression} in ( .. {expression} ..)``                                               | 
+|                                 |             select count(*) from s3object                                               | 
+|                                 |                 where 'ben' in (trim(_5),substring(_1,char_length(_1)-3,3),last_name);  |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| logical operator                | ``{expression} between {expression} and {expression}``                                  | 
+|                                 |             select count(*) from stdin                                                  | 
+|                                 |                 where substring(_3,char_length(_3),1) between "x" and trim(_1)          |
+|                                 |                     and substring(_3,char_length(_3)-1,1) == ":";                       |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-|                                 |                 | select ((1+2)*3.14) ^ 2 from stdin;                                   |
+| logical operator                | ``{expression} like {match-pattern}``                                                   |
+|                                 |                   select count(*) from s3object where first_name like '%de_';           | 
+|                                 |                   select count(*) from s3object where _1 like \"%a[r-s]\;               | 
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| Compare operators               | > < >= <= == != | select _1,_2 from stdin where (int(1)+int(_3))>int(_5);               |
+| casting operator                | select cast(123 as int)%2 from s3object;                                                |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| logical operator                | AND OR          | select count(*) from stdin where int(1)>123 and int(_5)<200;          |
+| casting operator                | select cast(123.456 as float)%2 from s3object;                                          |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| casting operator                | int(expression) | select int(_1),int( 1.2 + 3.4) from stdin;                            |
+| casting operator                | select cast('ABC0-9' as string),cast(substr('ab12cd',3,2) as int)*4  from s3object;     |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-|                                 |float(expression)| select float(1.2) from stdin;                                         |
+| casting operator                | select cast(substring('publish on 2007-01-01',12,10) as timestamp) from s3object;       |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-|                                 | timestamp(...)  | select timestamp("1999:10:10-12:23:44") from stdin;                   |
+| non AWS casting operator        | select int(_1),int( 1.2 + 3.4) from s3object;                                           |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| Aggregation Function            | sum             | select sum(int(_1)) from stdin;                                       |
+| non AWS casting operator        | select float(1.2) from s3object;                                                        |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| Aggregation Function            | min             | select min( int(_1) * int(_5) ) from stdin;                           |
+| not AWS casting operator        | select timestamp('1999:10:10-12:23:44') from s3object;                                  |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| Aggregation Function            | max             | select max(float(_1)),min(int(_5)) from stdin;                        |
+| Aggregation Function            | sum             | select sum(int(_1)) from s3object;                                    |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| Aggregation Function            | count           | select count(*) from stdin where (int(1)+int(_3))>int(_5);            |
+| Aggregation Function            | avg             | select avg(cast(_1 a float) + cast(_2 as int)) from s3object;         |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| Timestamp Functions             | extract         | select count(*) from stdin where                                      |
-|                                 |                 | extract("year",timestamp(_2)) > 1950                                  |    
-|                                 |                 | and extract("year",timestamp(_1)) < 1960;                             |
+| Aggregation Function            | min             | select min( int(_1) * int(_5) ) from s3object;                        |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| Timestamp Functions             | dateadd         | select count(0) from stdin where                                      |
-|                                 |                 | datediff("year",timestamp(_1),dateadd("day",366,timestamp(_1))) == 1; |  
+| Aggregation Function            | max             | select max(float(_1)),min(int(_5)) from s3object;                     |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| Timestamp Functions             | datediff        | select count(0) from stdin where                                      |  
-|                                 |                 | datediff("month",timestamp(_1),timestamp(_2))) == 2;                  | 
+| Aggregation Function            | count           | select count(*) from s3object where (int(1)+int(_3))>int(_5);         |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| Timestamp Functions             | utcnow          | select count(0) from stdin where                                      |
-|                                 |                 | datediff("hours",utcnow(),dateadd("day",1,utcnow())) == 24 ;          |
+| Timestamp Functions             | extract         | select count(*) from s3object where                                   |
+|                                 |                 | extract('year',timestamp(_2)) > 1950                                  |    
+|                                 |                 | and extract('year',timestamp(_1)) < 1960;                             |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
-| String Functions                | substr          | select count(0) from stdin where                                      |
-|                                 |                 | int(substr(_1,1,4))>1950 and int(substr(_1,1,4))<1960;                |
+| Timestamp Functions             | dateadd         | select count(0) from s3object where                                   |
+|                                 |                 | datediff('year',timestamp(_1),dateadd('day',366,timestamp(_1))) == 1; |  
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| Timestamp Functions             | datediff        | select count(0) from s3object where                                   |  
+|                                 |                 | datediff('month',timestamp(_1),timestamp(_2))) == 2;                  | 
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| Timestamp Functions             | utcnow          | select count(0) from s3object where                                   |
+|                                 |                 | datediff('hours',utcnow(),dateadd('day',1,utcnow())) == 24 ;          |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| String Functions                | substring       | select count(0) from s3object where                                   |
+|                                 |                 | int(substring(_1,1,4))>1950 and int(substring(_1,1,4))<1960;          |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| String Functions                | trim            | select trim('   foobar   ') from s3object;                            |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| String Functions                | trim            | select trim(trailing from '   foobar   ') from s3object;              |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| String Functions                | trim            | select trim(leading from '   foobar   ') from s3object;               |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| String Functions                | trim            | select trim(both '12' from  '1112211foobar22211122') from s3objects;  |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| String Functions                | lower/upper     | select lower('ABcD12#$e') from s3object;                              |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| String Functions                | char_length     | select count(*) from s3object where char_length(_3)==3;               |
+|                                 | character_length|                                                                       |
++---------------------------------+-----------------+-----------------------------------------------------------------------+
+| Complex queries                 |   select sum(cast(_1 as int)),                                                          |
+|                                 |          max(cast(_3 as int)),                                                          |
+|                                 |          substring('abcdefghijklm',                                                     |
+|                                 |          (2-1)*3+sum(cast(_1 as int))/sum(cast(_1 as int))+1,                           |
+|                                 |          (count() + count(0))/count(0)) from s3object;                                  |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
 | alias support                   |                 |  select int(_1) as a1, int(_2) as a2 , (a1+a2) as a3                  | 
-|                                 |                 |  from stdin where a3>100 and a3<300;                                  |
+|                                 |                 |  from s3object where a3>100 and a3<300;                               |
 +---------------------------------+-----------------+-----------------------------------------------------------------------+
 
+.. _null-handle:
+
+NULL
+~~~~
+| NULL is a legit value in ceph-s3select systems similar to other DB systems, i.e. systems needs to handle the case where a value is NULL.
+| The definition of NULL in our context, is missing/unknown, in that sense **NULL can not produce a value on ANY arithmetic operations** ( a + NULL will produce NULL value).
+| The Same is with arithmetic comaprision, **any comparison to NULL is NULL**, i.e. unknown.
+| Below is a truth table contains the NULL use-case.
+
++---------------------------------+-----------------------------+
+| A is NULL                       | Result (NULL=UNKNOWN)       |
++=================================+=============================+
+| NOT A                           |  NULL                       |
++---------------------------------+-----------------------------+
+| A OR False                      |  NULL                       |
++---------------------------------+-----------------------------+
+| A OR True                       |  True                       |
++---------------------------------+-----------------------------+
+| A OR A                          |  NULL                       |
++---------------------------------+-----------------------------+
+| A AND False                     |  False                      |
++---------------------------------+-----------------------------+
+| A AND True                      |  NULL                       | 
++---------------------------------+-----------------------------+
+| A and A                         |  NULL                       |
++---------------------------------+-----------------------------+
+
 s3-select function interfaces
 -----------------------------
 
@@ -129,7 +228,6 @@ Timestamp functions
     | ``datediff(date-part,timestamp,timestamp)`` : function return an integer, a calculated result for difference between 2 timestamps according to date-part.
     | supported date-part : year,month,day,hours.  
 
-
     | ``utcnow()`` : return timestamp of current time.
 
 Aggregation functions
@@ -139,6 +237,8 @@ Aggregation functions
 
     | ``sum(expression)`` : return a summary of expression per all rows matching condition(if such exist).
 
+    | ``avg(expression)`` : return a average  of expression per all rows matching condition(if such exist).
+
     | ``max(expression)`` : return the maximal result for all expressions matching condition(if such exist).
 
     | ``min(expression)`` : return the minimal result for all expressions matching condition(if such exist).
@@ -146,7 +246,13 @@ Aggregation functions
 String functions
 ~~~~~~~~~~~~~~~~
 
-    | ``substr(string,from,to)`` : return a string extract from input string according to from,to inputs.
+    | ``substring(string,from,to)`` : return a string extract from input string according to from,to inputs.
+
+    | ``char_length`` : return a number of characters in string (``character_length`` does the same).
+
+    | ``trim`` : trims leading/trailing characters from target string, the default is blank character.
+
+    | ``upper\lower`` : converts characters into lowercase/uppercase.
 
 
 Alias
@@ -169,7 +275,7 @@ Sending Query to RGW
 
 
    | Sending s3-select request to RGW using AWS cli, should follow `AWS command reference <https://docs.aws.amazon.com/cli/latest/reference/s3api/select-object-content.html>`_.
-   | bellow is an example for it.
+   | below is an example for it.
 
 ::
 
@@ -180,7 +286,7 @@ Sending Query to RGW
   '{"CSV": {"FieldDelimiter": "," , "QuoteCharacter": "\"" , "RecordDelimiter" : "\n" , "QuoteEscapeCharacter" : "\\" , "FileHeaderInfo": "USE" }, "CompressionType": "NONE"}' 
   --output-serialization '{"CSV": {}}' 
   --key {OBJECT-NAME} 
-  --expression "select count(0) from stdin where int(_1)<10;" output.csv
+  --expression "select count(0) from s3object where int(_1)<10;" output.csv
 
 Syntax
 ~~~~~~
@@ -264,5 +370,5 @@ BOTO3
   run_s3select(
   "my_bucket",
   "my_csv_object",
-  "select int(_1) as a1, int(_2) as a2 , (a1+a2) as a3 from stdin where a3>100 and a3<300;")
+  "select int(_1) as a1, int(_2) as a2 , (a1+a2) as a3 from s3object where a3>100 and a3<300;")
 
index d87c2ab0a4303a279fcd69d7046857343268de02..3310d7499a110aadf5f9421156215aad6ae8ad4d 100644 (file)
@@ -402,7 +402,7 @@ def run_tests(ctx, config):
         else:
             args += ['REQUESTS_CA_BUNDLE=/etc/pki/tls/certs/ca-bundle.crt']
         # civetweb > 1.8 && beast parsers are strict on rfc2616
-        attrs = ["!fails_on_rgw", "!lifecycle_expiration", "!fails_strict_rfc2616","!s3select","!test_of_sts","!webidentity_test"]
+        attrs = ["!fails_on_rgw", "!lifecycle_expiration", "!fails_strict_rfc2616","!test_of_sts","!webidentity_test"]
         if client_config.get('calling-format') != 'ordinary':
             attrs += ['!fails_with_subdomain']
        
index 468a1078f73991c1de6e14a84791613a36b737f8..da4400b542b4f6e5423418bad3cc5f8c2edca630 100644 (file)
@@ -6230,6 +6230,8 @@ int RGWSelectObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t ofs, off_
     dump_errno(s);
   }
 
+  auto bl_len = bl.get_num_buffers();
+
   // Explicitly use chunked transfer encoding so that we can stream the result
   // to the user without having to wait for the full length of it.
   if (chunk_number == 0) {
@@ -6237,11 +6239,24 @@ int RGWSelectObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t ofs, off_
   }
 
   int status=0;
+  int i=0;
+
   for(auto& it : bl.buffers()) {
+
+    ldout(s->cct, 10) << "processing segment " << i << " out of " << bl_len << " off " << ofs
+                      << " len " << len << " obj-size " << s->obj_size << dendl;
+
+    if(it.length() == 0) {
+      ldout(s->cct, 10) << "s3select:it->_len is zero. segment " << i << " out of " << bl_len
+                        <<  " obj-size " << s->obj_size << dendl;
+      continue; 
+    }
+
     status = run_s3select(m_sql_query.c_str(), &(it)[0], it.length());
     if(status<0) {
       break;
     }
+    i++;
   }
 
   chunk_number++;
index 7ae7a12c138d4607d6c012228c06f3802c493c49..1ea37ae459f957786b9d198510637dfa12f83350 160000 (submodule)
@@ -1 +1 @@
-Subproject commit 7ae7a12c138d4607d6c012228c06f3802c493c49
+Subproject commit 1ea37ae459f957786b9d198510637dfa12f83350