From b73c6c40bfa3b5a4af56d3d93ada1eff416b09d8 Mon Sep 17 00:00:00 2001 From: galsalomon66 Date: Sun, 10 Jan 2021 23:30:46 +0200 Subject: [PATCH] diagnose zero segment while s3select Signed-off-by: galsalomon66 using s3-test with the fix fix_test_file_permission_add_attrib Signed-off-by: galsalomon66 update s3select{1ea37ae459f957786b9d198510637dfa12f83350} ; add log tracing zero segments locations Signed-off-by: galsalomon66 s3select submodule update Signed-off-by: galsalomon66 fix comments Signed-off-by: galsalomon66 updating the s3select new features; covering the gap between latest documentation and latest faetures update Signed-off-by: galsalomon66 add some more examples; fixes Signed-off-by: galsalomon66 add between expression to doc; typo Signed-off-by: galsalomon66 typo Signed-off-by: galsalomon66 skipping empty segments Signed-off-by: galsalomon66 skip empty segments to the next segments Signed-off-by: galsalomon66 --- doc/radosgw/s3select.rst | 170 +++++++++++++++++++++++++++++++-------- qa/tasks/s3tests.py | 2 +- src/rgw/rgw_rest_s3.cc | 15 ++++ src/s3select | 2 +- 4 files changed, 155 insertions(+), 34 deletions(-) diff --git a/doc/radosgw/s3select.rst b/doc/radosgw/s3select.rst index faee08e265e8b..4f89e5ca590a9 100644 --- a/doc/radosgw/s3select.rst +++ b/doc/radosgw/s3select.rst @@ -57,60 +57,159 @@ Error Handling - Features Support ---------------- - .. _feature-table: - | Currently only part of `AWS select command `_ is implemented, table bellow describes what is currently supported. + | Currently only part of `AWS select command `_ is implemented, table below describes what is currently supported. | The following table describes the current implementation for s3-select functionalities: +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| Feature | Detailed | Example | +| Feature | Detailed | Example / Description | +=================================+=================+=======================================================================+ -| Arithmetic operators | ^ * / + - ( ) | select (int(_1)+int(_2))*int(_9) from stdin; | +| Arithmetic operators | ^ * % / + - ( ) | select (int(_1)+int(_2))*int(_9) from s3object; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| | ``%`` modulo | select count(*) from s3object where cast(_1 as int)%2 == 0; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| | ``^`` power-of | select cast(2^10 as int) from s3object; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| Compare operators | > < >= <= == != | select _1,_2 from s3object where (int(_1)+int(_3))>int(_5); | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| logical operator | AND OR NOT | select count(*) from s3object where not (int(1)>123 and int(_5)<200); | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| logical operator | is null | return true/false for null indication in expression | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| logical operator | is not null | return true/false for null indication in expression | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| logical operator and NULL | unknown state | review null-handle_ observe how logical operator result with null. | +| | | the following query return **0**. | +| | | ``select count(*) from s3object where null and (3>2);`` | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| Arithmetic operator with NULL | unknown state | review null-handle_ observe the results of binary operations with NULL| +| | | the following query return **0**. | +| | | ``select count(*) from s3object where (null+1) and (3>2);`` | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| compare with NULL | unknown state | review null-handle_ observe results of compare operations with NULL | +| | | the following query return **0**. | +| | | ``select count(*) from s3object where (null*1.5) != 3;`` | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| missing column | unknown state | select count(*) from s3object where _1 is null; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| projection column | similar to | select case | +| | if/then/else | when (1+1==(2+1)*3) then 'case_1' | +| | | when ((4*3)==(12)) then 'case_2' else 'case_else' end, | +| | | age*2 from s3object; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| logical operator | ``coalesce`` :: return first non-null argumnet | +| | select coalesce(nullif(5,5),nullif(1,1.0),age+12) from s3object; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| logical operator | ``nullif`` :: return null in case both arguments are equal, or else the first one | +| | nullif(1,1)=NULL nullif(null,1)=NULL nullif(2,1)=2 | +| | select nullif(cast(_1 as int),cast(_2 as int)) from s3object; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| logical operator | ``{expression} in ( .. {expression} ..)`` | +| | select count(*) from s3object | +| | where 'ben' in (trim(_5),substring(_1,char_length(_1)-3,3),last_name); | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| logical operator | ``{expression} between {expression} and {expression}`` | +| | select count(*) from stdin | +| | where substring(_3,char_length(_3),1) between "x" and trim(_1) | +| | and substring(_3,char_length(_3)-1,1) == ":"; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| | | select ((1+2)*3.14) ^ 2 from stdin; | +| logical operator | ``{expression} like {match-pattern}`` | +| | select count(*) from s3object where first_name like '%de_'; | +| | select count(*) from s3object where _1 like \"%a[r-s]\; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| Compare operators | > < >= <= == != | select _1,_2 from stdin where (int(1)+int(_3))>int(_5); | +| casting operator | select cast(123 as int)%2 from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| logical operator | AND OR | select count(*) from stdin where int(1)>123 and int(_5)<200; | +| casting operator | select cast(123.456 as float)%2 from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| casting operator | int(expression) | select int(_1),int( 1.2 + 3.4) from stdin; | +| casting operator | select cast('ABC0-9' as string),cast(substr('ab12cd',3,2) as int)*4 from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| |float(expression)| select float(1.2) from stdin; | +| casting operator | select cast(substring('publish on 2007-01-01',12,10) as timestamp) from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| | timestamp(...) | select timestamp("1999:10:10-12:23:44") from stdin; | +| non AWS casting operator | select int(_1),int( 1.2 + 3.4) from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| Aggregation Function | sum | select sum(int(_1)) from stdin; | +| non AWS casting operator | select float(1.2) from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| Aggregation Function | min | select min( int(_1) * int(_5) ) from stdin; | +| not AWS casting operator | select timestamp('1999:10:10-12:23:44') from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| Aggregation Function | max | select max(float(_1)),min(int(_5)) from stdin; | +| Aggregation Function | sum | select sum(int(_1)) from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| Aggregation Function | count | select count(*) from stdin where (int(1)+int(_3))>int(_5); | +| Aggregation Function | avg | select avg(cast(_1 a float) + cast(_2 as int)) from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| Timestamp Functions | extract | select count(*) from stdin where | -| | | extract("year",timestamp(_2)) > 1950 | -| | | and extract("year",timestamp(_1)) < 1960; | +| Aggregation Function | min | select min( int(_1) * int(_5) ) from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| Timestamp Functions | dateadd | select count(0) from stdin where | -| | | datediff("year",timestamp(_1),dateadd("day",366,timestamp(_1))) == 1; | +| Aggregation Function | max | select max(float(_1)),min(int(_5)) from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| Timestamp Functions | datediff | select count(0) from stdin where | -| | | datediff("month",timestamp(_1),timestamp(_2))) == 2; | +| Aggregation Function | count | select count(*) from s3object where (int(1)+int(_3))>int(_5); | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| Timestamp Functions | utcnow | select count(0) from stdin where | -| | | datediff("hours",utcnow(),dateadd("day",1,utcnow())) == 24 ; | +| Timestamp Functions | extract | select count(*) from s3object where | +| | | extract('year',timestamp(_2)) > 1950 | +| | | and extract('year',timestamp(_1)) < 1960; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ -| String Functions | substr | select count(0) from stdin where | -| | | int(substr(_1,1,4))>1950 and int(substr(_1,1,4))<1960; | +| Timestamp Functions | dateadd | select count(0) from s3object where | +| | | datediff('year',timestamp(_1),dateadd('day',366,timestamp(_1))) == 1; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| Timestamp Functions | datediff | select count(0) from s3object where | +| | | datediff('month',timestamp(_1),timestamp(_2))) == 2; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| Timestamp Functions | utcnow | select count(0) from s3object where | +| | | datediff('hours',utcnow(),dateadd('day',1,utcnow())) == 24 ; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| String Functions | substring | select count(0) from s3object where | +| | | int(substring(_1,1,4))>1950 and int(substring(_1,1,4))<1960; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| String Functions | trim | select trim(' foobar ') from s3object; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| String Functions | trim | select trim(trailing from ' foobar ') from s3object; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| String Functions | trim | select trim(leading from ' foobar ') from s3object; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| String Functions | trim | select trim(both '12' from '1112211foobar22211122') from s3objects; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| String Functions | lower/upper | select lower('ABcD12#$e') from s3object; | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| String Functions | char_length | select count(*) from s3object where char_length(_3)==3; | +| | character_length| | ++---------------------------------+-----------------+-----------------------------------------------------------------------+ +| Complex queries | select sum(cast(_1 as int)), | +| | max(cast(_3 as int)), | +| | substring('abcdefghijklm', | +| | (2-1)*3+sum(cast(_1 as int))/sum(cast(_1 as int))+1, | +| | (count() + count(0))/count(0)) from s3object; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ | alias support | | select int(_1) as a1, int(_2) as a2 , (a1+a2) as a3 | -| | | from stdin where a3>100 and a3<300; | +| | | from s3object where a3>100 and a3<300; | +---------------------------------+-----------------+-----------------------------------------------------------------------+ +.. _null-handle: + +NULL +~~~~ +| NULL is a legit value in ceph-s3select systems similar to other DB systems, i.e. systems needs to handle the case where a value is NULL. +| The definition of NULL in our context, is missing/unknown, in that sense **NULL can not produce a value on ANY arithmetic operations** ( a + NULL will produce NULL value). +| The Same is with arithmetic comaprision, **any comparison to NULL is NULL**, i.e. unknown. +| Below is a truth table contains the NULL use-case. + ++---------------------------------+-----------------------------+ +| A is NULL | Result (NULL=UNKNOWN) | ++=================================+=============================+ +| NOT A | NULL | ++---------------------------------+-----------------------------+ +| A OR False | NULL | ++---------------------------------+-----------------------------+ +| A OR True | True | ++---------------------------------+-----------------------------+ +| A OR A | NULL | ++---------------------------------+-----------------------------+ +| A AND False | False | ++---------------------------------+-----------------------------+ +| A AND True | NULL | ++---------------------------------+-----------------------------+ +| A and A | NULL | ++---------------------------------+-----------------------------+ + s3-select function interfaces ----------------------------- @@ -129,7 +228,6 @@ Timestamp functions | ``datediff(date-part,timestamp,timestamp)`` : function return an integer, a calculated result for difference between 2 timestamps according to date-part. | supported date-part : year,month,day,hours. - | ``utcnow()`` : return timestamp of current time. Aggregation functions @@ -139,6 +237,8 @@ Aggregation functions | ``sum(expression)`` : return a summary of expression per all rows matching condition(if such exist). + | ``avg(expression)`` : return a average of expression per all rows matching condition(if such exist). + | ``max(expression)`` : return the maximal result for all expressions matching condition(if such exist). | ``min(expression)`` : return the minimal result for all expressions matching condition(if such exist). @@ -146,7 +246,13 @@ Aggregation functions String functions ~~~~~~~~~~~~~~~~ - | ``substr(string,from,to)`` : return a string extract from input string according to from,to inputs. + | ``substring(string,from,to)`` : return a string extract from input string according to from,to inputs. + + | ``char_length`` : return a number of characters in string (``character_length`` does the same). + + | ``trim`` : trims leading/trailing characters from target string, the default is blank character. + + | ``upper\lower`` : converts characters into lowercase/uppercase. Alias @@ -169,7 +275,7 @@ Sending Query to RGW | Sending s3-select request to RGW using AWS cli, should follow `AWS command reference `_. - | bellow is an example for it. + | below is an example for it. :: @@ -180,7 +286,7 @@ Sending Query to RGW '{"CSV": {"FieldDelimiter": "," , "QuoteCharacter": "\"" , "RecordDelimiter" : "\n" , "QuoteEscapeCharacter" : "\\" , "FileHeaderInfo": "USE" }, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --key {OBJECT-NAME} - --expression "select count(0) from stdin where int(_1)<10;" output.csv + --expression "select count(0) from s3object where int(_1)<10;" output.csv Syntax ~~~~~~ @@ -264,5 +370,5 @@ BOTO3 run_s3select( "my_bucket", "my_csv_object", - "select int(_1) as a1, int(_2) as a2 , (a1+a2) as a3 from stdin where a3>100 and a3<300;") + "select int(_1) as a1, int(_2) as a2 , (a1+a2) as a3 from s3object where a3>100 and a3<300;") diff --git a/qa/tasks/s3tests.py b/qa/tasks/s3tests.py index d87c2ab0a4303..3310d7499a110 100644 --- a/qa/tasks/s3tests.py +++ b/qa/tasks/s3tests.py @@ -402,7 +402,7 @@ def run_tests(ctx, config): else: args += ['REQUESTS_CA_BUNDLE=/etc/pki/tls/certs/ca-bundle.crt'] # civetweb > 1.8 && beast parsers are strict on rfc2616 - attrs = ["!fails_on_rgw", "!lifecycle_expiration", "!fails_strict_rfc2616","!s3select","!test_of_sts","!webidentity_test"] + attrs = ["!fails_on_rgw", "!lifecycle_expiration", "!fails_strict_rfc2616","!test_of_sts","!webidentity_test"] if client_config.get('calling-format') != 'ordinary': attrs += ['!fails_with_subdomain'] diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 468a1078f7399..da4400b542b4f 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -6230,6 +6230,8 @@ int RGWSelectObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t ofs, off_ dump_errno(s); } + auto bl_len = bl.get_num_buffers(); + // Explicitly use chunked transfer encoding so that we can stream the result // to the user without having to wait for the full length of it. if (chunk_number == 0) { @@ -6237,11 +6239,24 @@ int RGWSelectObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t ofs, off_ } int status=0; + int i=0; + for(auto& it : bl.buffers()) { + + ldout(s->cct, 10) << "processing segment " << i << " out of " << bl_len << " off " << ofs + << " len " << len << " obj-size " << s->obj_size << dendl; + + if(it.length() == 0) { + ldout(s->cct, 10) << "s3select:it->_len is zero. segment " << i << " out of " << bl_len + << " obj-size " << s->obj_size << dendl; + continue; + } + status = run_s3select(m_sql_query.c_str(), &(it)[0], it.length()); if(status<0) { break; } + i++; } chunk_number++; diff --git a/src/s3select b/src/s3select index 7ae7a12c138d4..1ea37ae459f95 160000 --- a/src/s3select +++ b/src/s3select @@ -1 +1 @@ -Subproject commit 7ae7a12c138d4607d6c012228c06f3802c493c49 +Subproject commit 1ea37ae459f957786b9d198510637dfa12f83350 -- 2.39.5