import re
import string
import uuid

from botocore.exceptions import ClientError
from nose.plugins.attrib import attr
from nose.tools import eq_ as eq
def run_s3select(bucket,key,query,column_delim=",",row_delim="\n",quot_char='"',esc_char='\\',csv_header_info="NONE"):
    """Execute an S3 Select SQL *query* against the CSV object *key* in *bucket*.

    Returns the concatenated ``Records`` payloads of the response event
    stream as a single string.  If the service rejects the request with a
    ClientError, the stringified error is returned instead of raising, so
    callers can search the result for expected s3select error markers.
    """
    s3 = get_client()

    result = ""
    try:
        r = s3.select_object_content(
            Bucket=bucket,
            Key=key,
            ExpressionType='SQL',
            InputSerialization = {"CSV": {"RecordDelimiter" : row_delim, "FieldDelimiter" : column_delim,"QuoteEscapeCharacter": esc_char, "QuoteCharacter": quot_char, "FileHeaderInfo": csv_header_info}, "CompressionType": "NONE"},
            OutputSerialization = {"CSV": {}},
            Expression=query,)
    except ClientError as c:
        # Deliberate best-effort: surface the error text to the caller
        # rather than failing the test here.
        result += str(c)
        return result

    # Accumulate every Records chunk from the streamed response payload.
    for event in r['Payload']:
        if 'Records' in event:
            records = event['Records']['Payload'].decode('utf-8')
            result += records
    return result

@attr('s3select')
def test_count_operation():
    # The following queries validate, on *random* input, an *accurate*
    # relation between the filter condition, the sum() results and the
    # count() result: each matching row contributes exactly (_1 - _2)
    # to sum(_1) - sum(_2), so count * delta must equal the difference.
    csv_obj_name = get_random_string()
    # NOTE(review): bucket_name_2 / csv_obj_name_2 / bucket_name and the
    # uploaded object are presumably prepared elsewhere in this file --
    # not visible in this chunk; confirm against the full test module.
    res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name_2,csv_obj_name_2,"select count(0),sum(int(_1)),sum(int(_2)) from s3object where (int(_1)-int(_2)) = 2;" ) )
    count,sum1,sum2,d = res_s3select.split(",")
    s3select_assert_result( int(count)*2 , int(sum1)-int(sum2) )

    res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0),sum(int(_1)),sum(int(_2)) from s3object where (int(_1)-int(_2)) = 4;" ) )
    count,sum1,sum2,d = res_s3select.split(",")
    s3select_assert_result( int(count)*4 , int(sum1)-int(sum2) )

@attr('s3select')
def test_in_expressions():
    # purpose of test: engine processes correctly several projections
    # containing aggregation-functions and like/alias expressions.
    csv_obj = create_random_csv_object(10000,10)
    csv_obj_name = get_random_string()
    # NOTE(review): csv_obj is built but never uploaded in this chunk;
    # presumably an upload helper call belongs here -- confirm upstream.

    # A valid LIKE projection must agree with its substring() equivalent.
    res_s3select_like = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select (_1 like "cbcd%") from s3object;')).replace("\n","")
    res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name, 'select (substring(_1,1,4) = "cbcd") from s3object;')).replace("\n","")
    s3select_assert_result( res_s3select_like, res_s3select )

    # A malformed LIKE clause must be rejected with a syntax error.
    res_s3select_like = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from stdin where _1 like "%aeio%" like;')).replace("\n","")
    find_like = res_s3select_like.find("s3select-Syntax-Error")
    assert int(find_like) >= 0

    # alias-name is identical to column-name
    res_multiple_defintion = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select int(c1)+int(c2) as c4,c4 from s3object;",csv_header_info="USE") ).replace("\n","")
    find_processing_error = res_multiple_defintion.find("s3select-ProcessingTime-Error")
    assert int(find_processing_error) >= 0
    # NOTE(review): the {c11} message below looks like it belongs to a
    # different query in the full test module -- confirm which response
    # it should be asserted against.
    assert res_multiple_defintion.find("alias {c11} or column not exist in schema") > 0