From: Yuval Lifshitz Date: Tue, 28 Oct 2025 05:30:48 +0000 (+0000) Subject: rgw/s3vector: add integration tests X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ab3f119ce24ad13ac9cec7261e422469e419bb28;p=ceph-ci.git rgw/s3vector: add integration tests do some fixes to message validation based on the tests Signed-off-by: Yuval Lifshitz --- diff --git a/src/rgw/rgw_s3vector.cc b/src/rgw/rgw_s3vector.cc index 2e6b579c235..513d66865e4 100644 --- a/src/rgw/rgw_s3vector.cc +++ b/src/rgw/rgw_s3vector.cc @@ -332,7 +332,7 @@ void list_vectors_t::dump(ceph::Formatter* f) const { void list_vectors_t::decode_json(JSONObj* obj) { decode_name_or_arn("indexName", "indexArn", index_name, index_arn, obj); decode_name("vectorBucketName", vector_bucket_name, obj); - JSONDecoder::decode_json("maxResults", max_results, obj); + JSONDecoder::decode_json("maxResults", max_results, default_max_results, obj); JSONDecoder::decode_json("nextToken", next_token, obj); JSONDecoder::decode_json("returnData", return_data, obj); JSONDecoder::decode_json("returnMetadata", return_metadata, obj); @@ -377,16 +377,16 @@ void list_vector_buckets_t::dump(ceph::Formatter* f) const { } void list_vector_buckets_t::decode_json(JSONObj* obj) { - JSONDecoder::decode_json("maxResults", max_results, obj); + JSONDecoder::decode_json("maxResults", max_results, default_max_results, obj); JSONDecoder::decode_json("nextToken", next_token, obj); JSONDecoder::decode_json("prefix", prefix, obj); - if (max_results < 1 || max_results > 1000) { - throw JSONDecoder::err(fmt::format("maxResults must be between 1 and 1000, got {}", max_results)); + if (max_results < 1 || max_results > 500) { + throw JSONDecoder::err(fmt::format("maxResults must be between 1 and 500, got {}", max_results)); } - if (!next_token.empty() && (next_token.length() < 1 || next_token.length() > 2048)) { - throw JSONDecoder::err(fmt::format("nextToken length must be between 1 and 2048, got {}", next_token.length())); + 
if (!next_token.empty() && (next_token.length() < 1 || next_token.length() > 512)) { + throw JSONDecoder::err(fmt::format("nextToken length must be between 1 and 512, got {}", next_token.length())); } if (!prefix.empty() && (prefix.length() < 1 || prefix.length() > 63)) { @@ -456,7 +456,7 @@ void list_indexes_t::dump(ceph::Formatter* f) const { } void list_indexes_t::decode_json(JSONObj* obj) { - JSONDecoder::decode_json("maxResults", max_results, obj); + JSONDecoder::decode_json("maxResults", max_results, default_max_results, obj); JSONDecoder::decode_json("nextToken", next_token, obj); JSONDecoder::decode_json("prefix", prefix, obj); decode_name_or_arn("vectorBucketName", "vectorBucketArn", vector_bucket_name, vector_bucket_arn, obj); diff --git a/src/rgw/rgw_s3vector.h b/src/rgw/rgw_s3vector.h index f8542492048..41dc92180fa 100644 --- a/src/rgw/rgw_s3vector.h +++ b/src/rgw/rgw_s3vector.h @@ -281,7 +281,8 @@ struct list_vectors_t { std::string index_arn; std::string index_name; std::string vector_bucket_name; - unsigned int max_results = 500; + static constexpr unsigned int default_max_results = 500; + unsigned int max_results = default_max_results; std::string next_token; bool return_data = false; bool return_metadata = false; @@ -313,7 +314,8 @@ WRITE_CLASS_ENCODER(list_vectors_t) } */ struct list_vector_buckets_t { - unsigned int max_results = 500; + static constexpr unsigned int default_max_results = 500; + unsigned int max_results = default_max_results; std::string next_token; std::string prefix; @@ -400,7 +402,8 @@ WRITE_CLASS_ENCODER(get_index_t) } */ struct list_indexes_t { - unsigned int max_results = 500; + static constexpr unsigned int default_max_results = 500; + unsigned int max_results = default_max_results; std::string next_token; std::string prefix; std::string vector_bucket_arn; diff --git a/src/test/rgw/s3vectors/README.rst b/src/test/rgw/s3vectors/README.rst new file mode 100644 index 00000000000..e5d16c1821d --- /dev/null +++ 
b/src/test/rgw/s3vectors/README.rst @@ -0,0 +1,12 @@ +=============== +s3vectors Tests +=============== + +* Start the cluster using the `vstart.sh` script +* Run the test from within the `src/test/rgw/s3vectors` directory: + `S3VTESTS_CONF=s3vtests.conf.SAMPLE tox` +* To run a specific test use: + `S3VTESTS_CONF=s3vtests.conf.SAMPLE tox -- s3vector_test.py::<test_name>` +* To run a group of tests use: + `S3VTESTS_CONF=s3vtests.conf.SAMPLE tox -- s3vector_test.py -m "<marker>"` + diff --git a/src/test/rgw/s3vectors/__init__.py b/src/test/rgw/s3vectors/__init__.py new file mode 100644 index 00000000000..6843ecb077a --- /dev/null +++ b/src/test/rgw/s3vectors/__init__.py @@ -0,0 +1,60 @@ +import configparser +import os +import pytest + +def setup(): + cfg = configparser.RawConfigParser() + try: + path = os.environ['S3VTESTS_CONF'] + except KeyError: + raise RuntimeError( + 'To run tests, point environment ' + + 'variable s3VTESTS_CONF to a config file.', + ) + cfg.read(path) + + if not cfg.defaults(): + raise RuntimeError('Your config file is missing the DEFAULT section!') + if not cfg.has_section("s3 main"): + raise RuntimeError('Your config file is missing the "s3 main" section!') + + defaults = cfg.defaults() + + # vars from the DEFAULT section + global default_host + default_host = defaults.get("host") + + global default_port + default_port = int(defaults.get("port")) + # vars from the main section + global main_access_key + main_access_key = cfg.get('s3 main',"access_key") + + global main_secret_key + main_secret_key = cfg.get('s3 main',"secret_key") + + +def get_config_host(): + global default_host + return default_host + + +def get_config_port(): + global default_port + return default_port + + +def get_access_key(): + global main_access_key + return main_access_key + + +def get_secret_key(): + global main_secret_key + return main_secret_key + + +@pytest.fixture(autouse=True, scope="package") +def configfile(): + setup() + diff --git a/src/test/rgw/s3vectors/pytest.ini 
b/src/test/rgw/s3vectors/pytest.ini new file mode 100644 index 00000000000..4dbe74c9271 --- /dev/null +++ b/src/test/rgw/s3vectors/pytest.ini @@ -0,0 +1,5 @@ +[pytest] +markers = + vector_bucket_test + index_test + vector_test diff --git a/src/test/rgw/s3vectors/requirements.txt b/src/test/rgw/s3vectors/requirements.txt new file mode 100644 index 00000000000..c9d2a69bf4e --- /dev/null +++ b/src/test/rgw/s3vectors/requirements.txt @@ -0,0 +1,3 @@ +boto3 >=1.0.0 +botocore >=1.39.5 +pytest diff --git a/src/test/rgw/s3vectors/s3vector_test.py b/src/test/rgw/s3vectors/s3vector_test.py new file mode 100644 index 00000000000..510ca85fc42 --- /dev/null +++ b/src/test/rgw/s3vectors/s3vector_test.py @@ -0,0 +1,343 @@ +import logging +import json +import tempfile +import random +import socket +import time +import threading +import subprocess +import os +import stat +import string +import pytest +import boto3 +from botocore.config import Config + +from . import( + configfile, + get_config_host, + get_config_port, + get_access_key, + get_secret_key + ) + + +# configure logging for the tests module +log = logging.getLogger(__name__) + +num_buckets = 0 +run_prefix=''.join(random.choice(string.ascii_lowercase) for _ in range(6)) + +test_path = os.path.normpath(os.path.dirname(os.path.realpath(__file__))) + '/../' + +def bash(cmd, **kwargs): + log.debug('running command: %s', ' '.join(cmd)) + kwargs['stdout'] = subprocess.PIPE + process = subprocess.Popen(cmd, **kwargs) + s = process.communicate()[0].decode('utf-8') + return (s, process.returncode) + + +def admin(args, **kwargs): + """ radosgw-admin command """ + cmd = [test_path + 'test-rgw-call.sh', 'call_rgw_admin', 'noname'] + args + return bash(cmd, **kwargs) + + +def gen_bucket_name(): + global num_buckets + + num_buckets += 1 + return run_prefix + '-' + str(num_buckets) + + +def connection(): + hostname = get_config_host() + port_no = get_config_port() + access_key = get_access_key() + secret_key = get_secret_key() + if 
port_no == 443 or port_no == 8443: + scheme = 'https://' + else: + scheme = 'http://' + + client = boto3.client('s3vectors', + endpoint_url=scheme+hostname+':'+str(port_no), + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + config=Config(signature_version='s3')) + + return client + + +def another_user(tenant=None): + access_key = str(time.time()) + secret_key = str(time.time()) + uid = 'superman' + str(time.time()) + if tenant: + _, result = admin(['user', 'create', '--uid', uid, '--tenant', tenant, '--access-key', access_key, '--secret-key', secret_key, '--display-name', '"Super Man"']) + else: + _, result = admin(['user', 'create', '--uid', uid, '--access-key', access_key, '--secret-key', secret_key, '--display-name', '"Super Man"']) + + assert result == 0 + hostname = get_config_host() + port_no = get_config_port() + if port_no == 443 or port_no == 8443: + scheme = 'https://' + else: + scheme = 'http://' + + client = boto3.client('s3vectors', + endpoint_url=scheme+hostname+':'+str(port_no), + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + config=Config(signature_version='s3')) + + return client + + +################# +# s3vectors tests +################# + +@pytest.mark.vector_bucket_test +def test_create_vector_bucket(): + conn = connection() + bucket_name = gen_bucket_name() + result = conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # cleanup + _ = conn.delete_vector_bucket(vectorBucketName=bucket_name) + + +@pytest.mark.vector_bucket_test +def test_get_vector_bucket(): + conn = connection() + bucket_name = gen_bucket_name() + result = conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.get_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # cleanup + _ = conn.delete_vector_bucket(vectorBucketName=bucket_name) + + 
+@pytest.mark.vector_bucket_test +def test_delete_vector_bucket(): + conn = connection() + bucket_name = gen_bucket_name() + result = conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.get_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.delete_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # not implemented yet + #with pytest.raises(conn.exceptions.NoSuchVectorBucket): + # result = conn.get_vector_bucket(vectorBucketName=bucket_name) + + +@pytest.mark.vector_bucket_test +def test_list_vector_bucket(): + conn = connection() + bucket_name1 = gen_bucket_name() + bucket_name2 = gen_bucket_name() + result = conn.create_vector_bucket(vectorBucketName=bucket_name1) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.create_vector_bucket(vectorBucketName=bucket_name2) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.list_vector_buckets() + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # not implemented yet + #bucket_names = [b['Name'] for b in result['VectorBuckets']] + #assert bucket_name1 in bucket_names + #assert bucket_name2 in bucket_names + # cleanup + _ = conn.delete_vector_bucket(vectorBucketName=bucket_name1) + _ = conn.delete_vector_bucket(vectorBucketName=bucket_name2) + + +@pytest.mark.index_test +def test_create_index(): + conn = connection() + bucket_name = gen_bucket_name() + result = conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + index_name = 'test-index' + result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean') + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # cleanup + _ = 
conn.delete_vector_bucket(vectorBucketName=bucket_name) + + +@pytest.mark.index_test +def test_get_index(): + conn = connection() + bucket_name = gen_bucket_name() + result = conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + index_name = 'test-index' + result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean') + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.get_index(vectorBucketName=bucket_name, indexName=index_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # cleanup + _ = conn.delete_vector_bucket(vectorBucketName=bucket_name) + + +@pytest.mark.index_test +def test_delete_index(): + conn = connection() + bucket_name = gen_bucket_name() + result = conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + index_name = 'test-index' + result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean') + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.get_index(vectorBucketName=bucket_name, indexName=index_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.delete_index(vectorBucketName=bucket_name, indexName=index_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # not implemented yet + #with pytest.raises(conn.exceptions.NoSuchIndex): + # result = conn.get_index(vectorBucketName=bucket_name, indexName=index_name) + # cleanup + _ = conn.delete_vector_bucket(vectorBucketName=bucket_name) + + +@pytest.mark.index_test +def test_list_indexes(): + conn = connection() + bucket_name = gen_bucket_name() + result = conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + index_name1 = 'test-index1' + 
index_name2 = 'test-index2' + result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name1, dataType='float32', dimension=128, distanceMetric='cosine') + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name2, dataType='float32', dimension=128, distanceMetric='euclidean') + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.list_indexes(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # not implemented yet + #index_names = [i['IndexName'] for i in result['Indexes']] + #assert index_name1 in index_names + #assert index_name2 in index_names + # cleanup + _ = conn.delete_vector_bucket(vectorBucketName=bucket_name) + + +def generate_data(dimension): + return {'float32': [float(j) for j in range(dimension)]} + + +def generate_vectors(num_vectors, dimension): + vectors = [] + for i in range(num_vectors): + vectors.append({ + 'key': 'vec-' + str(i), + 'data': generate_data(dimension) + }) + return vectors + + +@pytest.mark.vector_test +def test_put_vectors(): + conn = connection() + bucket_name = gen_bucket_name() + result = conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + index_name = 'test-index' + result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean') + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + vectors = generate_vectors(10, 128) + result = conn.put_vectors(vectorBucketName=bucket_name, indexName=index_name, vectors=vectors) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # cleanup + _ = conn.delete_vector_bucket(vectorBucketName=bucket_name) + + +@pytest.mark.vector_test +def test_get_vectors(): + conn = connection() + bucket_name = gen_bucket_name() + result = 
conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + index_name = 'test-index' + result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean') + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + vectors = generate_vectors(10, 128) + result = conn.put_vectors(vectorBucketName=bucket_name, indexName=index_name, vectors=vectors) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + vector_ids = ['vec-' + str(i) for i in range(10)] + result = conn.get_vectors(vectorBucketName=bucket_name, indexName=index_name, keys=vector_ids) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # not implemented yet + #assert len(result['Vectors']) == 10 + # cleanup + _ = conn.delete_vector_bucket(vectorBucketName=bucket_name) + + +@pytest.mark.vector_test +def test_list_vectors(): + conn = connection() + bucket_name = gen_bucket_name() + result = conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + index_name = 'test-index' + result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean') + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + vectors = generate_vectors(10, 128) + result = conn.put_vectors(vectorBucketName=bucket_name, indexName=index_name, vectors=vectors) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.list_vectors(vectorBucketName=bucket_name, indexName=index_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # not implemented yet + #assert len(result['Vectors']) == 10 + # cleanup + _ = conn.delete_vector_bucket(vectorBucketName=bucket_name) + + +@pytest.mark.vector_test +def test_delete_vectors(): + conn = connection() + bucket_name = gen_bucket_name() + result = 
conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + index_name = 'test-index' + result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean') + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + vectors = generate_vectors(10, 128) + result = conn.put_vectors(vectorBucketName=bucket_name, indexName=index_name, vectors=vectors) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + vector_ids = ['vec-' + str(i) for i in range(10)] + result = conn.delete_vectors(vectorBucketName=bucket_name, indexName=index_name, keys=vector_ids) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + result = conn.list_vectors(vectorBucketName=bucket_name, indexName=index_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # not implemented yet + #assert len(result['Vectors']) == 0 + # cleanup + _ = conn.delete_vector_bucket(vectorBucketName=bucket_name) + + +@pytest.mark.vector_test +def test_query_vectors(): + conn = connection() + bucket_name = gen_bucket_name() + result = conn.create_vector_bucket(vectorBucketName=bucket_name) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + index_name = 'test-index' + result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean') + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + vectors = generate_vectors(10, 128) + result = conn.put_vectors(vectorBucketName=bucket_name, indexName=index_name, vectors=vectors) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + query_vector = generate_data(128) + result = conn.query_vectors(vectorBucketName=bucket_name, indexName=index_name, queryVector=query_vector, topK=5) + assert result['ResponseMetadata']['HTTPStatusCode'] == 200 + # not implemented yet + #assert len(result['Results']) == 5 + # cleanup + _ = 
conn.delete_vector_bucket(vectorBucketName=bucket_name) + diff --git a/src/test/rgw/s3vectors/s3vtests.conf.SAMPLE b/src/test/rgw/s3vectors/s3vtests.conf.SAMPLE new file mode 100644 index 00000000000..eb3291dafa8 --- /dev/null +++ b/src/test/rgw/s3vectors/s3vtests.conf.SAMPLE @@ -0,0 +1,10 @@ +[DEFAULT] +port = 8000 +host = localhost + +[s3 main] +access_key = 0555b35654ad1656d804 +secret_key = h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q== +display_name = M. Tester +user_id = testid +email = tester@ceph.com diff --git a/src/test/rgw/s3vectors/tox.ini b/src/test/rgw/s3vectors/tox.ini new file mode 100644 index 00000000000..be9aa92b4cc --- /dev/null +++ b/src/test/rgw/s3vectors/tox.ini @@ -0,0 +1,9 @@ +[tox] +envlist = py +skipsdist = True + +[testenv] +deps = -rrequirements.txt +passenv = + S3VTESTS_CONF +commands = pytest {posargs}