]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rgw/s3vector: add integration tests
authorYuval Lifshitz <ylifshit@ibm.com>
Tue, 28 Oct 2025 05:30:48 +0000 (05:30 +0000)
committerYuval Lifshitz <ylifshit@ibm.com>
Tue, 28 Oct 2025 05:30:48 +0000 (05:30 +0000)
do some fixes to message validation based on the tests

Signed-off-by: Yuval Lifshitz <ylifshit@ibm.com>
src/rgw/rgw_s3vector.cc
src/rgw/rgw_s3vector.h
src/test/rgw/s3vectors/README.rst [new file with mode: 0644]
src/test/rgw/s3vectors/__init__.py [new file with mode: 0644]
src/test/rgw/s3vectors/pytest.ini [new file with mode: 0644]
src/test/rgw/s3vectors/requirements.txt [new file with mode: 0644]
src/test/rgw/s3vectors/s3vector_test.py [new file with mode: 0644]
src/test/rgw/s3vectors/s3vtests.conf.SAMPLE [new file with mode: 0644]
src/test/rgw/s3vectors/tox.ini [new file with mode: 0644]

index 2e6b579c2351f8e13c03af0bfc3d128fdde10c76..513d66865e4e551adb0d89a9c4118073563975fb 100644 (file)
@@ -332,7 +332,7 @@ void list_vectors_t::dump(ceph::Formatter* f) const {
 void list_vectors_t::decode_json(JSONObj* obj) {
   decode_name_or_arn("indexName", "indexArn", index_name, index_arn, obj);
   decode_name("vectorBucketName", vector_bucket_name, obj);
-  JSONDecoder::decode_json("maxResults", max_results, obj);
+  JSONDecoder::decode_json("maxResults", max_results, default_max_results, obj);
   JSONDecoder::decode_json("nextToken", next_token, obj);
   JSONDecoder::decode_json("returnData", return_data, obj);
   JSONDecoder::decode_json("returnMetadata", return_metadata, obj);
@@ -377,16 +377,16 @@ void list_vector_buckets_t::dump(ceph::Formatter* f) const {
 }
 
 void list_vector_buckets_t::decode_json(JSONObj* obj) {
-  JSONDecoder::decode_json("maxResults", max_results, obj);
+  JSONDecoder::decode_json("maxResults", max_results, default_max_results, obj);
   JSONDecoder::decode_json("nextToken", next_token, obj);
   JSONDecoder::decode_json("prefix", prefix, obj);
 
-  if (max_results < 1 || max_results > 1000) {
-    throw JSONDecoder::err(fmt::format("maxResults must be between 1 and 1000, got {}", max_results));
+  if (max_results < 1 || max_results > 500) {
+    throw JSONDecoder::err(fmt::format("maxResults must be between 1 and 500, got {}", max_results));
   }
 
-  if (!next_token.empty() && (next_token.length() < 1 || next_token.length() > 2048)) {
-    throw JSONDecoder::err(fmt::format("nextToken length must be between 1 and 2048, got {}", next_token.length()));
+  if (!next_token.empty() && (next_token.length() < 1 || next_token.length() > 512)) {
+    throw JSONDecoder::err(fmt::format("nextToken length must be between 1 and 512, got {}", next_token.length()));
   }
 
   if (!prefix.empty() && (prefix.length() < 1 || prefix.length() > 63)) {
@@ -456,7 +456,7 @@ void list_indexes_t::dump(ceph::Formatter* f) const {
 }
 
 void list_indexes_t::decode_json(JSONObj* obj) {
-  JSONDecoder::decode_json("maxResults", max_results, obj);
+  JSONDecoder::decode_json("maxResults", max_results, default_max_results, obj);
   JSONDecoder::decode_json("nextToken", next_token, obj);
   JSONDecoder::decode_json("prefix", prefix, obj);
   decode_name_or_arn("vectorBucketName", "vectorBucketArn", vector_bucket_name, vector_bucket_arn, obj);
index f8542492048c7f4f40b599d55e50adbb5673be85..41dc92180fa6200c0e5a14e94c174213a9def2b0 100644 (file)
@@ -281,7 +281,8 @@ struct list_vectors_t {
   std::string index_arn;
   std::string index_name;
   std::string vector_bucket_name;
-  unsigned int max_results = 500;
+  static constexpr unsigned int default_max_results = 500;
+  unsigned int max_results = default_max_results;
   std::string next_token;
   bool return_data = false;
   bool return_metadata = false;
@@ -313,7 +314,8 @@ WRITE_CLASS_ENCODER(list_vectors_t)
   }
 */
 struct list_vector_buckets_t {
-  unsigned int max_results = 500;
+  static constexpr unsigned int default_max_results = 500;
+  unsigned int max_results = default_max_results;
   std::string next_token;
   std::string prefix;
 
@@ -400,7 +402,8 @@ WRITE_CLASS_ENCODER(get_index_t)
   }
 */
 struct list_indexes_t {
-  unsigned int max_results = 500;
+  static constexpr unsigned int default_max_results = 500;
+  unsigned int max_results = default_max_results;
   std::string next_token;
   std::string prefix;
   std::string vector_bucket_arn;
diff --git a/src/test/rgw/s3vectors/README.rst b/src/test/rgw/s3vectors/README.rst
new file mode 100644 (file)
index 0000000..e5d16c1
--- /dev/null
@@ -0,0 +1,12 @@
+===============
+s3vectors Tests
+===============
+
+* Start the cluster using the `vstart.sh` script
+* Run the test from within the `src/test/rgw/s3vectors` directory:
+  `S3VTESTS_CONF=s3vtests.conf.SAMPLE tox`
+* To run a specific test use:
+  `S3VTESTS_CONF=s3vtests.conf.SAMPLE tox -- s3vector_test.py::<test_name>`
+* To run a group of tests use:
+  `S3VTESTS_CONF=s3vtests.conf.SAMPLE tox -- s3vector_test.py -m "<marker name>"`
+
diff --git a/src/test/rgw/s3vectors/__init__.py b/src/test/rgw/s3vectors/__init__.py
new file mode 100644 (file)
index 0000000..6843ecb
--- /dev/null
@@ -0,0 +1,60 @@
+import configparser
+import os
+import pytest
+
+def setup():
+    """Load the test configuration into module-level globals.
+
+    Reads the INI file named by the S3VTESTS_CONF environment variable and
+    stores host/port (DEFAULT section) and the access/secret keys
+    ("s3 main" section) in the globals returned by the get_*() accessors.
+
+    Raises RuntimeError when the variable is unset, the file cannot be
+    read, or a required section or DEFAULT option is missing.
+    """
+    global default_host, default_port, main_access_key, main_secret_key
+
+    path = os.environ.get('S3VTESTS_CONF')
+    if path is None:
+        raise RuntimeError(
+            'To run tests, point environment '
+            'variable S3VTESTS_CONF to a config file.')
+
+    cfg = configparser.RawConfigParser()
+    # read() silently skips files it cannot open, so check what was loaded
+    if not cfg.read(path):
+        raise RuntimeError('Could not read config file: ' + path)
+
+    if not cfg.defaults():
+        raise RuntimeError('Your config file is missing the DEFAULT section!')
+    if not cfg.has_section("s3 main"):
+        raise RuntimeError('Your config file is missing the "s3 main" section!')
+
+    defaults = cfg.defaults()
+    for option in ("host", "port"):
+        # fail with a clear message instead of int(None) raising TypeError
+        if option not in defaults:
+            raise RuntimeError('Your config file is missing "' + option + '" in the DEFAULT section!')
+
+    # vars from the DEFAULT section
+    default_host = defaults["host"]
+    default_port = int(defaults["port"])
+
+    # vars from the main section
+    main_access_key = cfg.get('s3 main', "access_key")
+    main_secret_key = cfg.get('s3 main', "secret_key")
+
+
+def get_config_host():
+    """Return the host value stored in the module global by setup()."""
+    return default_host
+
+
+def get_config_port():
+    """Return the integer port stored in the module global by setup()."""
+    return default_port
+
+
+def get_access_key():
+    """Return the "s3 main" access key stored in the module global by setup()."""
+    return main_access_key
+
+
+def get_secret_key():
+    """Return the "s3 main" secret key stored in the module global by setup()."""
+    return main_secret_key
+
+
+@pytest.fixture(autouse=True, scope="package")
+def configfile():
+    # autouse, package-scoped fixture: calls setup() so the get_*() accessors have values
+    setup()
+
diff --git a/src/test/rgw/s3vectors/pytest.ini b/src/test/rgw/s3vectors/pytest.ini
new file mode 100644 (file)
index 0000000..4dbe74c
--- /dev/null
@@ -0,0 +1,5 @@
+[pytest]
+markers =
+  vector_bucket_test
+  index_test
+  vector_test
diff --git a/src/test/rgw/s3vectors/requirements.txt b/src/test/rgw/s3vectors/requirements.txt
new file mode 100644 (file)
index 0000000..c9d2a69
--- /dev/null
@@ -0,0 +1,3 @@
+boto3 >=1.0.0
+botocore >=1.39.5
+pytest
diff --git a/src/test/rgw/s3vectors/s3vector_test.py b/src/test/rgw/s3vectors/s3vector_test.py
new file mode 100644 (file)
index 0000000..510ca85
--- /dev/null
@@ -0,0 +1,343 @@
+import logging
+import json
+import tempfile
+import random
+import socket
+import time
+import threading
+import subprocess
+import os
+import stat
+import string
+import pytest
+import boto3
+from botocore.config import Config
+
+from . import(
+    configfile,
+    get_config_host,
+    get_config_port,
+    get_access_key,
+    get_secret_key
+    )
+
+
+# configure logging for the tests module
+log = logging.getLogger(__name__)
+
+num_buckets = 0
+run_prefix=''.join(random.choice(string.ascii_lowercase) for _ in range(6))
+
+test_path = os.path.normpath(os.path.dirname(os.path.realpath(__file__))) + '/../'
+
+def bash(cmd, **kwargs):
+    """Run `cmd` (an argv list) and return (stdout decoded as UTF-8, exit code).
+
+    Extra keyword arguments are passed to subprocess.Popen; stdout is always piped.
+    """
+    log.debug('running command: %s', ' '.join(cmd))
+    popen_args = dict(kwargs, stdout=subprocess.PIPE)
+    proc = subprocess.Popen(cmd, **popen_args)
+    raw_out, _ = proc.communicate()
+    return (raw_out.decode('utf-8'), proc.returncode)
+
+
+def admin(args, **kwargs):
+    """Run a radosgw-admin command through test-rgw-call.sh; returns what bash() returns."""
+    script = test_path + 'test-rgw-call.sh'
+    return bash([script, 'call_rgw_admin', 'noname'] + args, **kwargs)
+
+
+def gen_bucket_name():
+    """Return a new bucket name: the per-run random prefix, a dash, and an incrementing counter."""
+    global num_buckets
+    num_buckets = num_buckets + 1
+    return '{}-{}'.format(run_prefix, num_buckets)
+
+
+def connection():
+    """Build a boto3 's3vectors' client for the configured endpoint and main credentials."""
+    port_no = get_config_port()
+    # ports 443 and 8443 select https, everything else http
+    scheme = 'https://' if port_no in (443, 8443) else 'http://'
+    endpoint = scheme + get_config_host() + ':' + str(port_no)
+
+    return boto3.client('s3vectors',
+            endpoint_url=endpoint,
+            aws_access_key_id=get_access_key(),
+            aws_secret_access_key=get_secret_key(),
+            config=Config(signature_version='s3'))
+
+
+def another_user(tenant=None):
+    """Create a new user via radosgw-admin and return an 's3vectors' client using its keys.
+
+    The uid and both keys are derived from the current time. When `tenant`
+    is given, '--tenant' is added to the user creation command.
+    """
+    access_key = str(time.time())
+    secret_key = str(time.time())
+    uid = 'superman' + str(time.time())
+
+    # build the argument list once; only the tenant part is optional
+    create_args = ['user', 'create', '--uid', uid]
+    if tenant:
+        create_args += ['--tenant', tenant]
+    create_args += ['--access-key', access_key, '--secret-key', secret_key, '--display-name', '"Super Man"']
+    _, exit_code = admin(create_args)
+    assert exit_code == 0
+
+    port_no = get_config_port()
+    scheme = 'https://' if port_no in (443, 8443) else 'http://'
+    endpoint = scheme + get_config_host() + ':' + str(port_no)
+
+    return boto3.client('s3vectors',
+            endpoint_url=endpoint,
+            aws_access_key_id=access_key,
+            aws_secret_access_key=secret_key,
+            config=Config(signature_version='s3'))
+
+
+#################
+# s3vectors tests
+#################
+
+@pytest.mark.vector_bucket_test
+def test_create_vector_bucket():
+    """create_vector_bucket returns HTTP 200."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    result = conn.create_vector_bucket(vectorBucketName=bucket_name)
+    try:
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+    finally:
+        # cleanup runs even when the assertion fails, so the bucket is not left behind
+        _ = conn.delete_vector_bucket(vectorBucketName=bucket_name)
+
+
+@pytest.mark.vector_bucket_test
+def test_get_vector_bucket():
+    """get_vector_bucket on a newly created bucket returns HTTP 200."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    result = conn.create_vector_bucket(vectorBucketName=bucket_name)
+    try:
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        result = conn.get_vector_bucket(vectorBucketName=bucket_name)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+    finally:
+        # cleanup runs even when a step above fails, so the bucket is not left behind
+        _ = conn.delete_vector_bucket(vectorBucketName=bucket_name)
+
+
+@pytest.mark.vector_bucket_test
+def test_delete_vector_bucket():
+    """Create, get and delete a vector bucket; each call must return HTTP 200."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    # the three calls take the same argument and are checked the same way
+    for operation in (conn.create_vector_bucket, conn.get_vector_bucket, conn.delete_vector_bucket):
+        result = operation(vectorBucketName=bucket_name)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+    # not implemented yet
+    #with pytest.raises(conn.exceptions.NoSuchVectorBucket):
+    #    result = conn.get_vector_bucket(vectorBucketName=bucket_name)
+
+
+@pytest.mark.vector_bucket_test
+def test_list_vector_bucket():
+    """list_vector_buckets returns HTTP 200 after two buckets were created."""
+    conn = connection()
+    bucket_name1 = gen_bucket_name()
+    bucket_name2 = gen_bucket_name()
+    created = []
+    try:
+        for name in (bucket_name1, bucket_name2):
+            result = conn.create_vector_bucket(vectorBucketName=name)
+            created.append(name)
+            assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        result = conn.list_vector_buckets()
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        # not implemented yet
+        #bucket_names = [b['Name'] for b in result['VectorBuckets']]
+        #assert bucket_name1 in bucket_names
+        #assert bucket_name2 in bucket_names
+    finally:
+        # cleanup runs even when a step above fails; only buckets whose
+        # create call returned are deleted
+        for name in created:
+            _ = conn.delete_vector_bucket(vectorBucketName=name)
+
+
+@pytest.mark.index_test
+def test_create_index():
+    """create_index in a newly created vector bucket returns HTTP 200."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    result = conn.create_vector_bucket(vectorBucketName=bucket_name)
+    try:
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        index_name = 'test-index'
+        result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean')
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+    finally:
+        # cleanup runs even when a step above fails, so the bucket is not left behind
+        _ = conn.delete_vector_bucket(vectorBucketName=bucket_name)
+
+
+@pytest.mark.index_test
+def test_get_index():
+    """get_index on a newly created index returns HTTP 200."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    result = conn.create_vector_bucket(vectorBucketName=bucket_name)
+    try:
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        index_name = 'test-index'
+        result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean')
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        result = conn.get_index(vectorBucketName=bucket_name, indexName=index_name)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+    finally:
+        # cleanup runs even when a step above fails, so the bucket is not left behind
+        _ = conn.delete_vector_bucket(vectorBucketName=bucket_name)
+
+
+@pytest.mark.index_test
+def test_delete_index():
+    """Create, get and delete an index; each call must return HTTP 200."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    result = conn.create_vector_bucket(vectorBucketName=bucket_name)
+    try:
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        index_name = 'test-index'
+        result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean')
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        result = conn.get_index(vectorBucketName=bucket_name, indexName=index_name)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        result = conn.delete_index(vectorBucketName=bucket_name, indexName=index_name)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        # not implemented yet
+        #with pytest.raises(conn.exceptions.NoSuchIndex):
+        #    result = conn.get_index(vectorBucketName=bucket_name, indexName=index_name)
+    finally:
+        # cleanup runs even when a step above fails, so the bucket is not left behind
+        _ = conn.delete_vector_bucket(vectorBucketName=bucket_name)
+
+
+@pytest.mark.index_test
+def test_list_indexes():
+    """list_indexes returns HTTP 200 after two indexes were created in one bucket."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    result = conn.create_vector_bucket(vectorBucketName=bucket_name)
+    try:
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        index_name1 = 'test-index1'
+        index_name2 = 'test-index2'
+        result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name1, dataType='float32', dimension=128, distanceMetric='cosine')
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name2, dataType='float32', dimension=128, distanceMetric='euclidean')
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        result = conn.list_indexes(vectorBucketName=bucket_name)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        # not implemented yet
+        #index_names = [i['IndexName'] for i in result['Indexes']]
+        #assert index_name1 in index_names
+        #assert index_name2 in index_names
+    finally:
+        # cleanup runs even when a step above fails, so the bucket is not left behind
+        _ = conn.delete_vector_bucket(vectorBucketName=bucket_name)
+
+
+def generate_data(dimension):
+    """Return {'float32': [0.0, 1.0, ..., dimension - 1]} as vector data of the given dimension."""
+    return {'float32': list(map(float, range(dimension)))}
+
+
+def generate_vectors(num_vectors, dimension):
+    """Return `num_vectors` dicts keyed 'vec-0', 'vec-1', ..., each with data from generate_data(dimension)."""
+    return [
+        {'key': 'vec-' + str(i), 'data': generate_data(dimension)}
+        for i in range(num_vectors)
+    ]
+
+
+@pytest.mark.vector_test
+def test_put_vectors():
+    """put_vectors of 10 generated vectors into a new index returns HTTP 200."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    result = conn.create_vector_bucket(vectorBucketName=bucket_name)
+    try:
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        index_name = 'test-index'
+        result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean')
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        vectors = generate_vectors(10, 128)
+        result = conn.put_vectors(vectorBucketName=bucket_name, indexName=index_name, vectors=vectors)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+    finally:
+        # cleanup runs even when a step above fails, so the bucket is not left behind
+        _ = conn.delete_vector_bucket(vectorBucketName=bucket_name)
+
+
+@pytest.mark.vector_test
+def test_get_vectors():
+    """get_vectors for the keys of 10 stored vectors returns HTTP 200."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    result = conn.create_vector_bucket(vectorBucketName=bucket_name)
+    try:
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        index_name = 'test-index'
+        result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean')
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        vectors = generate_vectors(10, 128)
+        result = conn.put_vectors(vectorBucketName=bucket_name, indexName=index_name, vectors=vectors)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        vector_ids = ['vec-' + str(i) for i in range(10)]
+        result = conn.get_vectors(vectorBucketName=bucket_name, indexName=index_name, keys=vector_ids)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        # not implemented yet
+        #assert len(result['Vectors']) == 10
+    finally:
+        # cleanup runs even when a step above fails, so the bucket is not left behind
+        _ = conn.delete_vector_bucket(vectorBucketName=bucket_name)
+
+
+@pytest.mark.vector_test
+def test_list_vectors():
+    """list_vectors on an index holding 10 stored vectors returns HTTP 200."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    result = conn.create_vector_bucket(vectorBucketName=bucket_name)
+    try:
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        index_name = 'test-index'
+        result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean')
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        vectors = generate_vectors(10, 128)
+        result = conn.put_vectors(vectorBucketName=bucket_name, indexName=index_name, vectors=vectors)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        result = conn.list_vectors(vectorBucketName=bucket_name, indexName=index_name)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        # not implemented yet
+        #assert len(result['Vectors']) == 10
+    finally:
+        # cleanup runs even when a step above fails, so the bucket is not left behind
+        _ = conn.delete_vector_bucket(vectorBucketName=bucket_name)
+
+
+@pytest.mark.vector_test
+def test_delete_vectors():
+    """delete_vectors for 10 stored keys, then list_vectors, both return HTTP 200."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    result = conn.create_vector_bucket(vectorBucketName=bucket_name)
+    try:
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        index_name = 'test-index'
+        result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean')
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        vectors = generate_vectors(10, 128)
+        result = conn.put_vectors(vectorBucketName=bucket_name, indexName=index_name, vectors=vectors)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        vector_ids = ['vec-' + str(i) for i in range(10)]
+        result = conn.delete_vectors(vectorBucketName=bucket_name, indexName=index_name, keys=vector_ids)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        result = conn.list_vectors(vectorBucketName=bucket_name, indexName=index_name)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        # not implemented yet
+        #assert len(result['Vectors']) == 0
+    finally:
+        # cleanup runs even when a step above fails, so the bucket is not left behind
+        _ = conn.delete_vector_bucket(vectorBucketName=bucket_name)
+
+
+@pytest.mark.vector_test
+def test_query_vectors():
+    """query_vectors with topK=5 against an index holding 10 stored vectors returns HTTP 200."""
+    conn = connection()
+    bucket_name = gen_bucket_name()
+    result = conn.create_vector_bucket(vectorBucketName=bucket_name)
+    try:
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        index_name = 'test-index'
+        result = conn.create_index(vectorBucketName=bucket_name, indexName=index_name, dataType='float32', dimension=128, distanceMetric='euclidean')
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        vectors = generate_vectors(10, 128)
+        result = conn.put_vectors(vectorBucketName=bucket_name, indexName=index_name, vectors=vectors)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        query_vector = generate_data(128)
+        result = conn.query_vectors(vectorBucketName=bucket_name, indexName=index_name, queryVector=query_vector, topK=5)
+        assert result['ResponseMetadata']['HTTPStatusCode'] == 200
+        # not implemented yet
+        #assert len(result['Results']) == 5
+    finally:
+        # cleanup runs even when a step above fails, so the bucket is not left behind
+        _ = conn.delete_vector_bucket(vectorBucketName=bucket_name)
+
diff --git a/src/test/rgw/s3vectors/s3vtests.conf.SAMPLE b/src/test/rgw/s3vectors/s3vtests.conf.SAMPLE
new file mode 100644 (file)
index 0000000..eb3291d
--- /dev/null
@@ -0,0 +1,10 @@
+[DEFAULT]
+port = 8000
+host = localhost
+
+[s3 main]
+access_key = 0555b35654ad1656d804
+secret_key = h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q==
+display_name = M. Tester
+user_id = testid
+email = tester@ceph.com
diff --git a/src/test/rgw/s3vectors/tox.ini b/src/test/rgw/s3vectors/tox.ini
new file mode 100644 (file)
index 0000000..be9aa92
--- /dev/null
@@ -0,0 +1,9 @@
+[tox]
+envlist = py
+skipsdist = True
+
+[testenv]
+deps = -rrequirements.txt
+passenv =
+  S3VTESTS_CONF
+commands = pytest {posargs}