From: Casey Bodley Date: Fri, 22 Nov 2019 18:16:10 +0000 (-0500) Subject: common: add iterator-based string splitter X-Git-Tag: v15.1.1~86^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=fab7c649fd4498460bbf564fd50bd68da59ce503;p=ceph.git common: add iterator-based string splitter ranged-for loop example: for (std::string_view s : split(input)) { ... container initialization example: auto parts = split(input); std::vector<std::string> strings; strings.assign(parts.begin(), parts.end()); Signed-off-by: Casey Bodley --- diff --git a/src/common/split.h b/src/common/split.h new file mode 100644 index 000000000000..1b12963cef15 --- /dev/null +++ b/src/common/split.h @@ -0,0 +1,107 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
#include <iterator>
#include <string_view>

namespace ceph {

// a forward iterator over the parts of a split string
//
// The iterator holds only views: it never copies the input string or the
// delimiter set, so both must outlive the iterator and every part it yields.
//
// A default-constructed iterator has an empty 'part' with a null data()
// pointer. next() returns exactly that value once no further part exists, so
// an exhausted iterator compares equal to a default-constructed one. That is
// what lets split::end() be a default-constructed spliterator.
class spliterator {
  std::string_view str; // full string
  std::string_view delims; // delimiters

  using size_type = std::string_view::size_type;
  size_type pos = 0; // start position of current part
  std::string_view part; // view of current part

  // Find the first part that starts at or after offset 'end'.
  //
  // Skips any run of delimiter characters, records the start of the part in
  // 'pos', and returns a view of the characters up to (not including) the
  // next delimiter or the end of the string. Returns an empty, null view and
  // leaves 'pos' at npos when nothing but delimiters remain.
  std::string_view next(size_type end) {
    pos = str.find_first_not_of(delims, end);
    if (pos == str.npos) {
      return {};
    }
    // if no delimiter follows, find_first_of() returns npos and substr()
    // clamps the count to the rest of the string
    return str.substr(pos, str.find_first_of(delims, pos) - pos);
  }
 public:
  // types required by std::iterator_traits
  using difference_type = int;
  using value_type = std::string_view;
  using pointer = const value_type*;
  using reference = const value_type&;
  using iterator_category = std::forward_iterator_tag;

  // constructs the singular/end iterator
  spliterator() = default;

  // Constructs an iterator positioned on the first part of 'str'.
  //
  // 'part' is declared after 'str', 'delims' and 'pos', so those members are
  // already initialized when next(0) runs here.
  spliterator(std::string_view str, std::string_view delims)
    : str(str), delims(delims), pos(0), part(next(0))
  {}

  // advance to the following part and return this iterator
  spliterator& operator++() {
    part = next(pos + part.size());
    return *this;
  }
  // advance to the following part, returning a copy of the previous state
  spliterator operator++(int) {
    spliterator tmp = *this;
    ++*this;
    return tmp;
  }

  // access the current part
  reference operator*() const { return part; }
  pointer operator->() const { return &part; }

  // Iterators are equal when their current parts view the same bytes: same
  // start address and same length.
  friend bool operator==(const spliterator& lhs, const spliterator& rhs) {
    return lhs.part.data() == rhs.part.data()
        && lhs.part.size() == rhs.part.size();
  }
  friend bool operator!=(const spliterator& lhs, const spliterator& rhs) {
    return !(lhs == rhs);
  }
};

// represents an immutable range of split string parts
//
// Only views of the input string and the delimiter set are stored, so both
// must remain valid for as long as the range, its iterators, or any yielded
// part is in use. Runs of adjacent delimiters are skipped, so no empty parts
// are produced.
//
// ranged-for loop example:
//
//   for (std::string_view s : split(input)) {
//     ...
//   }
//
// container initialization example:
//
//   auto parts = split(input);
//
//   std::vector<std::string> strings;
//   strings.assign(parts.begin(), parts.end());
//
class split {
  std::string_view str; // full string
  std::string_view delims; // delimiters
 public:
  // 'delims' is a set of single-character delimiters; any one of them
  // separates parts
  split(std::string_view str, std::string_view delims = ";,= \t\n")
    : str(str), delims(delims) {}

  using iterator = spliterator;
  using const_iterator = spliterator;

  // iterator positioned on the first part (equal to end() if there is none)
  iterator begin() const { return {str, delims}; }
  const_iterator cbegin() const { return {str, delims}; }

  // past-the-end iterator: a default-constructed spliterator
  iterator end() const { return {}; }
  const_iterator cend() const { return {}; }
};

} // namespace ceph
+ * + */ + +#include "common/split.h" +#include +#include + +namespace ceph { + +using string_list = std::initializer_list; + +bool operator==(const split& lhs, const string_list& rhs) { + return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +} +bool operator==(const string_list& lhs, const split& rhs) { + return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +} + +TEST(split, split) +{ + EXPECT_EQ(string_list({}), split("")); + EXPECT_EQ(string_list({}), split(",")); + EXPECT_EQ(string_list({}), split(",;")); + + EXPECT_EQ(string_list({"a"}), split("a,;")); + EXPECT_EQ(string_list({"a"}), split(",a;")); + EXPECT_EQ(string_list({"a"}), split(",;a")); + + EXPECT_EQ(string_list({"a", "b"}), split("a,b;")); + EXPECT_EQ(string_list({"a", "b"}), split("a,;b")); + EXPECT_EQ(string_list({"a", "b"}), split(",a;b")); +} + +TEST(split, iterator_indirection) +{ + const auto parts = split("a,b"); + auto i = parts.begin(); + ASSERT_NE(i, parts.end()); + EXPECT_EQ("a", *i); // test operator* +} + +TEST(split, iterator_dereference) +{ + const auto parts = split("a,b"); + auto i = parts.begin(); + ASSERT_NE(i, parts.end()); + EXPECT_EQ(1, i->size()); // test operator-> +} + +TEST(split, iterator_pre_increment) +{ + const auto parts = split("a,b"); + auto i = parts.begin(); + ASSERT_NE(i, parts.end()); + + ASSERT_EQ("a", *i); + EXPECT_EQ("b", *++i); // test operator++() + EXPECT_EQ("b", *i); +} + +TEST(split, iterator_post_increment) +{ + const auto parts = split("a,b"); + auto i = parts.begin(); + ASSERT_NE(i, parts.end()); + + ASSERT_EQ("a", *i); + EXPECT_EQ("a", *i++); // test operator++(int) + ASSERT_NE(parts.end(), i); + EXPECT_EQ("b", *i); +} + +TEST(split, iterator_singular) +{ + const auto parts = split("a,b"); + auto i = parts.begin(); + + // test comparions against default-constructed 'singular' iterators + split::iterator j; + split::iterator k; + EXPECT_EQ(j, parts.end()); // singular == end + EXPECT_EQ(j, k); // singular == singular + 
EXPECT_NE(j, i); // singular != valid +} + +TEST(split, iterator_multipass) +{ + const auto parts = split("a,b"); + auto i = parts.begin(); + ASSERT_NE(i, parts.end()); + + // copy the iterator to test LegacyForwardIterator's multipass guarantee + auto j = i; + ASSERT_EQ(i, j); + + ASSERT_EQ("a", *i); + ASSERT_NE(parts.end(), ++i); + EXPECT_EQ("b", *i); + + ASSERT_EQ("a", *j); // test that ++i left j unmodified + ASSERT_NE(parts.end(), ++j); + EXPECT_EQ("b", *j); + + EXPECT_EQ(i, j); +} + +} // namespace ceph