From 91343a8328b0b13cdc111443fe7ace2dd1765166 Mon Sep 17 00:00:00 2001
From: Colin Patrick McCabe
Date: Thu, 14 Jul 2011 15:07:47 -0700
Subject: [PATCH] Add control character detection

Signed-off-by: Colin McCabe
---
Review notes (this area is not part of the commit message):

 * check_for_control_characters() returns 0 when the buffer holds no
   control byte; otherwise it returns the 1-based offset of the first
   one (the loop returns i + 1). A len <= 0 therefore yields 0.
 * A control byte here is 0x01..0x1f or 0x7f. NUL (0x00) is deliberately
   not counted. Bytes >= 0x80 never match, so multi-byte UTF-8 sequences
   pass (see the validutf test case); this also means C1 controls
   encoded as UTF-8 are not reported.
 * check_for_control_characters_cstr() passes strlen()'s size_t result
   to an int parameter, the same way check_utf8_cstr() does. Strings
   longer than INT_MAX bytes are outside what either wrapper handles.
 * Indentation inside the hunks was reconstructed after the line
   structure of this file had been lost. If a context line fails to
   match, apply with "git am --ignore-whitespace".

 src/common/utf8.c | 17 +++++++++++++++++
 src/common/utf8.h |  9 +++++++++
 src/test/utf8.cc  | 17 +++++++++++++++++
 3 files changed, 43 insertions(+)

diff --git a/src/common/utf8.c b/src/common/utf8.c
index 11957b3143564..f6a1b07b5956f 100644
--- a/src/common/utf8.c
+++ b/src/common/utf8.c
@@ -164,3 +164,20 @@ int check_utf8_cstr(const char *buf)
 {
 	return check_utf8(buf, strlen(buf));
 }
+
+int check_for_control_characters(const char *buf, int len)
+{
+	int i;
+	for (i = 0; i < len; ++i) {
+		unsigned char c = (unsigned char)buf[i];
+		if (((c != 0U) && (c < 0x20U)) || (c == 0x7fU)) {
+			return i + 1;
+		}
+	}
+	return 0;
+}
+
+int check_for_control_characters_cstr(const char *buf)
+{
+	return check_for_control_characters(buf, strlen(buf));
+}
diff --git a/src/common/utf8.h b/src/common/utf8.h
index e1c891cef5926..e2b25b94c33d3 100644
--- a/src/common/utf8.h
+++ b/src/common/utf8.h
@@ -31,6 +31,15 @@ int check_utf8(const char *buf, int len);
  */
 int check_utf8_cstr(const char *buf);
 
+/* Checks if a buffer contains control characters.
+ * We do count newline as a control character, but not NULL.
+ */
+int check_for_control_characters(const char *buf, int len);
+
+/* Checks if a null-terminated string contains control characters.
+ */
+int check_for_control_characters_cstr(const char *buf);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/test/utf8.cc b/src/test/utf8.cc
index 7c1a4de24eb64..8e22e524ec17f 100644
--- a/src/test/utf8.cc
+++ b/src/test/utf8.cc
@@ -47,3 +47,20 @@ TEST(IsValidUtf8, InvalidUtf8) {
   uint8_t invalid2[] = { 0xc3, 0x28 };
   ASSERT_NE(0, check_utf8((char*)invalid2, sizeof(invalid2)));
 }
+
+TEST(HasControlChars, HasControlChars1) {
+  uint8_t has_control_chars[] = { 0x41, 0x01, 0x00 };
+  ASSERT_NE(0, check_for_control_characters_cstr((const char*)has_control_chars));
+  uint8_t has_control_chars2[] = { 0x7f, 0x41, 0x00 };
+  ASSERT_NE(0, check_for_control_characters_cstr((const char*)has_control_chars2));
+
+  char has_newline[] = "blah blah\n";
+  ASSERT_NE(0, check_for_control_characters_cstr(has_newline));
+
+  char no_control_chars[] = "blah blah";
+  ASSERT_EQ(0, check_for_control_characters_cstr(no_control_chars));
+
+  uint8_t validutf[] = { 0x66, 0xd1, 0x86, 0xd1, 0x9d, 0xd2, 0xa0, 0xd3,
+                         0xad, 0xd3, 0xae, 0x0 };
+  ASSERT_EQ(0, check_for_control_characters_cstr((const char*)validutf));
+}
-- 
2.39.5