From: Alex Markuze Date: Thu, 15 May 2025 19:34:06 +0000 (+0000) Subject: ceph_san code X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8a1cb95e58001067ea33908f1762ca31d6f93b69;p=ceph-client.git ceph_san code --- diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index 6292db198f61..72dbf7fc1d25 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -2,9 +2,18 @@ #ifndef _FS_CEPH_DEBUG_H #define _FS_CEPH_DEBUG_H +#undef pr_fmt #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include + +#define CEPH_SAN_STRNCPY(dest, dest_len, src, src_len) ({ \ + size_t __len = (dest_len) - 1; \ + memcpy((dest), (src), min((size_t)(src_len), __len)); \ + (dest)[min((size_t)(src_len), __len)] = '\0'; \ +}) + #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG @@ -48,6 +57,20 @@ pr_debug(" [%pU %llu] %s: " fmt, &client->fsid, \ client->monc.auth->global_id, __func__, ##__VA_ARGS__) +#define bout_dbg(fmt, ...) \ + do { \ + __CEPH_SAN_LOG(1, 0, fmt, ##__VA_ARGS__); \ + } while (0) + +#define bout(fmt, ...) \ + do { \ + CEPH_SAN_LOG(fmt, ##__VA_ARGS__); \ + } while (0) + +#define boutc(client, fmt, ...) \ + do { \ + CEPH_SAN_LOG_CLIENT(client, fmt, ##__VA_ARGS__); \ + } while (0) #endif #define pr_notice_client(client, fmt, ...) \ diff --git a/include/linux/ceph/ceph_san_batch.h b/include/linux/ceph/ceph_san_batch.h new file mode 100644 index 000000000000..8c99c617b6e4 --- /dev/null +++ b/include/linux/ceph/ceph_san_batch.h @@ -0,0 +1,47 @@ +#ifndef CEPH_SAN_BATCH_H +#define CEPH_SAN_BATCH_H + +#include +#include +#include + +/* Size of each magazine (number of elements it can hold) */ +#define CEPH_SAN_MAGAZINE_SIZE 16 + +/* Structure representing a single magazine */ +struct ceph_san_magazine { + struct list_head list; /* For linking in global pools */ + unsigned int count; /* Number of elements currently in magazine */ + void *elements[CEPH_SAN_MAGAZINE_SIZE]; +}; + +/* Per-CPU magazine state */ +struct ceph_san_cpu_magazine { + struct ceph_san_magazine *mag; /* Current magazine for this CPU */ +}; + +/* Global magazine pools */ +struct ceph_san_batch { + struct list_head full_magazines; /* List of full magazines */ + struct list_head empty_magazines; /* List of empty magazines */ + spinlock_t full_lock; /* Protects full magazine list and count */ + spinlock_t empty_lock; /* Protects empty magazine list and count */ + unsigned int nr_full; /* Protected by full_lock */ + unsigned int nr_empty; /* Protected by empty_lock */ + struct ceph_san_cpu_magazine __percpu *cpu_magazines; /* Per-CPU magazines */ + struct kmem_cache *magazine_cache; /* Cache for magazine allocations */ +}; + +/* Initialize the batching system */ +int ceph_san_batch_init(struct ceph_san_batch *batch); + +/* Clean up the batching system */ +void ceph_san_batch_cleanup(struct ceph_san_batch *batch); + +/* Get an element from the batch */ +void *ceph_san_batch_get(struct ceph_san_batch *batch); + +/* Put an element back into the batch */ +void ceph_san_batch_put(struct ceph_san_batch *batch, void *element); + +#endif /* CEPH_SAN_BATCH_H */ diff --git a/include/linux/ceph/ceph_san_des.h b/include/linux/ceph/ceph_san_des.h new file mode 100644 index 000000000000..eb9d75957f66 --- /dev/null +++ b/include/linux/ceph/ceph_san_des.h @@ -0,0 +1,22 @@ +#ifndef CEPH_SAN_DES_H +#define CEPH_SAN_DES_H + +#include /* For size_t */ + +/** + * Reconstructs a formatted string from a buffer containing serialized values. 
+ * The function uses the format string to determine the types and number of values + * to extract from the buffer. + * + * @param fmt Format string containing % specifiers + * @param buffer Buffer containing serialized values + * @param nr_args Number of arguments to process + * @param size Size of the buffer in bytes + * @param out Buffer to store the reconstructed string + * @param out_size Size of the output buffer + * @return Number of bytes written to out buffer, or -1 on error + */ +int ceph_san_des_reconstruct(const char *fmt, const void *buffer, size_t nr_args, + size_t size, char *out, size_t out_size); + +#endif /* CEPH_SAN_DES_H */ diff --git a/include/linux/ceph/ceph_san_logger.h b/include/linux/ceph/ceph_san_logger.h new file mode 100644 index 000000000000..22663a139fc6 --- /dev/null +++ b/include/linux/ceph/ceph_san_logger.h @@ -0,0 +1,196 @@ +#ifndef CEPH_SAN_LOGGER_H +#define CEPH_SAN_LOGGER_H + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Debug poison values */ +#define CEPH_SAN_DEBUG_POISON 0 +#if CEPH_SAN_DEBUG_POISON +#define CEPH_SAN_LOG_ENTRY_POISON 0xD1E7C0DE +#endif + +/* Enable usage statistics tracking */ +#define CEPH_SAN_TRACK_USAGE 0 + +/* Global logger instance */ +extern struct ceph_san_logger g_logger; + +/* Maximum length of a log entry buffer */ +#define CEPH_SAN_CTX_POISON 0xCAFEBABE +#define CEPH_SAN_LOG_MAX_LEN 256 +#define CEPH_SAN_MAX_SOURCE_IDS 4096 +#define CEPH_SAN_MAX_CLIENT_IDS 256 + +/* Client ID cache entry */ +struct ceph_san_client_id { + char fsid[16]; /* Client FSID */ + u64 global_id; /* Client global ID */ +}; + +/* Source information mapping structure */ +struct ceph_san_source_info { + const char *file; + const char *func; + unsigned int line; + const char *fmt; /* Format string */ + int warn_count; +#if CEPH_SAN_TRACK_USAGE + atomic_t napi_usage; /* Number of times used in NAPI context */ + atomic_t task_usage; /* Number of times used in task context */ + atomic_t napi_bytes; /* Total bytes used in NAPI context */ + atomic_t task_bytes; /* Total bytes used in task context */ +#endif +}; + +/* Log entry structure - optimized for size */ +struct ceph_san_log_entry { +#if CEPH_SAN_DEBUG_POISON + u64 debug_poison; /* Debug poison value */ +#endif + u32 ts_delta; /* Time delta from base_jiffies */ + u16 source_id; /* Source ID */ + u8 client_id; /* Client ID */ + u8 len; /* Length of buffer */ + char buffer[]; /* Variable length buffer */ +}; + +/* TLS context structure */ +struct ceph_san_tls_ctx { + struct list_head list; /* List entry for global list */ + struct cephsan_pagefrag pf; /* Page fragment for this context */ + void (*release)(void *); /* Release function */ + atomic_t refcount; /* Reference count */ + struct task_struct *task; /* Associated task */ + pid_t pid; /* Process ID */ + char comm[TASK_COMM_LEN]; /* Command name */ + u64 id; /* Unique context ID */ + u64 debug_poison; /* Debug poison value */ + unsigned long base_jiffies; /* Base jiffies value for this context */ +}; + +/* Global logger state */ +struct ceph_san_logger { + struct list_head contexts; /* List of all TLS contexts */ + spinlock_t lock; /* Protects contexts list */ + struct ceph_san_batch alloc_batch; /* Batch for allocating new entries */ + struct ceph_san_batch log_batch; /* Batch for storing log entries */ + struct ceph_san_source_info source_map[CEPH_SAN_MAX_SOURCE_IDS]; /* Source info mapping */ + struct ceph_san_client_id client_map[CEPH_SAN_MAX_CLIENT_IDS]; /* Client ID mapping */ + atomic_t next_source_id; 
/* Next source ID to assign */ + u32 next_client_id; /* Next client ID to assign */ + spinlock_t client_lock; /* Protects client ID operations */ + unsigned long total_contexts_allocated; + u64 next_ctx_id; /* Next context ID to assign */ + spinlock_t ctx_id_lock; /* Protects context ID counter */ + struct ceph_san_tls_ctx __percpu *napi_ctxs; /* Per-CPU NAPI contexts */ +}; + +static inline void ceph_san_logger_print_stats(struct ceph_san_logger *logger) +{ + pr_debug("ceph_san_logger: total_contexts=%lu, alloc_batch={empty=%d, full=%d}, log_batch={empty=%d, full=%d}\n", + logger->total_contexts_allocated, + logger->alloc_batch.nr_empty, logger->alloc_batch.nr_full, + logger->log_batch.nr_empty, logger->log_batch.nr_full); +} + +/* Iterator for log entries in a single pagefrag */ +struct ceph_san_log_iter { + struct cephsan_pagefrag *pf; /* Pagefrag being iterated */ + u64 current_offset; /* Current offset in pagefrag */ + u64 end_offset; /* End offset in pagefrag */ + u64 prev_offset; /* Previous offset for debugging */ + u64 steps; /* Number of steps taken */ +}; + +/* Initialize the iterator for a specific pagefrag */ +void ceph_san_log_iter_init(struct ceph_san_log_iter *iter, struct cephsan_pagefrag *pf); + +/* Get next log entry, returns NULL when no more entries */ +struct ceph_san_log_entry *ceph_san_log_iter_next(struct ceph_san_log_iter *iter); + +/* Reconstruct a formatted string from a log entry */ +int ceph_san_log_reconstruct(const struct ceph_san_log_entry *entry, char *output, size_t output_size); + +/* Initialize the logging system */ +int ceph_san_logger_init(void); + +/* Clean up the logging system */ +void ceph_san_logger_cleanup(void); + +/* Get or create source ID */ +u32 ceph_san_get_source_id(const char *file, const char *func, unsigned int line, const char *fmt); + +/* Get source information for ID */ +struct ceph_san_source_info *ceph_san_get_source_info(u32 id); + +/* Check if client ID matches given fsid and global_id, returning the actual ID */ +u32 ceph_san_check_client_id(u32 id, const char *fsid, u64 global_id); + +/* Get client information for ID */ +const struct ceph_san_client_id *ceph_san_get_client_info(u32 id); + +/* Log a message */ +void* ceph_san_log(u32 source_id, u32 client_id, size_t needed_size); + +/* Get current TLS context, creating if necessary */ +struct ceph_san_tls_ctx *ceph_san_get_tls_ctx(void); + +/* Get NAPI context for current CPU */ +struct ceph_san_tls_ctx *ceph_san_get_napi_ctx(void); + +/* Set NAPI context for current CPU */ +void ceph_san_set_napi_ctx(struct ceph_san_tls_ctx *ctx); + +/* Get appropriate context based on context type */ +struct ceph_san_tls_ctx *ceph_san_get_ctx(void); + +/* Trim the current context's pagefrag by n bytes */ +int ceph_san_log_trim(unsigned int n); + +/** + * is_valid_kernel_addr - Check if address is in valid kernel address range + * @addr: Address to check + * + * Returns true if address is in valid kernel address range + */ +bool is_valid_kernel_addr(const void *addr); + +/* Helper macro for logging */ +#define __CEPH_SAN_LOG(dbg, __client_id, fmt, ...) 
\ + do { \ + static u32 __source_id = 0; \ + static size_t __size = 0; \ + void *___buffer = NULL; \ + if (unlikely(__source_id == 0)) { \ + __source_id = ceph_san_get_source_id(kbasename(__FILE__), __func__, __LINE__, fmt); \ + __size = ceph_san_cnt(__VA_ARGS__); \ + } \ + ___buffer = ceph_san_log(__source_id, __client_id, __size); \ + if (likely(___buffer) && __size > 0) { \ + void *___tmp = ___buffer; \ + size_t actual_size; \ + ceph_san_ser(___buffer, ##__VA_ARGS__);\ + actual_size = ___buffer - ___tmp; \ + ceph_san_log_trim(__size - actual_size); \ + } \ + } while (0) + +#define CEPH_SAN_LOG(fmt, ...) \ + __CEPH_SAN_LOG(0, 0, fmt, ##__VA_ARGS__) + +/* Helper macro for logging with client ID */ +#define CEPH_SAN_LOG_CLIENT(client, fmt, ...) \ + do { \ + static u32 __client_id; \ + __client_id = ceph_san_check_client_id(__client_id, client->fsid.fsid, client->monc.auth->global_id); \ + __CEPH_SAN_LOG(0, __client_id, fmt, ##__VA_ARGS__); \ + } while (0) + +#endif /* CEPH_SAN_LOGGER_H */ diff --git a/include/linux/ceph/ceph_san_pagefrag.h b/include/linux/ceph/ceph_san_pagefrag.h new file mode 100644 index 000000000000..8135ae527a41 --- /dev/null +++ b/include/linux/ceph/ceph_san_pagefrag.h @@ -0,0 +1,40 @@ +#ifndef CEPH_SAN_PAGEFRAG_H +#define CEPH_SAN_PAGEFRAG_H + +#include +#include +#include + +#define CEPHSAN_PAGEFRAG_SIZE (1<<19) /* 512KB */ +#define CEPHSAN_PAGEFRAG_MASK (CEPHSAN_PAGEFRAG_SIZE - 1) + +/* Pagefrag allocator structure */ +struct cephsan_pagefrag { + struct page *pages; + void *buffer; + spinlock_t lock; /* protects head */ + unsigned int head; + unsigned int alloc_count; + int active_elements; + void *last_entry; /* Pointer to the last allocated entry */ +}; + +int cephsan_pagefrag_init(struct cephsan_pagefrag *pf); +int cephsan_pagefrag_init_with_buffer(struct cephsan_pagefrag *pf, void *buffer, size_t size); +int cephsan_pagefrag_alloc(struct cephsan_pagefrag *pf, unsigned int n); +void *cephsan_pagefrag_get_ptr_from_tail(struct cephsan_pagefrag *pf); +void cephsan_pagefrag_free(struct cephsan_pagefrag *pf, unsigned int n); +void cephsan_pagefrag_deinit(struct cephsan_pagefrag *pf); +void cephsan_pagefrag_reset(struct cephsan_pagefrag *pf); +void *cephsan_pagefrag_get_ptr(struct cephsan_pagefrag *pf, u64 val); +bool cephsan_pagefrag_is_wraparound(u64 val); + +/* Get allocation size from pagefrag allocation result */ +u64 cephsan_pagefrag_get_alloc_size(u64 val); + +#define CEPHSAN_PAGEFRAG_GET_N(val) ((val) >> 32) + +void cephsan_pagefrag_trim_head(struct cephsan_pagefrag *pf, unsigned int n); +void cephsan_pagefrag_trim(struct cephsan_pagefrag *pf, unsigned int n); + +#endif /* CEPH_SAN_PAGEFRAG_H */ diff --git a/include/linux/ceph/ceph_san_ser.h b/include/linux/ceph/ceph_san_ser.h new file mode 100644 index 000000000000..8f714890cbf6 --- /dev/null +++ b/include/linux/ceph/ceph_san_ser.h @@ -0,0 +1,218 @@ +#ifndef CEPH_SAN_SER_H +#define CEPH_SAN_SER_H + +#include +#include + +#define IS_CONST_STR_PTR(t) \ + __builtin_types_compatible_p(typeof(t), const char *) + +#define IS_STR_PTR(t) \ + __builtin_types_compatible_p(typeof(t), char *) + +#define IS_STR(t) \ + (__builtin_types_compatible_p(typeof(t), const char *) || \ + __builtin_types_compatible_p(typeof(t), char *)) + +#define __suppress_cast_warning__orig(type, value) \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-to-pointer-cast\"") \ + _Pragma("GCC diagnostic ignored \"-Wpointer-to-int-cast\"") \ + ((type)(value)) \ + _Pragma("GCC diagnostic pop") + +#define 
__suppress_cast_warning(type, value) \ +({ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-to-pointer-cast\"") \ + _Pragma("GCC diagnostic ignored \"-Wpointer-to-int-cast\"") \ + type __scw_result; \ + __scw_result = ((type)(value)); \ + _Pragma("GCC diagnostic pop") \ + __scw_result; \ +}) + +#define ___ceph_san_concat(__a, __b) __a ## __b +#define ___ceph_san_apply(__fn, __n) ___ceph_san_concat(__fn, __n) + +#define ___ceph_san_nth(_, __1, __2, __3, __4, __5, __6, __7, __8, __9, __10, __11, __12, __13, __14, __15, \ + __16, __17, __18, __19, __20, __21, __22, __23, __24, __25, __26, __27, __28, __29, __30, __31, __32, __N, ...) __N +#define ___ceph_san_narg(...) ___ceph_san_nth(_, ##__VA_ARGS__, \ + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, \ + 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define ceph_san_narg(...) ___ceph_san_narg(__VA_ARGS__) + +#define STR_MAX_SIZE 64 +#define __sizeof(x) \ + (IS_STR(x) ? STR_MAX_SIZE : \ + (sizeof(x) < 4) ? 4 : sizeof(x)) +#define ___ceph_san_cnt0() (0) +#define ___ceph_san_cnt1(__t) (__sizeof(__t)) +#define ___ceph_san_cnt2(__t, __args...) (___ceph_san_cnt1(__args) + __sizeof(__t)) +#define ___ceph_san_cnt3(__t, __args...) (___ceph_san_cnt2(__args) + __sizeof(__t)) +#define ___ceph_san_cnt4(__t, __args...) (___ceph_san_cnt3(__args) + __sizeof(__t)) +#define ___ceph_san_cnt5(__t, __args...) (___ceph_san_cnt4(__args) + __sizeof(__t)) +#define ___ceph_san_cnt6(__t, __args...) (___ceph_san_cnt5(__args) + __sizeof(__t)) +#define ___ceph_san_cnt7(__t, __args...) (___ceph_san_cnt6(__args) + __sizeof(__t)) +#define ___ceph_san_cnt8(__t, __args...) (___ceph_san_cnt7(__args) + __sizeof(__t)) +#define ___ceph_san_cnt9(__t, __args...) (___ceph_san_cnt8(__args) + __sizeof(__t)) +#define ___ceph_san_cnt10(__t, __args...) (___ceph_san_cnt9(__args) + __sizeof(__t)) +#define ___ceph_san_cnt11(__t, __args...) (___ceph_san_cnt10(__args) + __sizeof(__t)) +#define ___ceph_san_cnt12(__t, __args...) (___ceph_san_cnt11(__args) + __sizeof(__t)) +#define ___ceph_san_cnt13(__t, __args...) (___ceph_san_cnt12(__args) + __sizeof(__t)) +#define ___ceph_san_cnt14(__t, __args...) (___ceph_san_cnt13(__args) + __sizeof(__t)) +#define ___ceph_san_cnt15(__t, __args...) (___ceph_san_cnt14(__args) + __sizeof(__t)) +#define ___ceph_san_cnt16(__t, __args...) (___ceph_san_cnt15(__args) + __sizeof(__t)) +#define ___ceph_san_cnt17(__t, __args...) (___ceph_san_cnt16(__args) + __sizeof(__t)) +#define ___ceph_san_cnt18(__t, __args...) (___ceph_san_cnt17(__args) + __sizeof(__t)) +#define ___ceph_san_cnt19(__t, __args...) (___ceph_san_cnt18(__args) + __sizeof(__t)) +#define ___ceph_san_cnt20(__t, __args...) (___ceph_san_cnt19(__args) + __sizeof(__t)) +#define ___ceph_san_cnt21(__t, __args...) (___ceph_san_cnt20(__args) + __sizeof(__t)) +#define ___ceph_san_cnt22(__t, __args...) (___ceph_san_cnt21(__args) + __sizeof(__t)) +#define ___ceph_san_cnt23(__t, __args...) (___ceph_san_cnt22(__args) + __sizeof(__t)) +#define ___ceph_san_cnt24(__t, __args...) (___ceph_san_cnt23(__args) + __sizeof(__t)) +#define ___ceph_san_cnt25(__t, __args...) (___ceph_san_cnt24(__args) + __sizeof(__t)) +#define ___ceph_san_cnt26(__t, __args...) (___ceph_san_cnt25(__args) + __sizeof(__t)) +#define ___ceph_san_cnt27(__t, __args...) (___ceph_san_cnt26(__args) + __sizeof(__t)) +#define ___ceph_san_cnt28(__t, __args...) (___ceph_san_cnt27(__args) + __sizeof(__t)) +#define ___ceph_san_cnt29(__t, __args...) 
(___ceph_san_cnt28(__args) + __sizeof(__t)) +#define ___ceph_san_cnt30(__t, __args...) (___ceph_san_cnt29(__args) + __sizeof(__t)) +#define ___ceph_san_cnt31(__t, __args...) (___ceph_san_cnt30(__args) + __sizeof(__t)) +#define ___ceph_san_cnt32(__t, __args...) (___ceph_san_cnt31(__args) + __sizeof(__t)) +#define ceph_san_cnt(...) ___ceph_san_apply(___ceph_san_cnt, ceph_san_narg(__VA_ARGS__))(__VA_ARGS__) + +#define IS_STR_ARRAY(t) \ + __builtin_types_compatible_p(typeof(t), char []) + +#define IS_DYNAMIC_CHAR_PTR(t) \ + (__builtin_classify_type((t)) == 14 && \ + __builtin_types_compatible_p(typeof(t), char *) && \ + !__builtin_constant_p((t))) + +#define IS_STATIC_CHAR_ARRAY(t) \ + (__builtin_classify_type((t)) == 5 && \ + __builtin_types_compatible_p(typeof(t), char[]) && \ + __builtin_constant_p((t))) + +#define IS_DYNAMIC_CHAR_ARRAY(t) \ + (__builtin_classify_type((t)) == 5 && \ + __builtin_types_compatible_p(typeof(t), char[]) && \ + !__builtin_constant_p((t))) + +#define char_ptr(str) __suppress_cast_warning(char *, (str)) + +union null_str_u { + char str[8]; + unsigned long force_align; +}; + +static const union null_str_u null_str = { + .str = "(NULL) \0" +}; + +static inline size_t write_null_str(char *dst) +{ + *(union null_str_u *)dst = null_str; + static_assert(sizeof(null_str.str) == sizeof(unsigned long), + "null_str.str size must match unsigned long for proper alignment"); + return __builtin_strlen(null_str.str); +} + +static inline size_t strscpy_n(char *dst, const char *src) +{ + size_t count = 0; + + while (count < STR_MAX_SIZE - 1) { + dst[count] = src[count]; + if (src[count] == '\0') + goto out; + count++; + } + + dst[count] = '\0'; + pr_err("strscpy_n: string truncated, exceeded max size %d\n", STR_MAX_SIZE); +out: + return count + 1; +} + +static inline ssize_t __strscpy(char *dst, const char *src) +{ + if (src != NULL) + return strscpy_n(dst, src); + return write_null_str(dst); +} + +static inline void* strscpy_n_update(char *dst, const char *src, const char *file, int line) +{ + ssize_t ret = __strscpy(dst, src); + if (!(unlikely(ret > 0 && ret < STR_MAX_SIZE))) { + panic("strscpy_n_update: ret = %zd at %s:%d :: %s < - %s\n", ret, file, line, dst, src); + } + return dst + round_up(ret, 4); +} + +#define __ceph_san_ser_type(__buffer, __t) \ + (__builtin_choose_expr((IS_DYNAMIC_CHAR_PTR((__t)) || IS_STATIC_CHAR_ARRAY((__t))), \ + /* For static arrays (like __func__), just save pointer */ \ + (pr_err("DYNAMIC_PTR: %s:%d: saving pointer %llx\n", kbasename(__FILE__), __LINE__, (unsigned long long)(__t)), \ + *(void **)(__buffer) = __suppress_cast_warning(void *, (__t)), \ + (__buffer) = (void *)((char *)(__buffer) + sizeof(void *))), \ + __builtin_choose_expr(IS_STR((__t)), \ + ((__buffer) = (void *)strscpy_n_update((__buffer), char_ptr(__t), kbasename(__FILE__), __LINE__)), \ + __builtin_choose_expr(IS_STR_ARRAY((__t)), \ + /* For dynamic arrays, save NULL and string bytes */ \ + ((__buffer) = (void *)strscpy_n_update((__buffer), char_ptr(__t), kbasename(__FILE__), __LINE__)), \ + __builtin_choose_expr(sizeof((__t)) == 1, \ + (*(uint32_t *)(__buffer) = __suppress_cast_warning(uint32_t, (__t)), \ + /*pr_err("SERIALIZING_U8: %s:%d: saving uint8_t %u\n", kbasename(__FILE__), __LINE__, (__t)),*/ \ + (__buffer) = (void *)((char *)(__buffer) + 4)), \ + __builtin_choose_expr(sizeof((__t)) == 2, /* we have no way to differentiate u16 and u32 in deserialization */ \ + (*(uint32_t *)(__buffer) = __suppress_cast_warning(uint32_t, (__t)), \ + (__buffer) = (void *)((char 
*)(__buffer) + 4)), \ + __builtin_choose_expr(sizeof((__t)) == 4, \ + (*(uint32_t *)(__buffer) = __suppress_cast_warning(uint32_t, (__t)), \ + (__buffer) = (void *)((char *)(__buffer) + 4)), \ + __builtin_choose_expr(sizeof((__t)) == 8, \ + (*(uint64_t *)(__buffer) = __suppress_cast_warning(uint64_t, (__t)), \ + (__buffer) = (void *)((char *)(__buffer) + 8)), \ + (pr_err("UNSUPPORTED_TYPE: %s:%d: unsupported type size %s\n", kbasename(__FILE__), __LINE__, #__t)) \ + )))))))) + +#define ___ceph_san_ser0(__buffer) +#define ___ceph_san_ser1(__buffer, __t) (__ceph_san_ser_type(__buffer, __t)) +#define ___ceph_san_ser2(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser1(__buffer, __args)) +#define ___ceph_san_ser3(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser2(__buffer, __args)) +#define ___ceph_san_ser4(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser3(__buffer, __args)) +#define ___ceph_san_ser5(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser4(__buffer, __args)) +#define ___ceph_san_ser6(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser5(__buffer, __args)) +#define ___ceph_san_ser7(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser6(__buffer, __args)) +#define ___ceph_san_ser8(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser7(__buffer, __args)) +#define ___ceph_san_ser9(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser8(__buffer, __args)) +#define ___ceph_san_ser10(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser9(__buffer, __args)) +#define ___ceph_san_ser11(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser10(__buffer, __args)) +#define ___ceph_san_ser12(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser11(__buffer, __args)) +#define ___ceph_san_ser13(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser12(__buffer, __args)) +#define ___ceph_san_ser14(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser13(__buffer, __args)) +#define ___ceph_san_ser15(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser14(__buffer, __args)) +#define ___ceph_san_ser16(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser15(__buffer, __args)) +#define ___ceph_san_ser17(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser16(__buffer, __args)) +#define ___ceph_san_ser18(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser17(__buffer, __args)) +#define ___ceph_san_ser19(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser18(__buffer, __args)) +#define ___ceph_san_ser20(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser19(__buffer, __args)) +#define ___ceph_san_ser21(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser20(__buffer, __args)) +#define ___ceph_san_ser22(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser21(__buffer, __args)) +#define ___ceph_san_ser23(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser22(__buffer, __args)) +#define ___ceph_san_ser24(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser23(__buffer, __args)) +#define ___ceph_san_ser25(__buffer, __t, __args...) 
(__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser24(__buffer, __args)) +#define ___ceph_san_ser26(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser25(__buffer, __args)) +#define ___ceph_san_ser27(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser26(__buffer, __args)) +#define ___ceph_san_ser28(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser27(__buffer, __args)) +#define ___ceph_san_ser29(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser28(__buffer, __args)) +#define ___ceph_san_ser30(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser29(__buffer, __args)) +#define ___ceph_san_ser31(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser30(__buffer, __args)) +#define ___ceph_san_ser32(__buffer, __t, __args...) (__ceph_san_ser_type(__buffer, __t), ___ceph_san_ser31(__buffer, __args)) +#define ___ceph_san_ser(__buffer, ...) ___ceph_san_apply(___ceph_san_ser, ceph_san_narg(__VA_ARGS__))(__buffer, ##__VA_ARGS__) +#define ceph_san_ser(...) ___ceph_san_ser(__VA_ARGS__) + +#endif /* CEPH_SAN_SER_H */ + diff --git a/net/ceph/Makefile b/net/ceph/Makefile index 8802a0c0155d..be61857dafcb 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -10,6 +10,7 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ striper.o \ debugfs.o \ + ceph_san_pagefrag.o ceph_san_batch.o ceph_san_logger.o ceph_san_des.o \ auth.o auth_none.o \ crypto.o armor.o \ auth_x.o \ diff --git a/net/ceph/ceph_san_batch.c b/net/ceph/ceph_san_batch.c new file mode 100644 index 000000000000..08d33bc79900 --- /dev/null +++ b/net/ceph/ceph_san_batch.c @@ -0,0 +1,241 @@ +#include +#include +#include +#include +#include +#include + +/* Number of magazines to preallocate during initialization */ +#define CEPH_SAN_INIT_MAGAZINES 4 + +static struct ceph_san_magazine *alloc_magazine(struct ceph_san_batch *batch) +{ + struct ceph_san_magazine *mag; + + mag = kmem_cache_alloc(batch->magazine_cache, GFP_KERNEL); + if (!mag) + return NULL; + + INIT_LIST_HEAD(&mag->list); + mag->count = 0; + return mag; +} + +static void free_magazine(struct ceph_san_batch *batch, struct ceph_san_magazine *mag) +{ + kmem_cache_free(batch->magazine_cache, mag); +} + +/** + * ceph_san_batch_init - Initialize the batching system + * @batch: Batch structure to initialize + * + * Allocates and initializes the per-CPU magazines and global pools. 
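+ *
+ * A minimal usage sketch (illustrative only; "element" stands for any
+ * caller-owned pointer, since ceph_san_batch_cleanup() frees the magazines
+ * but not the elements still cached in them):
+ *
+ *	struct ceph_san_batch batch;
+ *
+ *	if (ceph_san_batch_init(&batch))
+ *		return -ENOMEM;
+ *	ceph_san_batch_put(&batch, element);
+ *	element = ceph_san_batch_get(&batch);	// NULL when nothing is cached
+ *	ceph_san_batch_cleanup(&batch);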
+ * + * Return: 0 on success, negative error code on failure + */ +int ceph_san_batch_init(struct ceph_san_batch *batch) +{ + int cpu, i; + struct ceph_san_cpu_magazine *cpu_mag; + struct ceph_san_magazine *mag; + + /* Initialize counters */ + batch->nr_full = 0; + batch->nr_empty = 0; + + /* Create magazine cache */ + batch->magazine_cache = kmem_cache_create("ceph_san_magazine", + sizeof(struct ceph_san_magazine), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!batch->magazine_cache) + return -ENOMEM; + + /* Initialize global magazine lists */ + INIT_LIST_HEAD(&batch->full_magazines); + INIT_LIST_HEAD(&batch->empty_magazines); + spin_lock_init(&batch->full_lock); + spin_lock_init(&batch->empty_lock); + + /* Allocate per-CPU magazines */ + batch->cpu_magazines = alloc_percpu(struct ceph_san_cpu_magazine); + if (!batch->cpu_magazines) + goto cleanup_cache; + + /* Initialize per-CPU magazines */ + for_each_possible_cpu(cpu) { + cpu_mag = per_cpu_ptr(batch->cpu_magazines, cpu); + cpu_mag->mag = NULL; + } + + /* Pre-allocate empty magazines */ + for (i = 0; i < CEPH_SAN_INIT_MAGAZINES; i++) { + mag = alloc_magazine(batch); + if (!mag) + goto cleanup; + + spin_lock(&batch->empty_lock); + list_add(&mag->list, &batch->empty_magazines); + batch->nr_empty++; + spin_unlock(&batch->empty_lock); + } + + return 0; + +cleanup: + ceph_san_batch_cleanup(batch); + return -ENOMEM; + +cleanup_cache: + kmem_cache_destroy(batch->magazine_cache); + return -ENOMEM; +} +EXPORT_SYMBOL(ceph_san_batch_init); + +/** + * ceph_san_batch_cleanup - Clean up the batching system + * @batch: Batch structure to clean up + */ +void ceph_san_batch_cleanup(struct ceph_san_batch *batch) +{ + int cpu; + struct ceph_san_magazine *mag, *tmp; + struct ceph_san_cpu_magazine *cpu_mag; + + /* Free per-CPU magazines */ + if (batch->cpu_magazines) { + for_each_possible_cpu(cpu) { + cpu_mag = per_cpu_ptr(batch->cpu_magazines, cpu); + if (cpu_mag->mag) + free_magazine(batch, cpu_mag->mag); + } + free_percpu(batch->cpu_magazines); + } + + /* Free magazines in the full pool */ + spin_lock(&batch->full_lock); + list_for_each_entry_safe(mag, tmp, &batch->full_magazines, list) { + list_del(&mag->list); + batch->nr_full--; + free_magazine(batch, mag); + } + spin_unlock(&batch->full_lock); + + /* Free magazines in the empty pool */ + spin_lock(&batch->empty_lock); + list_for_each_entry_safe(mag, tmp, &batch->empty_magazines, list) { + list_del(&mag->list); + batch->nr_empty--; + free_magazine(batch, mag); + } + spin_unlock(&batch->empty_lock); + + /* Destroy magazine cache */ + if (batch->magazine_cache) + kmem_cache_destroy(batch->magazine_cache); +} +EXPORT_SYMBOL(ceph_san_batch_cleanup); + +/** + * ceph_san_batch_get - Get an element from the batch + * @batch: Batch to get element from + * + * Return: Element from the magazine, or NULL if none available + */ +void *ceph_san_batch_get(struct ceph_san_batch *batch) +{ + struct ceph_san_cpu_magazine *cpu_mag; + struct ceph_san_magazine *old_mag, *new_mag; + void *element = NULL; + + cpu_mag = this_cpu_ptr(batch->cpu_magazines); + + /* If we have a magazine and it has elements, use it */ + if (cpu_mag->mag && cpu_mag->mag->count > 0) { + element = cpu_mag->mag->elements[--cpu_mag->mag->count]; + return element; + } + + /* Current magazine is empty, try to get a full one */ + old_mag = cpu_mag->mag; + + /* Return old magazine to empty pool if we have one */ + if (old_mag) { + spin_lock(&batch->empty_lock); + list_add(&old_mag->list, &batch->empty_magazines); + batch->nr_empty++; + 
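+		/*
+		 * The drained magazine is now parked in the empty pool; the
+		 * per-CPU slot is cleared below and refilled from the full
+		 * pool when one is available.
+		 */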
spin_unlock(&batch->empty_lock); + cpu_mag->mag = NULL; + } + + if (batch->nr_full > 0) { + /* Try to get a full magazine */ + spin_lock(&batch->full_lock); + if (!list_empty(&batch->full_magazines)) { + new_mag = list_first_entry(&batch->full_magazines, + struct ceph_san_magazine, list); + list_del(&new_mag->list); + batch->nr_full--; + spin_unlock(&batch->full_lock); + + cpu_mag->mag = new_mag; + if (new_mag->count > 0) + element = new_mag->elements[--new_mag->count]; + } else { + spin_unlock(&batch->full_lock); + } + } + return element; +} +EXPORT_SYMBOL(ceph_san_batch_get); + +/** + * ceph_san_batch_put - Put an element back into the batch + * @batch: Batch to put element into + * @element: Element to put back + */ +void ceph_san_batch_put(struct ceph_san_batch *batch, void *element) +{ + struct ceph_san_cpu_magazine *cpu_mag; + struct ceph_san_magazine *mag; + + cpu_mag = this_cpu_ptr(batch->cpu_magazines); + + /* Optimistically try to add to current magazine */ + if (likely(cpu_mag->mag && cpu_mag->mag->count < CEPH_SAN_MAGAZINE_SIZE)) { + cpu_mag->mag->elements[cpu_mag->mag->count++] = element; + return; + } + + /* If current magazine is full, move it to full pool */ + if (likely(cpu_mag->mag && cpu_mag->mag->count >= CEPH_SAN_MAGAZINE_SIZE)) { + spin_lock(&batch->full_lock); + list_add_tail(&cpu_mag->mag->list, &batch->full_magazines); + batch->nr_full++; + spin_unlock(&batch->full_lock); + cpu_mag->mag = NULL; + } + + /* Get new magazine if needed */ + if (likely(!cpu_mag->mag)) { + /* Try to get from empty pool first */ + spin_lock(&batch->empty_lock); + if (!list_empty(&batch->empty_magazines)) { + mag = list_first_entry(&batch->empty_magazines, + struct ceph_san_magazine, list); + list_del(&mag->list); + batch->nr_empty--; + spin_unlock(&batch->empty_lock); + cpu_mag->mag = mag; + } else { + spin_unlock(&batch->empty_lock); + cpu_mag->mag = alloc_magazine(batch); + } + + if (unlikely(!cpu_mag->mag)) + return; + } + /* Add element to magazine */ + cpu_mag->mag->elements[cpu_mag->mag->count++] = element; +} +EXPORT_SYMBOL(ceph_san_batch_put); diff --git a/net/ceph/ceph_san_des.c b/net/ceph/ceph_san_des.c new file mode 100644 index 000000000000..b37ae7c849c5 --- /dev/null +++ b/net/ceph/ceph_san_des.c @@ -0,0 +1,162 @@ +#include +#include /* For strchr, strlen */ +#include /* For isdigit */ +#include /* For size_t */ +#include /* For snprintf */ + +static int parse_format_specifier(const char **fmt, char *spec) { + const char *p = *fmt; + char *s = spec; + + /* Skip the '%' */ + if (*p != '%') return -1; + *s++ = *p++; + + /* Skip flags */ + while (*p && (*p == '-' || *p == '+' || *p == ' ' || *p == '#' || *p == '0')) { + *s++ = *p++; + } + + /* Skip field width */ + while (*p && isdigit(*p)) { + *s++ = *p++; + } + + /* Skip precision */ + if (*p == '.') { + *s++ = *p++; + while (*p && isdigit(*p)) { + *s++ = *p++; + } + } + + /* Get length modifier */ + if (*p == 'h' || *p == 'l' || *p == 'L' || *p == 'j' || *p == 'z' || *p == 't') { + *s++ = *p++; + if ((*p == 'h' || *p == 'l') && *(p-1) == *p) { + *s++ = *p++; + } + } + + /* Get conversion specifier */ + if (*p && strchr("diouxXeEfFgGaAcspn%", *p)) { + *s++ = *p++; + } else { + return -1; + } + + *s = '\0'; + *fmt = p; + return 0; +} + +int ceph_san_des_reconstruct(const char *fmt, const void *buffer, size_t nr_args, + size_t size, char *out, size_t out_size) { + const unsigned char *buf = buffer; + const char *p = fmt; + char spec[32]; + size_t offset = 0; + size_t out_offset = 0; + size_t arg_count = 0; + + if (!fmt || 
!buffer || !out || !out_size) { + return -1; + } + //printf("Starting reconstruction with buffer at %p, size %zu, nr_args %zu, out_size %zu\n", + // buffer, size, nr_args, out_size); + while (*p && out_offset < out_size - 1) { + if (*p != '%') { + out[out_offset++] = *p++; + continue; + } + + if (parse_format_specifier(&p, spec) < 0) { + return -1; + } + + if (arg_count >= nr_args) { + return -1; + } + + /* Check buffer overflow */ + if (offset >= size) { + return -1; + } + + //printf("Processing specifier '%s' at offset %zu\n", spec, offset); + + /* Handle different format specifiers */ + switch (spec[strlen(spec)-1]) { + case 'd': + case 'i': + case 'o': + case 'u': + case 'x': + case 'X': { + long long val; + const void *ptr = buf + offset; + if (strchr(spec, 'l')) { + val = *(const long long*)ptr; + offset += sizeof(long long); + } else { + val = *(const int*)ptr; + offset += sizeof(int); + } + //printf("Read integer value: %lld at address %p (offset %zu)\n", val, ptr, offset); + out_offset += snprintf(out + out_offset, out_size - out_offset, spec, val); + break; + } + + case 'f': + case 'e': + case 'E': + case 'g': + case 'G': + case 'a': + case 'A': { + double val = *(const double*)(buf + offset); + offset += sizeof(double); + //printf("Read double value: %f at offset %zu\n", val, offset - sizeof(double)); + out_offset += snprintf(out + out_offset, out_size - out_offset, spec, val); + break; + } + + case 'c': { + char val = *(const char*)(buf + offset); + offset += sizeof(char); + //printf("Read char value: %c at offset %zu\n", val, offset - sizeof(char)); + out_offset += snprintf(out + out_offset, out_size - out_offset, spec, val); + break; + } + + case 's': { + const char *val = *(const char**)(buf + offset); + offset += sizeof(const char*); + //printf("Read string pointer: %p at offset %zu\n", val, offset - sizeof(const char*)); + out_offset += snprintf(out + out_offset, out_size - out_offset, spec, val); + break; + } + + case 'p': { + const void *val = *(const void**)(buf + offset); + offset += sizeof(const void*); + //printf("Read pointer value: %p at offset %zu\n", val, offset - sizeof(const void*)); + out_offset += snprintf(out + out_offset, out_size - out_offset, spec, val); + break; + } + + case '%': { + out[out_offset++] = '%'; + break; + } + + default: + return -1; + } + + arg_count++; + } + + out[out_offset] = '\0'; + return out_offset; +} diff --git a/net/ceph/ceph_san_logger.c b/net/ceph/ceph_san_logger.c new file mode 100644 index 000000000000..7cea1eceb4de --- /dev/null +++ b/net/ceph/ceph_san_logger.c @@ -0,0 +1,1133 @@ +/* Standard kernel includes */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Ceph SAN includes */ +#include +#include +#include +#include +#include + +static void ceph_san_tls_release_verbose(void *ptr); +#define NULL_STR "(NULL)" +/** + * is_valid_kernel_addr - Check if address is in valid kernel address range + * @addr: Address to check + * + * Returns true if address is in valid kernel address range + */ +bool is_valid_kernel_addr(const void *addr) +{ + if (virt_addr_valid(addr)) { + return true; + } + return false; +} +EXPORT_SYMBOL(is_valid_kernel_addr); + +#define CEPH_SAN_LOG_BATCH_MAX_FULL 16 +/* Global logger instance */ +struct ceph_san_logger g_logger; +EXPORT_SYMBOL(g_logger); + +/** + * get_context_id - Get a unique context ID + * + * Acquires a unique ID for a TLS context using the global counter + * + * Returns a unique context ID + */ +static u64 
get_context_id(void) +{ + u64 id; + spin_lock(&g_logger.ctx_id_lock); + id = g_logger.next_ctx_id++; + spin_unlock(&g_logger.ctx_id_lock); + return id; +} + +/** + * validate_tls_ctx - Validate a TLS context + * @ctx: Context to validate + * + * Returns true if context is valid, false otherwise + */ +static inline bool validate_tls_ctx(struct ceph_san_tls_ctx *ctx) +{ + if (!ctx) + return false; + + if (ctx->debug_poison != CEPH_SAN_CTX_POISON) { + pr_err("BUG: TLS context id=%llu (%llx) has invalid debug_poison value 0x%llx\n", + ctx->id, (unsigned long long)ctx, (unsigned long long)ctx->debug_poison); + return false; + } + + if (atomic_read(&ctx->refcount) != 1) { + pr_err("BUG: TLS context id=%llu (%llx) refcount %d, expected 1\n", + ctx->id, (unsigned long long)ctx, atomic_read(&ctx->refcount)); + return false; + } + + return true; +} + +static inline struct ceph_san_tls_ctx *get_tls_ctx(void) +{ + struct ceph_san_tls_ctx *ctx = current->tls_ctx; + if (likely(ctx)) { + ctx = container_of((void *)ctx, struct ceph_san_tls_ctx, release); + } + return ctx; +} + +/** + * add_context_to_global_list - Add a context to the global list + * @ctx: The context to add to the global list + * + * Adds the context to the global list of contexts and updates stats + */ +static void add_context_to_global_list(struct ceph_san_tls_ctx *ctx) +{ + spin_lock(&g_logger.lock); + list_add(&ctx->list, &g_logger.contexts); + g_logger.total_contexts_allocated++; + spin_unlock(&g_logger.lock); +} + +static void *alloc_tls_ctx(void) +{ + struct ceph_san_tls_ctx *ctx; + ctx = kmem_cache_alloc(g_logger.alloc_batch.magazine_cache, GFP_KERNEL); + if (!ctx) { + pr_err("Failed to allocate TLS context from magazine cache\n"); + return NULL; + } + + /* Initialize pagefrag */ + memset(&ctx->pf, 0, sizeof(ctx->pf)); + if (cephsan_pagefrag_init(&ctx->pf)) { + pr_err("Failed to initialize pagefrag for TLS context\n"); + kmem_cache_free(g_logger.alloc_batch.magazine_cache, ctx); + return NULL; + } + + /* Assign unique ID and initialize debug poison */ + ctx->debug_poison = CEPH_SAN_CTX_POISON; + atomic_set(&ctx->refcount, 0); + ctx->id = get_context_id(); + add_context_to_global_list(ctx); + + ctx->release = ceph_san_tls_release_verbose; + + pr_debug("[%d]ceph_san_logger: initialized refcount=0 for new context id=%llu (%llx)\n", + smp_processor_id(), ctx->id, (unsigned long long)ctx); + + return ctx; +} + +static inline struct ceph_san_tls_ctx *get_new_ctx(void) +{ + struct ceph_san_tls_ctx *ctx; + + /* Try to get context from batch first */ + ctx = ceph_san_batch_get(&g_logger.alloc_batch); + if (!ctx) { + /* Create new context if batch is empty */ + ctx = alloc_tls_ctx(); /* alloc_tls_ctx sets poison, id, refcount=0 */ + if (!ctx) + return NULL; /* alloc_tls_ctx already prints error if kmem_cache_alloc fails */ + } + + /* Verify debug poison on context from batch or fresh allocation */ + if (ctx->debug_poison != CEPH_SAN_CTX_POISON) { + pr_err("BUG: Context id=%llu from batch/alloc has invalid debug_poison 0x%llx\n", + ctx->id, (unsigned long long)ctx->debug_poison); + BUG(); + } + + ctx->base_jiffies = jiffies; + cephsan_pagefrag_reset(&ctx->pf); + ceph_san_logger_print_stats(&g_logger); /* Moved from original new context block */ + return ctx; /* Context returned with refcount = 0 */ +} + +/** + * is_valid_active_ctx - Validate an active TLS context + * @ctx: Context to validate + * @context_description: String describing the context for error messages + * + * Returns true if context is valid (poison OK, refcount == 1), false 
otherwise + */ +static inline bool is_valid_active_ctx(struct ceph_san_tls_ctx *ctx, const char *context_description) +{ + if (!ctx) { + pr_err("BUG: %s context is NULL.\n", context_description); + return false; /* Should not happen if called after a NULL check */ + } + + if (ctx->debug_poison != CEPH_SAN_CTX_POISON) { + pr_err("BUG: %s context id=%llu (%llx) has invalid debug_poison value 0x%llx\n", + context_description, ctx->id, (unsigned long long)ctx, + (unsigned long long)ctx->debug_poison); + return false; + } + + if (atomic_read(&ctx->refcount) != 1) { + pr_err("BUG: %s context id=%llu (%llx) refcount %d, expected 1\n", + context_description, ctx->id, (unsigned long long)ctx, + atomic_read(&ctx->refcount)); + return false; + } + return true; +} + +static void free_tls_ctx(void *ptr) +{ + struct ceph_san_tls_ctx *ctx = ptr; + + if (!ctx) { + pr_err("BUG: Trying to free NULL TLS context\n"); + return; + } + + if (ctx->debug_poison != CEPH_SAN_CTX_POISON) { + pr_err("BUG: TLS context id=%llu has invalid debug_poison value 0x%llx\n", + ctx->id, (unsigned long long)ctx->debug_poison); + return; + } + + if (atomic_read(&ctx->refcount) != 0) { + pr_err("BUG: Freeing TLS context id=%llu with non-zero refcount %d\n", + ctx->id, atomic_read(&ctx->refcount)); + return; + } + + pr_err("ceph_san_logger: freeing context id=%llu\n", ctx->id); + cephsan_pagefrag_deinit(&ctx->pf); + kmem_cache_free(g_logger.alloc_batch.magazine_cache, ctx); +} + +/* Release function for TLS storage */ +static void ceph_san_tls_release(void *ptr) +{ + struct ceph_san_tls_ctx *ctx = ptr; + + if (!ctx) + return; + + if (atomic_dec_return(&ctx->refcount) != 0) { + pr_err("BUG: TLS context id=%llu refcount %d after release\n", + ctx->id, atomic_read(&ctx->refcount)); + panic("ceph_san_logger: TLS context id=%llu refcount %d after release\n", ctx->id, atomic_read(&ctx->refcount)); + } + pr_debug("ceph_san_logger: decremented refcount=0 for context id=%llu\n", ctx->id); + + /* Add context to log batch */ + ctx->task = NULL; + pr_debug("ceph_san_logger: releasing TLS context for pid %d [%s]\n", + ctx->pid, ctx->comm); + ceph_san_batch_put(&g_logger.log_batch, ctx); + + /* If log_batch has too many full magazines, move one to alloc_batch */ + if (g_logger.log_batch.nr_full > CEPH_SAN_LOG_BATCH_MAX_FULL) { + struct ceph_san_magazine *mag; + spin_lock(&g_logger.log_batch.full_lock); + if (!list_empty(&g_logger.log_batch.full_magazines)) { + mag = list_first_entry(&g_logger.log_batch.full_magazines, + struct ceph_san_magazine, list); + list_del(&mag->list); + g_logger.log_batch.nr_full--; + spin_unlock(&g_logger.log_batch.full_lock); + + spin_lock(&g_logger.alloc_batch.full_lock); + list_add(&mag->list, &g_logger.alloc_batch.full_magazines); + g_logger.alloc_batch.nr_full++; + spin_unlock(&g_logger.alloc_batch.full_lock); + } else { + spin_unlock(&g_logger.log_batch.full_lock); + } + } +} + +static void ceph_san_tls_release_verbose(void *ptr) +{ + struct ceph_san_tls_ctx *ctx = container_of(ptr, struct ceph_san_tls_ctx, release); + if (!ctx) { + pr_err("ceph_san_logger -- Callback : invalid TLS context pointer %d\n", current->pid); + return; + } + if (ctx->debug_poison != CEPH_SAN_CTX_POISON) { + pr_err("ceph_san_logger -- Callback : invalid TLS context id=%llu has invalid debug_poison value 0x%llx\n", + ctx->id, (unsigned long long)ctx->debug_poison); + BUG(); + } + if (atomic_read(&ctx->refcount) != 1) { + pr_err("ceph_san_logger -- Callback : invalid TLS context refcount %d for pid %d [%s]\n", + atomic_read(&ctx->refcount), 
ctx->pid, ctx->comm); + BUG(); + } + ceph_san_tls_release(ctx); +} +/** + * ceph_san_get_tls_ctx - Get or create TLS context for current task + * + * Returns pointer to TLS context or NULL on error + */ +struct ceph_san_tls_ctx *ceph_san_get_tls_ctx(void) +{ + struct ceph_san_tls_ctx *ctx = get_tls_ctx(); /* Inline helper, gets container_of */ + + if (ctx) { + if (!is_valid_active_ctx(ctx, "Existing TLS")) { + current->tls_ctx = NULL; /* Invalidate bad pointer */ + BUG(); + } + return ctx; + } + + /* Create new context */ + pr_debug("ceph_san_logger: creating new TLS context for pid %d [%s]\n", + current->pid, current->comm); + + ctx = get_new_ctx(); /* Get base context with refcount 0 */ + if (!ctx) + return NULL; + + /* Set up TLS specific parts */ + current->tls_ctx = (void *)&ctx->release; + ctx->task = current; + ctx->pid = current->pid; + strncpy(ctx->comm, current->comm, TASK_COMM_LEN); + ctx->comm[TASK_COMM_LEN - 1] = '\0'; /* Ensure null termination */ + + /* Increment refcount from 0 to 1 */ + if (atomic_inc_return(&ctx->refcount) != 1) { + pr_err("BUG: Failed to set refcount=1 for new TLS context id=%llu (was %d before inc)\n", + ctx->id, atomic_read(&ctx->refcount) - 1); + current->tls_ctx = NULL; /* Don't leave partially set up context */ + BUG(); + } + + pr_debug("ceph_san_logger: successfully created new TLS context id=%llu for pid %d [%s]\n", + ctx->id, ctx->pid, ctx->comm); + return ctx; +} +EXPORT_SYMBOL(ceph_san_get_tls_ctx); + +/** + * ceph_san_get_source_id - Get or create a source ID for the given location + * @file: Source file name + * @func: Function name + * @line: Line number + * @fmt: Format string + * + * Returns a unique ID for this source location + */ +u32 ceph_san_get_source_id(const char *file, const char *func, unsigned int line, const char *fmt) +{ + u32 id = atomic_inc_return(&g_logger.next_source_id); + + if (id >= CEPH_SAN_MAX_SOURCE_IDS) { + /* If we run out of IDs, just use the first one */ + pr_warn("ceph_san_logger: source ID overflow, reusing ID 1\n"); + id = 1; + } + + /* Store the source information in the global map */ + g_logger.source_map[id].file = file; + g_logger.source_map[id].func = func; + g_logger.source_map[id].line = line; + g_logger.source_map[id].fmt = fmt; + g_logger.source_map[id].warn_count = 0; + return id; +} +EXPORT_SYMBOL(ceph_san_get_source_id); + +/** + * ceph_san_get_source_info - Get source info for a given ID + * @id: Source ID + * + * Returns the source information for this ID + */ +struct ceph_san_source_info *ceph_san_get_source_info(u32 id) +{ + if (unlikely(id == 0 || id >= CEPH_SAN_MAX_SOURCE_IDS)) + return NULL; + return &g_logger.source_map[id]; +} +EXPORT_SYMBOL(ceph_san_get_source_info); + +/** + * ceph_san_check_client_id - Check if a client ID matches the given fsid:global_id pair + * @id: Client ID to check + * @fsid: Client FSID to compare + * @global_id: Client global ID to compare + * + * Returns the actual ID of the pair. If the given ID doesn't match, scans for + * existing matches or allocates a new ID if no match is found. 
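+ *
+ * Hedged usage sketch, mirroring what CEPH_SAN_LOG_CLIENT does with a
+ * cached, initially-zero ID ("client" stands for any struct ceph_client):
+ *
+ *	static u32 cached_id;
+ *
+ *	cached_id = ceph_san_check_client_id(cached_id, client->fsid.fsid,
+ *					     client->monc.auth->global_id);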
+ */ +u32 ceph_san_check_client_id(u32 id, const char *fsid, u64 global_id) +{ + u32 found_id = 0; + char fsid_readable[64]; + struct ceph_san_client_id *entry; + u32 max_id; + + /* First check if the given ID matches */ + if (id != 0 && id < CEPH_SAN_MAX_CLIENT_IDS) { + entry = &g_logger.client_map[id]; + if (memcmp(entry->fsid, fsid, sizeof(entry->fsid)) == 0 && + entry->global_id == global_id) { + found_id = id; + goto out_fast; + } + } + + spin_lock(&g_logger.client_lock); + max_id = g_logger.next_client_id; + + /* Scan for existing match */ + for (id = 1; id < max_id && id < CEPH_SAN_MAX_CLIENT_IDS; id++) { + entry = &g_logger.client_map[id]; + if (memcmp(entry->fsid, fsid, sizeof(entry->fsid)) == 0 && + entry->global_id == global_id) { + found_id = id; + goto out; + } + } + + /* No match found, allocate new ID */ + found_id = ++g_logger.next_client_id; + if (found_id >= CEPH_SAN_MAX_CLIENT_IDS) { + /* If we run out of IDs, just use the first one */ + pr_warn("ceph_san_logger: client ID overflow, reusing ID 1\n"); + found_id = 1; + } + snprintf(fsid_readable, sizeof(fsid_readable), + "%02x%02x%02x%02x-%02x%02x%02x%02x-%02x%02x%02x%02x-%02x%02x%02x%02x", + fsid[0], fsid[1], fsid[2], fsid[3], fsid[4], fsid[5], fsid[6], fsid[7], + fsid[8], fsid[9], fsid[10], fsid[11], fsid[12], fsid[13], fsid[14], fsid[15]); + pr_info("ceph_san_logger: allocating new client ID %u (next=%u) for fsid=%s global_id=%llu\n", + found_id, g_logger.next_client_id, fsid_readable, global_id); + entry = &g_logger.client_map[found_id]; + memcpy(entry->fsid, fsid, sizeof(entry->fsid)); + entry->global_id = global_id; + +out: + spin_unlock(&g_logger.client_lock); +out_fast: + return found_id; +} +EXPORT_SYMBOL(ceph_san_check_client_id); + +/** + * ceph_san_get_client_info - Get client info for a given ID + * @id: Client ID + * + * Returns the client information for this ID + */ +const struct ceph_san_client_id *ceph_san_get_client_info(u32 id) +{ + if (id == 0 || id >= CEPH_SAN_MAX_CLIENT_IDS) + return NULL; + return &g_logger.client_map[id]; +} +EXPORT_SYMBOL(ceph_san_get_client_info); + +/** + * ceph_san_log - Log a message + * @source_id: Source ID for this location + * @client_id: Client ID for this message + * @needed_size: Size needed for the message + * + * Returns a buffer to write the message into + */ +void* ceph_san_log(u32 source_id, u32 client_id, size_t needed_size) +{ + struct ceph_san_tls_ctx *ctx; + struct ceph_san_log_entry *entry = NULL; + u64 alloc; + int retry_count = 0; + +#if CEPH_SAN_TRACK_USAGE + struct ceph_san_source_info *source; +#endif + needed_size = round_up(needed_size + sizeof(struct ceph_san_log_entry), 8); +#if CEPH_SAN_TRACK_USAGE + /* Get source info to update stats */ + source = ceph_san_get_source_info(source_id); + if (unlikely(source)) { + if (in_serving_softirq()) { + atomic_inc(&source->napi_usage); + atomic_add(needed_size, &source->napi_bytes); + } else { + atomic_inc(&source->task_usage); + atomic_add(needed_size, &source->task_bytes); + } + } +#endif + + while (entry == NULL) { + ctx = ceph_san_get_ctx(); + if (!ctx) { + pr_err("Failed to get TLS context\n"); + return NULL; + } + if (!is_valid_kernel_addr(ctx)) { + pr_err("ceph_san_log: invalid TLS context address: %pK\n", ctx); + return NULL; + } + if (unlikely(retry_count)) { + pr_debug("[%d]Retrying allocation with ctx %llu (%s, pid %d) (retry %d, needed_size=%zu @ %d)\n", + smp_processor_id(), ctx->id, ctx->comm, ctx->pid, retry_count, needed_size, source_id); + } + + alloc = cephsan_pagefrag_alloc(&ctx->pf, 
needed_size); + if (alloc == (u64)-ENOMEM) { + pr_debug("[%d]ceph_san_log: pagefrag full for ctx %llu (%s, pid %d), refcount=%d. Alloc failed (retry=%d): pf head=%u active_elements=%d alloc_count=%u, needed_size=%zu, pagefrag_size=%u\n", + smp_processor_id(), + ctx->id, ctx->comm, ctx->pid, atomic_read(&ctx->refcount), retry_count, ctx->pf.head, + ctx->pf.active_elements, ctx->pf.alloc_count, + needed_size, CEPHSAN_PAGEFRAG_SIZE); + + /* Invalidate the correct active context slot before releasing and retrying */ + if (in_serving_softirq()) { + if (this_cpu_read(g_logger.napi_ctxs) == ctx) { + pr_debug("[%d]ceph_san_log: Clearing NAPI slot for ctx %llu (CPU %d) due to ENOMEM.\n", smp_processor_id(), ctx->id, smp_processor_id()); + this_cpu_write(g_logger.napi_ctxs, NULL); + } else { + pr_warn("[%d]ceph_san_log: ENOMEM for ctx %llu (%s, pid %d) in softirq, but it wasn't in current CPU's NAPI slot. NAPI slot holds %p. Refcount: %d.\n", + smp_processor_id(), ctx->id, ctx->comm, ctx->pid, this_cpu_read(g_logger.napi_ctxs), atomic_read(&ctx->refcount)); + } + } else { + if (current->tls_ctx == (void *)&ctx->release) { + pr_debug("[%d]ceph_san_log: Clearing current->tls_ctx for TLS ctx %llu due to ENOMEM.\n", smp_processor_id(), ctx->id); + current->tls_ctx = NULL; + } else { + pr_warn("[%d]ceph_san_log: ENOMEM for ctx %llu (%s, pid %d) not in softirq, but it wasn't current->tls_ctx. current->tls_ctx is %p. Refcount: %d.\n", + smp_processor_id(), ctx->id, ctx->comm, ctx->pid, current->tls_ctx, atomic_read(&ctx->refcount)); + } + } + + ++retry_count; + ceph_san_tls_release(ctx); /* This decrements refcount, ctx may be reused or freed */ + entry = NULL; /* Ensure we loop to get a new context */ + continue; + } + //TODO:: remove this shit alloc should return a ptr + entry = cephsan_pagefrag_get_ptr(&ctx->pf, alloc); + if (unlikely(!is_valid_kernel_addr(entry))) { + pr_debug("[%d]ceph_san_log: invalid log entry pointer: %llx from ctx %llu (%s, pid %d)\n", + smp_processor_id(), (unsigned long long)entry, ctx->id, ctx->comm, ctx->pid); + ceph_san_tls_release(ctx); /* Release the context as we can't use the entry */ + entry = NULL; /* force retry to get a new context and page */ + continue; + } + if (unlikely(retry_count)) { + pr_debug("[%d]Successfully allocated with ctx %llu (%s, pid %d) after %d retries (needed_size=%zu @ %d)\n", + smp_processor_id(), ctx->id, ctx->comm, ctx->pid, retry_count, needed_size, source_id); + } + } + + /* Update last_entry pointer */ + ctx->pf.last_entry = entry; + + /* Fill in entry details */ +#if CEPH_SAN_DEBUG_POISON + entry->debug_poison = CEPH_SAN_LOG_ENTRY_POISON; +#endif + entry->ts_delta = (u32)(jiffies - ctx->base_jiffies); + entry->source_id = (u16)source_id; + entry->client_id = (u8)client_id; + entry->len = (u8)needed_size; + return entry->buffer; +} +EXPORT_SYMBOL(ceph_san_log); + +/** + * ceph_san_get_napi_ctx - Get NAPI context for current CPU + * + * Returns pointer to NAPI context or NULL if not set + */ +struct ceph_san_tls_ctx *ceph_san_get_napi_ctx(void) +{ + struct ceph_san_tls_ctx *ctx = this_cpu_read(g_logger.napi_ctxs); + + if (ctx) { + if (!is_valid_active_ctx(ctx, "NAPI")) { + pr_err("BUG: Invalid NAPI context found for CPU %d, clearing.\n", smp_processor_id()); + this_cpu_write(g_logger.napi_ctxs, NULL); + return NULL; + } + } + return ctx; +} +EXPORT_SYMBOL(ceph_san_get_napi_ctx); + +/** + * ceph_san_set_napi_ctx - Set NAPI context for current CPU + * @ctx: Context to set + */ +void ceph_san_set_napi_ctx(struct ceph_san_tls_ctx *ctx) +{ + if 
(ctx && !is_valid_active_ctx(ctx, "New NAPI being set")) { + BUG(); /* Context should be valid and refcount 1 before being set */ + } + this_cpu_write(g_logger.napi_ctxs, ctx); +} +EXPORT_SYMBOL(ceph_san_set_napi_ctx); + +/** + * ceph_san_get_ctx - Get appropriate context based on context type + * + * Returns pointer to appropriate context or NULL on error + */ +struct ceph_san_tls_ctx *ceph_san_get_ctx(void) +{ + /* If we're in NAPI context, use per-CPU context */ + if (in_serving_softirq()) { + struct ceph_san_tls_ctx *ctx = ceph_san_get_napi_ctx(); /* This validates existing NAPI ctx */ + if (ctx) { + return ctx; + } + /* Create new NAPI context if none exists */ + pr_debug("ceph_san_logger: creating new NAPI context for CPU %d\n", smp_processor_id()); + + ctx = get_new_ctx(); /* Get base context with refcount 0 */ + if (!ctx) + return NULL; + + /* Set up NAPI specific parts */ + ctx->task = NULL; + ctx->pid = 0; /* Or some other indicator like -1 or smp_processor_id() */ + snprintf(ctx->comm, TASK_COMM_LEN, "NAPI-%d", smp_processor_id()); + ctx->comm[TASK_COMM_LEN - 1] = '\0'; /* Ensure null termination */ + + /* Increment refcount from 0 to 1 */ + if (atomic_inc_return(&ctx->refcount) != 1) { + pr_err("BUG: Failed to set refcount=1 for new NAPI context id=%llu (was %d before inc)\n", + ctx->id, atomic_read(&ctx->refcount) - 1); + /* TODO: Consider if ctx needs to be removed from global list or freed differently if BUGging here */ + BUG(); + } + + ceph_san_set_napi_ctx(ctx); /* Stores it in per-CPU slot and does poison check */ + + pr_debug("ceph_san_logger: successfully created new NAPI context id=%llu for CPU %d\n", + ctx->id, smp_processor_id()); + return ctx; + } + /* Otherwise use thread-local context */ + return ceph_san_get_tls_ctx(); +} +EXPORT_SYMBOL(ceph_san_get_ctx); + +/** + * ceph_san_logger_init - Initialize the logging system + * + * Returns 0 on success, negative error code on failure + */ +int ceph_san_logger_init(void) +{ + int ret; + + /* Initialize global state */ + INIT_LIST_HEAD(&g_logger.contexts); + spin_lock_init(&g_logger.lock); + spin_lock_init(&g_logger.ctx_id_lock); + atomic_set(&g_logger.next_source_id, 0); + g_logger.next_ctx_id = 1; /* Start IDs from 1 */ + + /* Initialize per-CPU NAPI contexts */ + g_logger.napi_ctxs = alloc_percpu(struct ceph_san_tls_ctx); + if (!g_logger.napi_ctxs) { + pr_err("Failed to allocate per-CPU NAPI contexts\n"); + return -ENOMEM; + } + + /* Initialize allocation batch */ + ret = ceph_san_batch_init(&g_logger.alloc_batch); + if (ret) + goto cleanup_napi; + + /* Initialize log batch */ + ret = ceph_san_batch_init(&g_logger.log_batch); + if (ret) + goto cleanup_alloc; + + return 0; + +cleanup_alloc: + ceph_san_batch_cleanup(&g_logger.alloc_batch); +cleanup_napi: + free_percpu(g_logger.napi_ctxs); + return ret; +} +EXPORT_SYMBOL(ceph_san_logger_init); + +/** + * ceph_san_logger_cleanup - Clean up the logging system + */ +void ceph_san_logger_cleanup(void) +{ + struct ceph_san_tls_ctx *ctx, *tmp; + int cpu; + + /* Clean up all TLS contexts */ + spin_lock(&g_logger.lock); + list_for_each_entry_safe(ctx, tmp, &g_logger.contexts, list) { + list_del(&ctx->list); + free_tls_ctx(ctx); + } + spin_unlock(&g_logger.lock); + + /* Clean up per-CPU NAPI contexts */ + for_each_possible_cpu(cpu) { + ctx = per_cpu_ptr(g_logger.napi_ctxs, cpu); + if (ctx) { + free_tls_ctx(ctx); + } + } + free_percpu(g_logger.napi_ctxs); + + /* Clean up batches */ + ceph_san_batch_cleanup(&g_logger.alloc_batch); + ceph_san_batch_cleanup(&g_logger.log_batch); +} 
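+/*
+ * Illustrative end-to-end sketch of the API above (not part of the
+ * implementation): "nr", "name" and "ctx" are placeholders, with "ctx"
+ * standing for one entry on g_logger.contexts, and the locking around
+ * that list is elided.
+ *
+ *	char line[CEPH_SAN_LOG_MAX_LEN];
+ *	struct ceph_san_log_iter iter;
+ *	struct ceph_san_log_entry *entry;
+ *
+ *	ceph_san_logger_init();
+ *	CEPH_SAN_LOG("mapped %d extents for %s", nr, name);
+ *
+ *	ceph_san_log_iter_init(&iter, &ctx->pf);
+ *	while ((entry = ceph_san_log_iter_next(&iter)))
+ *		if (ceph_san_log_reconstruct(entry, line, sizeof(line)) > 0)
+ *			pr_info("%s\n", line);
+ *
+ *	ceph_san_logger_cleanup();
+ */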
+
+/**
+ * ceph_san_log_iter_init - Initialize the log entry iterator for a specific pagefrag
+ * @iter: Iterator structure to initialize
+ * @pf: Pagefrag to iterate over
+ */
+void ceph_san_log_iter_init(struct ceph_san_log_iter *iter, struct cephsan_pagefrag *pf)
+{
+    /* Initialize iterator state */
+    iter->pf = pf;
+    iter->steps = 0;
+    iter->current_offset = 0; /* Start from the beginning */
+    iter->end_offset = pf->head;
+    iter->prev_offset = 0;
+}
+EXPORT_SYMBOL(ceph_san_log_iter_init);
+
+/**
+ * ceph_san_log_iter_next - Get the next log entry from the iterator
+ * @iter: Iterator structure
+ *
+ * Returns the next log entry or NULL if no more entries are available.
+ */
+struct ceph_san_log_entry *ceph_san_log_iter_next(struct ceph_san_log_iter *iter)
+{
+    struct ceph_san_log_entry *entry;
+
+    if (!iter->pf || iter->current_offset == iter->end_offset)
+        return NULL;
+
+    entry = cephsan_pagefrag_get_ptr(iter->pf, iter->current_offset);
+
+    if (!entry || !is_valid_kernel_addr(entry)) {
+        pr_err("ceph_san_log_iter_next: invalid entry pointer %p\n", entry);
+        return NULL;
+    }
+
+#if CEPH_SAN_DEBUG_POISON
+    if (entry->debug_poison != CEPH_SAN_LOG_ENTRY_POISON || entry->len == 0) {
+        if (iter->steps > iter->pf->active_elements) {
+            pr_err("ceph_san_log_iter_next: corrupt entry %p (bad poison or zero length)\n", entry);
+        }
+        return NULL;
+    }
+#endif
+    iter->steps++;
+    iter->prev_offset = iter->current_offset;
+    iter->current_offset += entry->len;
+
+    if (iter->steps > iter->pf->active_elements || iter->current_offset > iter->end_offset) {
+        pr_err("ceph_san_log_iter_next: steps: %llu, active_elements: %u, entry_len: %u\n",
+               iter->steps, iter->pf->active_elements, entry->len);
+        pr_err("ceph_san_log_iter_next: pagefrag details:\n"
+               "  head: %u, current: %llu\n"
+               "  prev_offset: %llu, end_offset: %llu\n"
+               "  active_elements: %d, alloc_count: %u\n",
+               iter->pf->head, iter->current_offset,
+               iter->prev_offset, iter->end_offset,
+               iter->pf->active_elements, iter->pf->alloc_count);
+        BUG();
+    }
+
+    return entry;
+}
+EXPORT_SYMBOL(ceph_san_log_iter_next);
+
+/**
+ * ceph_san_log_reconstruct - Reconstruct a formatted string from a log entry
+ * @entry: Log entry containing serialized data
+ * @output: Buffer to write the formatted string to
+ * @output_size: Size of the output buffer
+ *
+ * Returns the length of the formatted string, or a negative error code on failure.
+ */
+int ceph_san_log_reconstruct(const struct ceph_san_log_entry *entry, char *output, size_t output_size)
+{
+    const struct ceph_san_source_info *info;
+    const char *fmt;
+    char *in_buffer, *out_ptr;
+    int ret;
+    int arg_count = 0;
+    size_t remaining;
+
+    if (!entry || !output || output_size == 0) {
+        pr_err("ceph_san_log_reconstruct: invalid parameters\n");
+        return -EINVAL;
+    }
+    remaining = output_size - 1; /* Reserve space for the NUL terminator */
+
+    /* Verify entry is a valid kernel address */
+    if (!is_valid_kernel_addr(entry)) {
+        pr_err("ceph_san_log_reconstruct: invalid entry pointer %p\n", entry);
+        return -EFAULT;
+    }
+    /* Dump the entry buffer pointer and validate it */
+    pr_debug("ceph_san_log_reconstruct: entry buffer pointer %px (len %u) is %s\n",
+             entry->buffer, entry->len,
+             is_valid_kernel_addr(entry->buffer) ? "valid" : "invalid");
+    if (!is_valid_kernel_addr(entry->buffer)) {
+        pr_err("ceph_san_log_reconstruct: invalid buffer pointer %p for entry %p\n",
+               entry->buffer, entry);
+        return -EFAULT;
+    }
+
+    /* Get the format string from the source info */
+    info = ceph_san_get_source_info(entry->source_id);
+    if (!info) {
+        pr_err("ceph_san_log_reconstruct: source info not found for ID %u\n", entry->source_id);
+        return -EINVAL;
+    }
+
+    fmt = info->fmt;
+    if (!fmt) {
+        pr_err("ceph_san_log_reconstruct: format string not found in source info for ID %u\n", entry->source_id);
+        return -EINVAL;
+    }
+
+    in_buffer = (char *)(entry->buffer);
+    out_ptr = output;
+    *out_ptr = '\0';
+
+    /* Process the format string */
+    while (*fmt && remaining > 0) {
+        if (*fmt != '%') {
+            /* Copy regular characters */
+            *out_ptr++ = *fmt++;
+            remaining--;
+            continue;
+        }
+        arg_count++;
+        fmt++; /* Skip the '%' */
+        /* Skip width specifiers */
+        while (*fmt >= '0' && *fmt <= '9')
+            fmt++;
+
+        /* Handle format specifiers */
+        switch (*fmt) {
+        case '%': /* Literal % */
+            *out_ptr++ = '%';
+            remaining--;
+            break;
+
+        case 's': { /* Inline string */
+            const char *str = in_buffer;
+            size_t len;
+
+            if (!is_valid_kernel_addr(str)) {
+                pr_err("ceph_san_log_reconstruct (%d): invalid inline string pointer %px\n",
+                       arg_count, str);
+                return -EFAULT;
+            }
+            in_buffer += round_up(strlen(str) + 1, 4);
+            len = strlen(str);
+            if (len > remaining)
+                len = remaining;
+            pr_debug("reconstruct: writing inline string '%s' (len=%zu) at in_offset=%ld\n",
+                     str, strlen(str), in_buffer - entry->buffer);
+            memcpy(out_ptr, str, len);
+            out_ptr += len;
+            remaining -= len;
+            break;
+        }
+
+        case 'd': case 'i': { /* Integer */
+            int val = *(int *)in_buffer;
+            in_buffer += sizeof(int);
+            pr_debug("reconstruct: reading int %d at in_offset=%ld\n",
+                     val, in_buffer - entry->buffer);
+            ret = snprintf(out_ptr, remaining, "%d", val);
+            if (ret > 0) {
+                if (ret > remaining)
+                    ret = remaining;
+                out_ptr += ret;
+                remaining -= ret;
+            }
+            break;
+        }
+
+        case 'u': { /* Unsigned integer */
+            unsigned int val = *(unsigned int *)in_buffer;
+            in_buffer += sizeof(unsigned int);
+            pr_debug("reconstruct: reading unsigned int %u at in_offset=%ld\n",
+                     val, in_buffer - entry->buffer);
+            ret = snprintf(out_ptr, remaining, "%u", val);
+            if (ret > 0) {
+                if (ret > remaining)
+                    ret = remaining;
+                out_ptr += ret;
+                remaining -= ret;
+            }
+            break;
+        }
+
+        case 'x': case 'X': { /* Hex */
+            unsigned int val = *(unsigned int *)in_buffer;
+            in_buffer += sizeof(unsigned int);
+            pr_debug("reconstruct: reading unsigned int %u at in_offset=%ld\n",
+                     val, in_buffer - entry->buffer);
+            ret = snprintf(out_ptr, remaining, (*fmt == 'x') ? "%x" : "%X", val);
+            if (ret > 0) {
+                if (ret > remaining)
+                    ret = remaining;
+                out_ptr += ret;
+                remaining -= ret;
+            }
+            break;
+        }
+
+        case 'p': { /* Pointer */
+            void *val = *(void **)in_buffer;
+            in_buffer += sizeof(void *);
+            pr_debug("reconstruct: reading pointer %px at in_offset=%ld\n",
+                     val, in_buffer - entry->buffer);
+            ret = snprintf(out_ptr, remaining, "%p", val);
+            if (ret > 0) {
+                if (ret > remaining)
+                    ret = remaining;
+                out_ptr += ret;
+                remaining -= ret;
+            }
+            break;
+        }
+
+        case 'o': { /* Octal */
+            unsigned int val = *(unsigned int *)in_buffer;
+            in_buffer += sizeof(unsigned int);
+            pr_debug("reconstruct: reading unsigned int %u (octal: %o) at in_offset=%ld\n",
+                     val, val, in_buffer - entry->buffer);
+            ret = snprintf(out_ptr, remaining, "%o", val);
+            if (ret > 0) {
+                if (ret > remaining)
+                    ret = remaining;
+                out_ptr += ret;
+                remaining -= ret;
+            }
+            break;
+        }
+
+        case 'l': { /* Long or long long */
+            fmt++;
+            if (*fmt == 'l') { /* Long long */
+                fmt++;
+                if (*fmt == 'd' || *fmt == 'i') {
+                    long long val = *(long long *)in_buffer;
+                    in_buffer += sizeof(long long);
+                    pr_debug("reconstruct: reading long long %lld at in_offset=%ld\n",
+                             val, in_buffer - entry->buffer);
+                    ret = snprintf(out_ptr, remaining, "%lld", val);
+                } else if (*fmt == 'u') {
+                    unsigned long long val = *(unsigned long long *)in_buffer;
+                    in_buffer += sizeof(unsigned long long);
+                    pr_debug("reconstruct: reading long long %llu at in_offset=%ld\n",
+                             val, in_buffer - entry->buffer);
+                    ret = snprintf(out_ptr, remaining, "%llu", val);
+                } else if (*fmt == 'x') {
+                    unsigned long long val = *(unsigned long long *)in_buffer;
+                    in_buffer += sizeof(unsigned long long);
+                    pr_debug("reconstruct: reading long long %llx at in_offset=%ld\n",
+                             val, in_buffer - entry->buffer);
+                    ret = snprintf(out_ptr, remaining, "%llx", val);
+                } else if (*fmt == 'X') {
+                    unsigned long long val = *(unsigned long long *)in_buffer;
+                    in_buffer += sizeof(unsigned long long);
+                    pr_debug("reconstruct: reading long long %llX at in_offset=%ld\n",
+                             val, in_buffer - entry->buffer);
+                    ret = snprintf(out_ptr, remaining, "%llX", val);
+                } else if (*fmt == 'o') {
+                    unsigned long long val = *(unsigned long long *)in_buffer;
+                    in_buffer += sizeof(unsigned long long);
+                    pr_debug("reconstruct: reading long long %llu (octal: %llo) at in_offset=%ld\n",
+                             val, val, in_buffer - entry->buffer);
+                    ret = snprintf(out_ptr, remaining, "%llo", val);
+                } else {
+                    pr_err("ceph_san_log_reconstruct: invalid long long format specifier '%%ll%c'\n", *fmt);
+                    return -EINVAL;
+                }
+            } else { /* Long */
+                if (*fmt == 'd' || *fmt == 'i') {
+                    long val = *(long *)in_buffer;
+                    in_buffer += sizeof(long);
+                    pr_debug("reconstruct: reading long %ld at in_offset=%ld\n",
+                             val, in_buffer - entry->buffer);
+                    ret = snprintf(out_ptr, remaining, "%ld", val);
+                } else if (*fmt == 'u') {
+                    unsigned long val = *(unsigned long *)in_buffer;
+                    in_buffer += sizeof(unsigned long);
+                    pr_debug("reconstruct: reading long %lu at in_offset=%ld\n",
+                             val, in_buffer - entry->buffer);
+                    ret = snprintf(out_ptr, remaining, "%lu", val);
+                } else if (*fmt == 'x') {
+                    unsigned long val = *(unsigned long *)in_buffer;
+                    in_buffer += sizeof(unsigned long);
+                    pr_debug("reconstruct: reading long %lx at in_offset=%ld\n",
+                             val, in_buffer - entry->buffer);
+                    ret = snprintf(out_ptr, remaining, "%lx", val);
+                } else if (*fmt == 'X') {
+                    unsigned long val = *(unsigned long *)in_buffer;
+                    in_buffer += sizeof(unsigned long);
+                    pr_debug("reconstruct: reading long %lX at in_offset=%ld\n",
+                             val, in_buffer - entry->buffer);
+                    ret = snprintf(out_ptr, remaining, "%lX", val);
+                } else if (*fmt == 'o') {
+                    unsigned long val = *(unsigned long *)in_buffer;
+                    in_buffer += sizeof(unsigned long);
+                    pr_debug("reconstruct: reading long %lu (octal: %lo) at in_offset=%ld\n",
+                             val, val, in_buffer - entry->buffer);
+                    ret = snprintf(out_ptr, remaining, "%lo", val);
+                } else {
+                    pr_err("ceph_san_log_reconstruct: invalid long format specifier '%%l%c'\n", *fmt);
+                    return -EINVAL;
+                }
+            }
+
+            if (ret > 0) {
+                if (ret > remaining)
+                    ret = remaining;
+                out_ptr += ret;
+                remaining -= ret;
+            }
+            break;
+        }
+
+        case 'z': { /* size_t */
+            fmt++; /* Look at the next character */
+            if (*fmt == 'u' || *fmt == 'd') {
+                size_t val = *(size_t *)in_buffer;
+                in_buffer += sizeof(size_t);
+                pr_debug("reconstruct: reading size_t %zu at in_offset=%ld\n",
+                         val, in_buffer - entry->buffer);
+                ret = snprintf(out_ptr, remaining, (*fmt == 'u') ? "%zu" : "%zd", val);
+                if (ret > 0) {
+                    if (ret > remaining)
+                        ret = remaining;
+                    out_ptr += ret;
+                    remaining -= ret;
+                }
+            } else if (*fmt == 'x' || *fmt == 'X') {
+                size_t val = *(size_t *)in_buffer;
+                in_buffer += sizeof(size_t);
+                pr_debug("reconstruct: reading size_t %zx at in_offset=%ld\n",
+                         val, in_buffer - entry->buffer);
+                ret = snprintf(out_ptr, remaining, (*fmt == 'x') ? "%zx" : "%zX", val);
+                if (ret > 0) {
+                    if (ret > remaining)
+                        ret = remaining;
+                    out_ptr += ret;
+                    remaining -= ret;
+                }
+            } else if (*fmt == 'o') {
+                size_t val = *(size_t *)in_buffer;
+                in_buffer += sizeof(size_t);
+                pr_debug("reconstruct: reading size_t %zu (octal: %zo) at in_offset=%ld\n",
+                         val, val, in_buffer - entry->buffer);
+                ret = snprintf(out_ptr, remaining, "%zo", val);
+                if (ret > 0) {
+                    if (ret > remaining)
+                        ret = remaining;
+                    out_ptr += ret;
+                    remaining -= ret;
+                }
+            } else {
+                pr_err("ceph_san_log_reconstruct: invalid size_t format specifier '%%z%c'\n", *fmt);
+                return -EINVAL;
+            }
+            break;
+        }
+
+        default:
+            /* Unknown format specifier */
+            pr_debug("ceph_san_log_reconstruct: unknown format specifier '%%%c' in fmt string\n", *fmt);
+            return -EINVAL;
+        }
+
+        fmt++;
+    }
+    /* Drop a trailing newline, if any, and make sure the output is terminated */
+    if (out_ptr > output && *(out_ptr - 1) == '\n') {
+        *(out_ptr - 1) = '\0';
+    } else {
+        *out_ptr = '\0';
+    }
+
+    return output_size - remaining - 1;
+}
+EXPORT_SYMBOL(ceph_san_log_reconstruct);
+
+/**
+ * ceph_san_log_trim - Trim the current context's last log entry by n bytes
+ * @n: number of bytes to give back to the pagefrag
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+int ceph_san_log_trim(unsigned int n)
+{
+    struct ceph_san_tls_ctx *ctx;
+    struct ceph_san_log_entry *entry;
+#if CEPH_SAN_TRACK_USAGE
+    struct ceph_san_source_info *source;
+#endif
+
+    ctx = ceph_san_get_tls_ctx();
+    if (!ctx)
+        return -ENOMEM;
+
+    entry = ctx->pf.last_entry;
+    if (!entry)
+        return -EINVAL;
+
+    /* Get the source info to update the byte counters */
+#if CEPH_SAN_TRACK_USAGE
+    source = ceph_san_get_source_info(entry->source_id);
+    if (source) {
+        if (in_serving_softirq()) {
+            atomic_sub(n, &source->napi_bytes);
+        } else {
+            atomic_sub(n, &source->task_bytes);
+        }
+    }
+#endif
+
+    entry->len -= n;
+    cephsan_pagefrag_trim(&ctx->pf, n);
+    return 0;
+}
+EXPORT_SYMBOL(ceph_san_log_trim);
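Two short sketches may help tie the logger pieces together; both are illustrative only and use hypothetical helper names. The first walks one context's pagefrag with the iterator and prints each entry through ceph_san_log_reconstruct(); it assumes the caller holds whatever reference keeps the context alive. The second shows the buffer layout the decoder above assumes on the encoder side (raw scalars, 4-byte-aligned inline strings); the real serializer is not part of this hunk.

    /* Hypothetical dump helper: walk one context's pagefrag and print every entry. */
    static void example_dump_ctx(struct ceph_san_tls_ctx *ctx)
    {
        struct ceph_san_log_iter iter;
        struct ceph_san_log_entry *entry;
        char line[CEPH_SAN_LOG_MAX_LEN];

        ceph_san_log_iter_init(&iter, &ctx->pf);
        while ((entry = ceph_san_log_iter_next(&iter)) != NULL) {
            if (ceph_san_log_reconstruct(entry, line, sizeof(line)) >= 0)
                pr_info("[%s:%d +%u] %s\n", ctx->comm, ctx->pid,
                        entry->ts_delta, line);
        }
    }

    /* Hypothetical encoder for "osd%d: %s", laid out the way the decoder expects. */
    static size_t example_serialize(char *buf, int osd, const char *msg)
    {
        char *p = buf;

        memcpy(p, &osd, sizeof(osd));        /* %d: raw int */
        p += sizeof(osd);
        strcpy(p, msg);                      /* %s: inline, NUL-terminated */
        p += round_up(strlen(msg) + 1, 4);   /* padded to 4 bytes, as %s decoding assumes */
        return p - buf;                      /* bytes consumed in entry->buffer */
    }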
diff --git a/net/ceph/ceph_san_pagefrag.c b/net/ceph/ceph_san_pagefrag.c
new file mode 100644
index 000000000000..1a13b27baa5e
--- /dev/null
+++ b/net/ceph/ceph_san_pagefrag.c
@@ -0,0 +1,154 @@
+#include <linux/printk.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/ceph/ceph_san_pagefrag.h>
+
+/**
+ * cephsan_pagefrag_init - Initialize the pagefrag allocator.
+ *
+ * Allocates a CEPHSAN_PAGEFRAG_SIZE contiguous buffer and resets the head
+ * pointer and allocation counters.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int cephsan_pagefrag_init(struct cephsan_pagefrag *pf)
+{
+    spin_lock_init(&pf->lock);
+    pf->pages = alloc_pages(GFP_KERNEL, get_order(CEPHSAN_PAGEFRAG_SIZE));
+    if (!pf->pages) {
+        pr_err("ceph_san_pagefrag_init: alloc_pages failed\n");
+        return -ENOMEM;
+    }
+
+    pf->buffer = page_address(pf->pages);
+    pf->head = 0;
+    pf->active_elements = 0;
+    pf->alloc_count = 0;
+    pf->last_entry = NULL;
+    memset(pf->buffer, 0xc, CEPHSAN_PAGEFRAG_SIZE);
+    pr_debug("ceph_san_pagefrag_init: buffer range %px - %px\n",
+             pf->buffer, pf->buffer + CEPHSAN_PAGEFRAG_SIZE);
+    return 0;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_init);
+
+/**
+ * cephsan_pagefrag_init_with_buffer - Initialize pagefrag with an existing buffer
+ * @pf: pagefrag allocator to initialize
+ * @buffer: pre-allocated buffer to use (must be at least CEPHSAN_PAGEFRAG_SIZE bytes)
+ * @size: size of the buffer
+ *
+ * Return: 0 on success
+ */
+int cephsan_pagefrag_init_with_buffer(struct cephsan_pagefrag *pf, void *buffer, size_t size)
+{
+    spin_lock_init(&pf->lock);
+    pf->pages = NULL; /* No pages allocated, using the provided buffer */
+    pf->buffer = buffer;
+    pf->head = 0;
+    pf->active_elements = 0;
+    pf->alloc_count = 0;
+    pf->last_entry = NULL;
+    return 0;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_init_with_buffer);
+
+/**
+ * cephsan_pagefrag_alloc - Allocate bytes from the pagefrag buffer.
+ * @pf: pagefrag allocator
+ * @n: number of bytes to allocate.
+ *
+ * Allocates @n bytes if there is sufficient free space in the buffer and
+ * advances the head pointer by @n bytes.
+ *
+ * Return: byte offset of the allocation within the buffer, or -ENOMEM if
+ * there is not enough space left.
+ */
+int cephsan_pagefrag_alloc(struct cephsan_pagefrag *pf, unsigned int n)
+{
+    u64 offset;
+
+    if (pf->head + n > CEPHSAN_PAGEFRAG_SIZE) {
+        return -ENOMEM; /* No space left */
+    }
+    offset = pf->head;
+    pf->head += n;
+    pf->alloc_count++;
+    pf->active_elements++;
+    return offset;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_alloc);
+
+/**
+ * cephsan_pagefrag_get_ptr - Get buffer pointer from pagefrag allocation result
+ * @pf: pagefrag allocator
+ * @val: return value from cephsan_pagefrag_alloc
+ *
+ * Return: pointer to allocated buffer region
+ */
+void *cephsan_pagefrag_get_ptr(struct cephsan_pagefrag *pf, u64 val)
+{
+    void *rc = (void *)(pf->buffer + val);
+
+    if (unlikely(pf->pages && pf->buffer != page_address(pf->pages))) {
+        pr_err("ceph_san_pagefrag_get_ptr: invalid buffer pointer %px @ %s\n",
+               pf->buffer, current->comm);
+        BUG();
+    }
+    if (unlikely(rc < pf->buffer || rc >= (pf->buffer + CEPHSAN_PAGEFRAG_SIZE))) {
+        pr_err("ceph_san_pagefrag_get_ptr: invalid pointer %px\n", rc);
+        BUG();
+    }
+    return rc;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_get_ptr);
+
+/**
+ * cephsan_pagefrag_deinit - Deinitialize the pagefrag allocator.
+ *
+ * Frees the internally allocated pages (if any) and resets the head pointer.
+ * An externally provided buffer is not freed here.
+ */
+void cephsan_pagefrag_deinit(struct cephsan_pagefrag *pf)
+{
+    if (pf->pages) {
+        __free_pages(pf->pages, get_order(CEPHSAN_PAGEFRAG_SIZE));
+        pf->pages = NULL;
+    }
+    /* Don't free the buffer if it was provided externally */
+    pf->buffer = NULL;
+    pf->head = 0;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_deinit);
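A short sketch of the caller-owned-buffer mode (illustrative only; it assumes the buffer is at least CEPHSAN_PAGEFRAG_SIZE bytes, since the bounds checks above always use that constant):

    struct cephsan_pagefrag pf;
    void *buf = kvmalloc(CEPHSAN_PAGEFRAG_SIZE, GFP_KERNEL);

    if (buf) {
        cephsan_pagefrag_init_with_buffer(&pf, buf, CEPHSAN_PAGEFRAG_SIZE);
        /* ... use the pagefrag ... */
        cephsan_pagefrag_deinit(&pf);   /* does not free an external buffer */
        kvfree(buf);
    }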
+
+/**
+ * cephsan_pagefrag_reset - Reset the pagefrag allocator.
+ *
+ * Resets the head pointer and allocation counters to the beginning of the buffer.
+ */
+void cephsan_pagefrag_reset(struct cephsan_pagefrag *pf)
+{
+    spin_lock(&pf->lock);
+    pf->head = 0;
+    pf->active_elements = 0;
+    pf->alloc_count = 0;
+    pf->last_entry = NULL;
+    spin_unlock(&pf->lock);
+}
+EXPORT_SYMBOL(cephsan_pagefrag_reset);
+
+/**
+ * cephsan_pagefrag_trim_head - Pull the head pointer back by @n bytes.
+ */
+void cephsan_pagefrag_trim_head(struct cephsan_pagefrag *pf, unsigned int n)
+{
+    if (n > pf->head)
+        pf->head = 0;
+    else
+        pf->head -= n;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_trim_head);
+
+/**
+ * cephsan_pagefrag_trim - Give back @n bytes; trimming everything also resets the counters.
+ */
+void cephsan_pagefrag_trim(struct cephsan_pagefrag *pf, unsigned int n)
+{
+    if (n >= pf->head) {
+        pf->head = 0;
+        pf->active_elements = 0;
+        pf->alloc_count = 0;
+        pf->last_entry = NULL;
+    } else {
+        pf->head -= n;
+    }
+}
+EXPORT_SYMBOL(cephsan_pagefrag_trim);
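Finally, a minimal allocation round-trip to make the offset-based API concrete (illustrative only): cephsan_pagefrag_alloc() hands back a byte offset, and cephsan_pagefrag_get_ptr() turns it into a pointer.

    int off = cephsan_pagefrag_alloc(pf, 32);

    if (off >= 0) {
        void *p = cephsan_pagefrag_get_ptr(pf, (u64)off);
        memset(p, 0, 32);               /* the region now belongs to the caller */
        cephsan_pagefrag_trim(pf, 32);  /* give it back when done */
    }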