From c5a281b054e6c6c757e2ec15cb53b63783dca023 Mon Sep 17 00:00:00 2001
From: Alex Markuze
Date: Mon, 10 Feb 2025 16:05:50 +0000
Subject: [PATCH] Adding ceph_san

---
 fs/ceph/debugfs.c               |  50 ++++++++-
 fs/ceph/super.c                 |  10 +-
 fs/ceph/super.h                 |   1 +
 include/linux/ceph/ceph_debug.h |  14 ++-
 include/linux/ceph/ceph_san.h   | 116 +++++++++++++++++++
 net/ceph/Makefile               |   1 +
 net/ceph/ceph_san.c             | 191 ++++++++++++++++++++++++++++++++
 net/ceph/messenger_v2.c         |  24 ++--
 8 files changed, 391 insertions(+), 16 deletions(-)
 create mode 100644 include/linux/ceph/ceph_san.h
 create mode 100644 net/ceph/ceph_san.c

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fdf9dc15eafae..9fa6203bb7411 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -11,6 +11,7 @@
 #include
 
 #include
+#include
 #include
 #include
 #include
@@ -22,6 +23,46 @@
 #include "mds_client.h"
 #include "metric.h"
 
+/*
+ * Dump every per-CPU Ceph SAN log ring, oldest entry first.  The rings are
+ * read without any synchronisation against concurrent log_cephsan() calls.
+ */
+static int ceph_san_show(struct seq_file *s, void *p)
+{
+	const unsigned int mask = CEPH_SAN_MAX_LOGS - 1;
+	unsigned int cpu, i;
+
+	seq_puts(s, "Ceph SAN logs:\n");
+	seq_printf(s, "%-16s %-8s %-32s\n", "Task", "PID", "Log");
+	seq_puts(s, "--------------------------------------------------------------\n");
+
+	for_each_possible_cpu(cpu) {
+		struct ceph_san_tls_logger *tls = &per_cpu(ceph_san_tls, cpu);
+		unsigned int head = tls->head_idx & mask;
+		int idx = 0;
+
+		/* Nothing to show if cephsan_init() did not set up this CPU. */
+		if (!tls->logs)
+			continue;
+
+		/* head is the slot that is overwritten next, i.e. the oldest. */
+		for (i = 0; i < CEPH_SAN_MAX_LOGS; i++) {
+			struct ceph_san_log_entry *log = &tls->logs[(head + i) & mask];
+			struct timespec64 ts;
+
+			if (log->ts == 0)	/* treated as an empty slot */
+				continue;
+
+			jiffies_to_timespec64(log->ts, &ts);
+			seq_printf(s, "%u:%lld.%09ld:%d) %-16s %-8d:%s\n",
+				   cpu, (long long)ts.tv_sec, ts.tv_nsec, idx++,
+				   log->comm, log->pid, log->buf);
+		}
+	}
+
+	return 0;
+}
+
 static int mdsmap_show(struct seq_file *s, void *p)
 {
 	int i;
@@ -371,6 +412,7 @@ DEFINE_SHOW_ATTRIBUTE(metrics_file);
 DEFINE_SHOW_ATTRIBUTE(metrics_latency);
 DEFINE_SHOW_ATTRIBUTE(metrics_size);
 DEFINE_SHOW_ATTRIBUTE(metrics_caps);
+DEFINE_SHOW_ATTRIBUTE(ceph_san);
 
 
 /*
@@ -406,13 +448,14 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 	debugfs_remove(fsc->debugfs_caps);
 	debugfs_remove(fsc->debugfs_status);
 	debugfs_remove(fsc->debugfs_mdsc);
+	debugfs_remove(fsc->debugfs_cephsan);
 	debugfs_remove_recursive(fsc->debugfs_metrics_dir);
 	doutc(fsc->client, "done\n");
 }
 
 void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 {
-	char name[100];
+	char name[NAME_MAX];
 
 	doutc(fsc->client, "begin\n");
 	fsc->debugfs_congestion_kb =
@@ -458,6 +501,11 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 						  fsc->client->debugfs_dir,
 						  fsc,
 						  &status_fops);
+	fsc->debugfs_cephsan = debugfs_create_file("cephsan",
+						   0444,
+						   fsc->client->debugfs_dir,
+						   fsc,
+						   &ceph_san_fops);
 
 	fsc->debugfs_metrics_dir = debugfs_create_dir("metrics",
 						      fsc->client->debugfs_dir);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4344e1f118069..6b5c1657febb2 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
+#include <linux/mm_types.h>
 #include
+#include
 
 #include
 #include
@@ -1633,6 +1635,11 @@ static int __init init_ceph(void)
 		goto out;
 
 	ceph_flock_init();
+	ret = cephsan_init();
+	if (ret)
+		goto out_caches;
 	ret = register_filesystem(&ceph_fs_type);
-	if (ret)
+	if (ret) {
+		cephsan_cleanup();
 		goto out_caches;
+	}
@@ -1650,6 +1657,7 @@ out:
 static void __exit exit_ceph(void)
 {
 	dout("exit_ceph\n");
 	unregister_filesystem(&ceph_fs_type);
 	destroy_caches();
+	cephsan_cleanup();
 }
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index af14ec3822462..d7e52f0c79a4c 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -153,6 +153,7 @@ struct ceph_fs_client {
 	struct dentry *debugfs_status;
 	struct dentry *debugfs_mds_sessions;
 	struct dentry *debugfs_metrics_dir;
+	struct dentry *debugfs_cephsan;
 #endif
 
 #ifdef CONFIG_CEPH_FSCACHE
diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h
index 5f904591fa5f9..229dbda82df93 100644
--- a/include/linux/ceph/ceph_debug.h
+++ b/include/linux/ceph/ceph_debug.h
@@ -2,9 +2,11 @@
 #ifndef _FS_CEPH_DEBUG_H
 #define _FS_CEPH_DEBUG_H
 
+#undef pr_fmt
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include
+#include
 
 #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
 
@@ -16,15 +18,23 @@
 
 # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
 #  define dout(fmt, ...)						\
+	do {								\
 	pr_debug("%.*s %12.12s:%-4d : " fmt,				\
 		 8 - (int)sizeof(KBUILD_MODNAME), "    ",		\
-		 kbasename(__FILE__), __LINE__, ##__VA_ARGS__)
+		 kbasename(__FILE__), __LINE__, ##__VA_ARGS__);		\
+	CEPH_SAN_LOG("%12.12s:%-4d : " fmt,				\
+		 kbasename(__FILE__), __LINE__, ##__VA_ARGS__);		\
+	} while (0)
 #  define doutc(client, fmt, ...)					\
+	do {								\
 	pr_debug("%.*s %12.12s:%-4d : [%pU %llu] " fmt,			\
 		 8 - (int)sizeof(KBUILD_MODNAME), "    ",		\
 		 kbasename(__FILE__), __LINE__,				\
 		 &client->fsid, client->monc.auth->global_id,		\
-		 ##__VA_ARGS__)
+		 ##__VA_ARGS__);					\
+	CEPH_SAN_LOG("%12.12s:%-4d : " fmt,				\
+		 kbasename(__FILE__), __LINE__, ##__VA_ARGS__);		\
+	} while (0)
 # else
 /* faux printk call just to see any compiler warnings. */
 #  define dout(fmt, ...)					\
diff --git a/include/linux/ceph/ceph_san.h b/include/linux/ceph/ceph_san.h
new file mode 100644
index 0000000000000..e6d3c34ce6cae
--- /dev/null
+++ b/include/linux/ceph/ceph_san.h
@@ -0,0 +1,116 @@
+#ifndef CEPHSAN_H
+#define CEPHSAN_H
+
+#include
+#include
+#include
+#include
+
+
+DECLARE_PER_CPU(struct ceph_san_tls_logger, ceph_san_tls);
+DECLARE_PER_CPU(struct cephsan_pagefrag, ceph_san_pagefrag);
+
+/*
+ * Pagefrag Allocator for ceph_san:
+ * - A contiguous buffer of CEPHSAN_PAGEFRAG_SIZE bytes (4MB) is allocated.
+ * - The allocator maintains two unsigned int indices (head and tail) into the buffer.
+ * - cephsan_pagefrag_alloc(pf, n) reserves n contiguous bytes (if available), advances
+ *   the head index past them (wrapping around at the end) and returns an encoded offset.
+ * - cephsan_pagefrag_free(pf, n) advances the tail index by n bytes.
+ *
+ * This simple ring-buffer allocator is intended for short-lived allocations in the Ceph SAN code.
+ */
+
+#define CEPHSAN_PAGEFRAG_SIZE (1 << 22) /* 4MB */
+
+/* Pagefrag allocator structure */
+struct cephsan_pagefrag {
+	struct page *pages;	/* backing pages of the buffer */
+	void *buffer;		/* kernel address of the buffer */
+	unsigned int head;	/* offset at which the next allocation starts */
+	unsigned int tail;	/* offset up to which bytes have been freed */
+};
+
+/*
+ * None of the helpers below take a lock or disable preemption themselves.
+ * NOTE(review): callers presumably have to serialise access to a given @pf.
+ */
+
+/**
+ * cephsan_pagefrag_init - Initialize the pagefrag allocator.
+ * @pf: pagefrag allocator.
+ * Return: 0 on success, negative error code on failure.
+ */
+int cephsan_pagefrag_init(struct cephsan_pagefrag *pf);
+
+/**
+ * cephsan_pagefrag_alloc - Allocate bytes from the pagefrag buffer.
+ * @pf: pagefrag allocator.
+ * @n: number of bytes to allocate.
+ *
+ * Reserves @n contiguous bytes, if available, and advances the head index.
+ *
+ * Return: (size << 32) | offset into the buffer, or 0 if not enough space.
+ */
+u64 cephsan_pagefrag_alloc(struct cephsan_pagefrag *pf, unsigned int n);
+
+/**
+ * cephsan_pagefrag_free - Free bytes in the pagefrag allocator.
+ * @pf: pagefrag allocator.
+ * @n: number of bytes to free.
+ *
+ * Advances the tail pointer by @n bytes (wrapping around if needed).
+ */
+void cephsan_pagefrag_free(struct cephsan_pagefrag *pf, unsigned int n);
+/**
+ * cephsan_pagefrag_deinit - Deinitialize the pagefrag allocator.
+ *
+ * Frees the allocated buffer and resets the head and tail pointers.
+ */
+void cephsan_pagefrag_deinit(struct cephsan_pagefrag *pf);
+
+#ifdef CONFIG_DEBUG_FS
+/* Log entries per CPU; a power of two because it doubles as an index mask. */
+#define CEPH_SAN_MAX_LOGS (8192 << 2)
+#define LOG_BUF_SIZE 256
+
+void cephsan_cleanup(void);
+int cephsan_init(void);
+void log_cephsan(char *buf);
+
+/* The local buffer has an unlikely name so that it cannot shadow an argument. */
+#define CEPH_SAN_LOG(fmt, ...) do { \
+	char __ceph_san_buf[LOG_BUF_SIZE]; \
+	snprintf(__ceph_san_buf, sizeof(__ceph_san_buf), fmt, ##__VA_ARGS__); \
+	log_cephsan(__ceph_san_buf); \
+} while (0)
+/*
+ * Internal definitions for Ceph SAN logs.
+ * These definitions are not part of the public API but are required by debugfs.c.
+ */
+struct ceph_san_log_entry {
+	char comm[TASK_COMM_LEN];	/* copy of current->comm */
+	char *buf;			/* message text in the per-CPU pagefrag buffer */
+	u64 ts;				/* jiffies when logged; 0 is skipped by debugfs */
+	pid_t pid;			/* current->pid */
+	u32 len;			/* bytes to give back to the pagefrag buffer */
+};
+
+struct ceph_san_tls_logger {
+	size_t head_idx;		/* entries logged so far; masked to pick a slot */
+	struct page *pages;		/* backing pages of @logs */
+	struct ceph_san_log_entry *logs;	/* CEPH_SAN_MAX_LOGS entries */
+};
+#else /* CONFIG_DEBUG_FS */
+#define CEPH_SAN_LOG(fmt, ...) do {} while (0)
+
+static inline void cephsan_cleanup(void) {}
+static inline int __init cephsan_init(void) { return 0; }
+
+#endif /* CONFIG_DEBUG_FS */
+
+#define CEPH_SAN_SET_REQ(req) do { current->journal_info = (req); } while (0)
+#define CEPH_SAN_RESET_REQ() do { current->journal_info = NULL; } while (0)
+#define CEPH_SAN_GET_REQ() (current->journal_info)
+
+#endif /* CEPHSAN_H */
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 8802a0c0155d5..bbfff0dd9081a 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -10,6 +10,7 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
 	osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
 	striper.o \
 	debugfs.o \
+	ceph_san.o \
 	auth.o auth_none.o \
 	crypto.o armor.o \
 	auth_x.o \
diff --git a/net/ceph/ceph_san.c b/net/ceph/ceph_san.c
new file mode 100644
index 0000000000000..eb04740e5fedc
--- /dev/null
+++ b/net/ceph/ceph_san.c
@@ -0,0 +1,218 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Use per-core TLS logger; no global list or lock needed */
+DEFINE_PER_CPU(struct ceph_san_tls_logger, ceph_san_tls);
+EXPORT_PER_CPU_SYMBOL(ceph_san_tls);
+
+DEFINE_PER_CPU(struct cephsan_pagefrag, ceph_san_pagefrag);
+EXPORT_PER_CPU_SYMBOL(ceph_san_pagefrag);
+
+/* Page order of one per-CPU array of CEPH_SAN_MAX_LOGS log entries. */
+#define CEPH_SAN_LOGS_ORDER \
+	get_order(CEPH_SAN_MAX_LOGS * sizeof(struct ceph_san_log_entry))
+
+/* Upper 32 bits of a cephsan_pagefrag_alloc() result: bytes consumed. */
+#define CEPHSAN_PAGEFRAG_GET_N(val) ((val) >> 32)
+
+/**
+ * cephsan_pagefrag_get_ptr - Get buffer pointer from pagefrag allocation result
+ * @pf: pagefrag allocator
+ * @val: return value from cephsan_pagefrag_alloc
+ *
+ * Return: pointer to allocated buffer region
+ */
+static inline void *cephsan_pagefrag_get_ptr(struct cephsan_pagefrag *pf, u64 val)
+{
+	return pf->buffer + (val & 0xFFFFFFFF);
+}
+
+/**
+ * log_cephsan - Append one message to the log ring of the current CPU.
+ * @buf: NUL-terminated message; a trailing newline is stripped in place.
+ *
+ * The slot being reused first gives its old text back to the per-CPU pagefrag
+ * buffer.  The message is dropped when the ring is not set up or is full.
+ */
+void log_cephsan(char *buf)
+{
+	struct ceph_san_tls_logger *tls;
+	struct ceph_san_log_entry *entry;
+	struct cephsan_pagefrag *pf;
+	size_t len = strlen(buf);
+	u64 buf_idx;
+
+	/* Strip a trailing newline only; an empty string must not touch buf[-1]. */
+	if (len && buf[len - 1] == '\n')
+		buf[--len] = '\0';
+	len++;	/* the terminating NUL is stored too */
+
+	/* Pin this CPU while its ring and pagefrag buffer are being updated. */
+	tls = get_cpu_ptr(&ceph_san_tls);
+	pf = this_cpu_ptr(&ceph_san_pagefrag);
+	/* dout() may run before cephsan_init() or after cephsan_cleanup(). */
+	if (!tls->logs || !pf->buffer)
+		goto out;
+
+	entry = &tls->logs[tls->head_idx++ & (CEPH_SAN_MAX_LOGS - 1)];
+	cephsan_pagefrag_free(pf, entry->len);
+	entry->len = 0;
+	entry->buf = NULL;
+	entry->ts = 0;	/* stays "unused" if the allocation below fails */
+
+	buf_idx = cephsan_pagefrag_alloc(pf, len);
+	if (!buf_idx)
+		goto out;
+
+	/* Store the bytes really consumed so the later free covers padding. */
+	entry->len = CEPHSAN_PAGEFRAG_GET_N(buf_idx);
+	entry->buf = cephsan_pagefrag_get_ptr(pf, buf_idx);
+	memcpy(entry->buf, buf, len);
+	memcpy(entry->comm, current->comm, TASK_COMM_LEN);
+	entry->pid = current->pid;
+	entry->ts = jiffies;
+out:
+	put_cpu_ptr(&ceph_san_tls);
+}
+EXPORT_SYMBOL(log_cephsan);
+
+/* Cleanup function to free all TLS logger objects and pagefrag buffers.
+ * Call this at module exit; it is also safe after a partial cephsan_init().
+ */
+void cephsan_cleanup(void)
+{
+	int cpu;
+	struct ceph_san_tls_logger *tls;
+
+	for_each_possible_cpu(cpu) {
+		tls = per_cpu_ptr(&ceph_san_tls, cpu);
+		if (tls->pages)	/* struct page *, so __free_pages() not free_pages() */
+			__free_pages(tls->pages, CEPH_SAN_LOGS_ORDER);
+		memset(tls, 0, sizeof(*tls));	/* log_cephsan() checks ->logs */
+		cephsan_pagefrag_deinit(per_cpu_ptr(&ceph_san_pagefrag, cpu));
+	}
+}
+EXPORT_SYMBOL(cephsan_cleanup);
+
+/* Initialize the Ceph SAN logging infrastructure.
+ * Call this at module init; on failure every allocation is released again.
+ */
+int cephsan_init(void)
+{
+	struct ceph_san_tls_logger *tls;
+	int cpu, ret;
+
+	for_each_possible_cpu(cpu) {
+		tls = per_cpu_ptr(&ceph_san_tls, cpu);
+		/* Zeroed so that every slot starts out with ts == 0 and len == 0. */
+		tls->pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, CEPH_SAN_LOGS_ORDER);
+		if (!tls->pages) {
+			pr_err("Failed to allocate TLS logs for CPU %d\n", cpu);
+			ret = -ENOMEM;
+			goto err;
+		}
+		tls->logs = page_address(tls->pages);
+
+		ret = cephsan_pagefrag_init(per_cpu_ptr(&ceph_san_pagefrag, cpu));
+		if (ret)
+			goto err;
+	}
+	return 0;
+err:
+	cephsan_cleanup();
+	return ret;
+}
+EXPORT_SYMBOL(cephsan_init);
+
+/**
+ * cephsan_pagefrag_init - Initialize the pagefrag allocator.
+ * @pf: pagefrag allocator to set up.
+ *
+ * Allocates a contiguous CEPHSAN_PAGEFRAG_SIZE buffer and resets head and tail.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int cephsan_pagefrag_init(struct cephsan_pagefrag *pf)
+{
+	pf->pages = alloc_pages(GFP_KERNEL, get_order(CEPHSAN_PAGEFRAG_SIZE));
+	if (!pf->pages)
+		return -ENOMEM;
+
+	pf->buffer = page_address(pf->pages);
+	pf->head = 0;
+	pf->tail = 0;
+	return 0;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_init);
+
+/**
+ * cephsan_pagefrag_alloc - Allocate bytes from the pagefrag buffer.
+ * @pf: pagefrag allocator.
+ * @n: number of bytes to allocate.
+ *
+ * Return: (bytes consumed << 32) | start offset, or 0 if not enough space.
+ * Bytes consumed is @n plus any padding skipped at the end of the buffer.
+ */
+u64 cephsan_pagefrag_alloc(struct cephsan_pagefrag *pf, unsigned int n)
+{
+	unsigned int start = pf->head;
+	unsigned int pad = 0;
+
+	/* head == tail means "empty", so head must never catch up with tail. */
+	if (!n || n >= CEPHSAN_PAGEFRAG_SIZE)
+		return 0;
+
+	if (pf->tail > start) {
+		/* Free space is the single gap [head, tail). */
+		if (n >= pf->tail - start) {
+			pr_err("Not enough space in pagefrag buffer\n");
+			return 0;
+		}
+	} else if (n > CEPHSAN_PAGEFRAG_SIZE - start ||
+		   (n == CEPHSAN_PAGEFRAG_SIZE - start && !pf->tail)) {
+		/* No room at the end: restart at offset 0, strictly below tail. */
+		if (n >= pf->tail) {
+			pr_err("Not enough space for wrap-around allocation\n");
+			return 0;
+		}
+		/* Compute the padding before start is reset. */
+		pad = CEPHSAN_PAGEFRAG_SIZE - start;
+		start = 0;
+	}
+
+	pf->head = (start + n) & (CEPHSAN_PAGEFRAG_SIZE - 1);
+	return ((u64)(n + pad) << 32) | start;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_alloc);
+
+/**
+ * cephsan_pagefrag_free - Free bytes in the pagefrag allocator.
+ * @pf: pagefrag allocator.
+ * @n: number of bytes to free.
+ *
+ * Advances the tail pointer by @n bytes (wrapping around if needed).
+ */
+void cephsan_pagefrag_free(struct cephsan_pagefrag *pf, unsigned int n)
+{
+	pf->tail = (pf->tail + n) & (CEPHSAN_PAGEFRAG_SIZE - 1);
+}
+EXPORT_SYMBOL(cephsan_pagefrag_free);
+
+/**
+ * cephsan_pagefrag_deinit - Deinitialize the pagefrag allocator.
+ * @pf: pagefrag allocator.
+ *
+ * Frees the allocated buffer and resets the head and tail pointers.
+ */
+void cephsan_pagefrag_deinit(struct cephsan_pagefrag *pf)
+{
+	if (pf->pages)	/* from alloc_pages(), so kfree() must not be used */
+		__free_pages(pf->pages, get_order(CEPHSAN_PAGEFRAG_SIZE));
+	memset(pf, 0, sizeof(*pf));
+}
+EXPORT_SYMBOL(cephsan_pagefrag_deinit);
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index bd608ffa06279..12954398720df 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -197,14 +197,14 @@ static int ceph_tcp_send(struct ceph_connection *con)
 {
 	int ret;
 
-	dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
-	     iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
+	//dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
+	//     iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
 	if (con->v2.out_iter_sendpage)
 		ret = do_try_sendpage(con->sock, &con->v2.out_iter);
 	else
 		ret = do_sendmsg(con->sock, &con->v2.out_iter);
-	dout("%s con %p ret %d left %zu\n", __func__, con, ret,
-	     iov_iter_count(&con->v2.out_iter));
+	//dout("%s con %p ret %d left %zu\n", __func__, con, ret,
+	//     iov_iter_count(&con->v2.out_iter));
 	return ret;
 }
 
@@ -3156,8 +3156,8 @@ int ceph_con_v2_try_read(struct ceph_connection *con)
 {
 	int ret;
 
-	dout("%s con %p state %d need %zu\n", __func__, con, con->state,
-	     iov_iter_count(&con->v2.in_iter));
+	//dout("%s con %p state %d need %zu\n", __func__, con, con->state,
+	//     iov_iter_count(&con->v2.in_iter));
 
 	if (con->state == CEPH_CON_S_PREOPEN)
 		return 0;
@@ -3273,7 +3273,7 @@ static void queue_zeros(struct ceph_connection *con)
 
 static void finish_message(struct ceph_connection *con)
 {
-	dout("%s con %p msg %p\n", __func__, con, con->out_msg);
+	//dout("%s con %p msg %p\n", __func__, con, con->out_msg);
 
 	/* we end up here both plain and secure modes */
 	if (con->v2.out_enc_pages) {
@@ -3296,8 +3296,8 @@ static int populate_out_iter(struct ceph_connection *con)
 {
 	int ret;
 
-	dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
-	     con->v2.out_state);
+	//dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
+	//     con->v2.out_state);
 	WARN_ON(iov_iter_count(&con->v2.out_iter));
 
 	if (con->state != CEPH_CON_S_OPEN) {
@@ -3359,13 +3359,13 @@ static int populate_out_iter(struct ceph_connection *con)
 populated:
 	if (WARN_ON(!iov_iter_count(&con->v2.out_iter)))
 		return -ENODATA;
-	dout("%s con %p populated %zu\n", __func__, con,
-	     iov_iter_count(&con->v2.out_iter));
+	//dout("%s con %p populated %zu\n", __func__, con,
+	//     iov_iter_count(&con->v2.out_iter));
 	return 1;
 
 nothing_pending:
 	WARN_ON(iov_iter_count(&con->v2.out_iter));
-	dout("%s con %p nothing pending\n", __func__, con);
+	//dout("%s con %p nothing pending\n", __func__, con);
 	ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING);
 	return 0;
 }
-- 
2.39.5