]> git.apps.os.sepia.ceph.com Git - ceph-client.git/commitdiff
Adding ceph_san
authorAlex Markuze <amarkuze@redhat.com>
Mon, 10 Feb 2025 16:05:50 +0000 (16:05 +0000)
committerAlex Markuze <amarkuze@redhat.com>
Wed, 26 Feb 2025 12:43:34 +0000 (12:43 +0000)
fs/ceph/debugfs.c
fs/ceph/super.c
fs/ceph/super.h
include/linux/ceph/ceph_debug.h
include/linux/ceph/ceph_san.h [new file with mode: 0644]
net/ceph/Makefile
net/ceph/ceph_san.c [new file with mode: 0644]
net/ceph/messenger_v2.c

index fdf9dc15eafaefcc63be128c23b5339475163b79..9fa6203bb74114e3a658092d6b9183d9eda9fd24 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/ktime.h>
 
 #include <linux/ceph/libceph.h>
+#include <linux/ceph/ceph_san.h>
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/auth.h>
 #include <linux/ceph/debugfs.h>
 #include "mds_client.h"
 #include "metric.h"
 
+/*
+ * debugfs "cephsan" show handler: dump every per-CPU Ceph SAN log ring.
+ *
+ * Each populated entry is printed as
+ *   <cpu>:<sec>.<nsec>:<seq>) <comm> <pid>:<message>
+ * where <seq> counts the entries printed so far for that CPU.  Entries whose
+ * timestamp is 0 are treated as never written and are skipped.
+ *
+ * Always returns 0.
+ */
+static int ceph_san_show(struct seq_file *s, void *p)
+{
+       struct ceph_san_tls_logger *tls;
+       size_t cpu;
+
+       seq_puts(s, "Ceph SAN logs:\n");
+       seq_printf(s, "%-16s %-8s %-32s\n",
+                  "Task", "PID", "Log");
+       seq_puts(s, "--------------------------------------------------------------\n");
+
+       for_each_possible_cpu(cpu) {
+               struct ceph_san_log_entry *log;
+               struct timespec64 ts;
+               int head_idx;
+               int idx = 0;
+               int i;
+
+               tls = &per_cpu(ceph_san_tls, cpu);
+
+               /* logs is only set once this CPU's ring has been allocated. */
+               if (!tls->logs)
+                       continue;
+
+               head_idx = tls->head_idx & (CEPH_SAN_MAX_LOGS - 1);
+
+               /*
+                * Walk the ring starting just past the writer's next slot and
+                * stop when we come back around to it; the slot at head_idx
+                * itself is not visited.
+                */
+               for (i = (head_idx + 1) & (CEPH_SAN_MAX_LOGS - 1);
+                    i != head_idx;
+                    i = (i + 1) & (CEPH_SAN_MAX_LOGS - 1)) {
+                       log = &tls->logs[i];
+
+                       if (log->ts == 0)
+                               continue;
+
+                       jiffies_to_timespec64(log->ts, &ts);
+                       seq_printf(s, "%zu:%lld.%09ld:%d) %-16s %-8d:%s\n",
+                               cpu,
+                               (long long)ts.tv_sec,
+                               ts.tv_nsec,
+                               idx++,
+                               log->comm,
+                               log->pid,
+                               log->buf);
+               }
+       }
+
+       return 0;
+}
+
 static int mdsmap_show(struct seq_file *s, void *p)
 {
        int i;
@@ -371,6 +412,7 @@ DEFINE_SHOW_ATTRIBUTE(metrics_file);
 DEFINE_SHOW_ATTRIBUTE(metrics_latency);
 DEFINE_SHOW_ATTRIBUTE(metrics_size);
 DEFINE_SHOW_ATTRIBUTE(metrics_caps);
+DEFINE_SHOW_ATTRIBUTE(ceph_san);
 
 
 /*
@@ -406,13 +448,14 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
        debugfs_remove(fsc->debugfs_caps);
        debugfs_remove(fsc->debugfs_status);
        debugfs_remove(fsc->debugfs_mdsc);
+       debugfs_remove(fsc->debugfs_cephsan);
        debugfs_remove_recursive(fsc->debugfs_metrics_dir);
        doutc(fsc->client, "done\n");
 }
 
 void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 {
-       char name[100];
+       char name[NAME_MAX];
 
        doutc(fsc->client, "begin\n");
        fsc->debugfs_congestion_kb =
@@ -458,6 +501,11 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
                                                  fsc->client->debugfs_dir,
                                                  fsc,
                                                  &status_fops);
+       fsc->debugfs_cephsan = debugfs_create_file("cephsan",
+                                                       0444,
+                                                       fsc->client->debugfs_dir,
+                                                       fsc,
+                                                       &ceph_san_fops);
 
        fsc->debugfs_metrics_dir = debugfs_create_dir("metrics",
                                                      fsc->client->debugfs_dir);
index 4344e1f118069ae54b5c23b0c778faf19d04d25e..6b5c1657febb2e312d277cbdc6a3e16c9bcc716b 100644 (file)
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
+#include "linux/mm_types.h"
 #include <linux/ceph/ceph_debug.h>
+#include <linux/ceph/ceph_san.h>
 
 #include <linux/backing-dev.h>
 #include <linux/ctype.h>
 
 #include <uapi/linux/magic.h>
 
+
+// If no header file, declare the function prototype
+// int allocate_and_add_ceph_san(void);
+
 static DEFINE_SPINLOCK(ceph_fsc_lock);
 static LIST_HEAD(ceph_fsc_list);
 
@@ -1633,6 +1639,7 @@ static int __init init_ceph(void)
                goto out;
 
        ceph_flock_init();
+       ret = cephsan_init();
        ret = register_filesystem(&ceph_fs_type);
        if (ret)
                goto out_caches;
@@ -1650,6 +1657,7 @@ out:
 static void __exit exit_ceph(void)
 {
        dout("exit_ceph\n");
+       cephsan_cleanup();
        unregister_filesystem(&ceph_fs_type);
        destroy_caches();
 }
index af14ec382246257aaf5b1f7445168a6039697376..d7e52f0c79a4c0c23ed5f029d3687c983655121c 100644 (file)
@@ -153,6 +153,7 @@ struct ceph_fs_client {
        struct dentry *debugfs_status;
        struct dentry *debugfs_mds_sessions;
        struct dentry *debugfs_metrics_dir;
+       struct dentry *debugfs_cephsan;
 #endif
 
 #ifdef CONFIG_CEPH_FSCACHE
index 5f904591fa5f9e57eff115dd88ee948c38fe2260..229dbda82df936f5dcc5602939dd21842432366a 100644 (file)
@@ -2,9 +2,11 @@
 #ifndef _FS_CEPH_DEBUG_H
 #define _FS_CEPH_DEBUG_H
 
+#undef pr_fmt
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/string.h>
+#include <linux/ceph/ceph_san.h>
 
 #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
 
 
 # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
+/*
+ * dout()/doutc(): emit the message through pr_debug() and also record it in
+ * the Ceph SAN per-CPU log.  The pr_debug() text keeps its previous
+ * "file:line : " layout, and both SAN records use that same separator.
+ * The SAN record of doutc() does not carry the "[fsid global_id]" prefix.
+ */
 #  define dout(fmt, ...)                                               \
-       pr_debug("%.*s %12.12s:%-4d : " fmt,                            \
+       do {                                                            \
+       pr_debug("%.*s %12.12s:%-4d : " fmt,                            \
                 8 - (int)sizeof(KBUILD_MODNAME), "    ",               \
-                kbasename(__FILE__), __LINE__, ##__VA_ARGS__)
+                kbasename(__FILE__), __LINE__, ##__VA_ARGS__);         \
+       CEPH_SAN_LOG("%12.12s:%-4d : " fmt,                             \
+                kbasename(__FILE__), __LINE__, ##__VA_ARGS__);         \
+       } while (0)
 #  define doutc(client, fmt, ...)                                      \
+       do {                                                            \
        pr_debug("%.*s %12.12s:%-4d : [%pU %llu] " fmt,                 \
                 8 - (int)sizeof(KBUILD_MODNAME), "    ",               \
                 kbasename(__FILE__), __LINE__,                         \
                 &client->fsid, client->monc.auth->global_id,           \
-                ##__VA_ARGS__)
+                ##__VA_ARGS__);                                        \
+       CEPH_SAN_LOG("%12.12s:%-4d : " fmt,                             \
+                kbasename(__FILE__), __LINE__, ##__VA_ARGS__);         \
+       } while (0)
 # else
 /* faux printk call just to see any compiler warnings. */
 #  define dout(fmt, ...)                                       \
diff --git a/include/linux/ceph/ceph_san.h b/include/linux/ceph/ceph_san.h
new file mode 100644 (file)
index 0000000..e6d3c34
--- /dev/null
@@ -0,0 +1,116 @@
+#ifndef CEPHSAN_H
+#define CEPHSAN_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+
+
+DECLARE_PER_CPU(struct ceph_san_tls_logger, ceph_san_tls);
+DECLARE_PER_CPU(struct cephsan_pagefrag, ceph_san_pagefrag);
+
+/*
+ * Pagefrag Allocator for ceph_san:
+ *  - A contiguous buffer of CEPHSAN_PAGEFRAG_SIZE bytes (4MB) is allocated.
+ *  - The allocator maintains two unsigned int indices (head and tail) into the buffer.
+ *  - cephsan_pagefrag_alloc(n) returns a pointer to n contiguous bytes (if available) and
+ *    advances the head pointer by n bytes (wrapping around at the end).
+ *  - cephsan_pagefrag_free(n) advances the tail pointer by n bytes.
+ *
+ * This simple ring-buffer allocator is intended for short-lived allocations in the Ceph SAN code.
+ */
+
+#define CEPHSAN_PAGEFRAG_SIZE  (1<<22)  /* 4MB */
+
+/* Pagefrag allocator structure: a byte ring described by head/tail offsets into one buffer. */
+struct cephsan_pagefrag {
+    struct page *pages;   /* backing pages, set from alloc_pages() in cephsan_pagefrag_init() */
+    void *buffer;         /* page_address() of @pages */
+    unsigned int head;    /* byte offset advanced by cephsan_pagefrag_alloc() */
+    unsigned int tail;    /* byte offset advanced by cephsan_pagefrag_free() */
+};
+
+/* struct ceph_san_log_entry and struct ceph_san_tls_logger are defined below
+ * (under CONFIG_DEBUG_FS) because debugfs.c reads them directly.
+ * Use log_cephsan() / CEPH_SAN_LOG() to append entries.
+ */
+
+/* get_cephsan() and alloc_cephsan() have been removed from the public API. */
+
+/* log_cephsan() takes a single NUL-terminated message buffer; the task name,
+ * PID and timestamp are taken from the calling context.
+ */
+
+int cephsan_pagefrag_init(struct cephsan_pagefrag *pf);
+
+
+/**
+ * cephsan_pagefrag_alloc - Allocate bytes from the pagefrag buffer.
+ * @n: number of bytes to allocate.
+ *
+ * Allocates @n bytes if there is sufficient free space in the buffer.
+ * Advances the head pointer by @n bytes (wrapping around if needed).
+ *
+ * Return: 0 if there is not enough space; otherwise the buffer offset of the
+ * allocation in the low 32 bits and the number of bytes consumed in the high 32 bits.
+ */
+u64 cephsan_pagefrag_alloc(struct cephsan_pagefrag *pf, unsigned int n);
+
+/**
+ * cephsan_pagefrag_free - Free bytes in the pagefrag allocator.
+ * @n: number of bytes to free.
+ *
+ * Advances the tail pointer by @n bytes (wrapping around if needed).
+ */
+void cephsan_pagefrag_free(struct cephsan_pagefrag *pf, unsigned int n);
+/**
+ * cephsan_pagefrag_deinit - Deinitialize the pagefrag allocator.
+ *
+ * Frees the allocated buffer and resets the head and tail pointers.
+ */
+void cephsan_pagefrag_deinit(struct cephsan_pagefrag *pf);
+
+
+#ifdef CONFIG_DEBUG_FS
+#define CEPH_SAN_MAX_LOGS (8192 << 2) //4MB per core
+#define LOG_BUF_SIZE 256
+
+void cephsan_cleanup(void);
+int cephsan_init(void);
+
+void log_cephsan(char *buf);
+/*
+ * Format a message into an on-stack buffer of LOG_BUF_SIZE bytes and hand it
+ * to log_cephsan(); snprintf() truncates anything longer than the buffer.
+ *
+ * The scratch buffer deliberately has an unusual name: with a plain "buf",
+ * a call such as CEPH_SAN_LOG("%p", buf) would pick up the macro's own array
+ * instead of the caller's variable.
+ */
+#define CEPH_SAN_LOG(fmt, ...) do { \
+    char __ceph_san_buf[LOG_BUF_SIZE] = {0}; \
+    snprintf(__ceph_san_buf, LOG_BUF_SIZE, fmt, ##__VA_ARGS__); \
+    log_cephsan(__ceph_san_buf); \
+} while (0)
+/*
+ * Internal definitions for Ceph SAN logs.
+ * These definitions are not part of the public API but are required by debugfs.c.
+ */
+struct ceph_san_log_entry {
+    char comm[TASK_COMM_LEN];   /* copy of current->comm taken when the entry was logged */
+    char *buf;                  /* message text; points into the per-CPU pagefrag buffer */
+    u64 ts;                     /* jiffies at log time; the debugfs reader skips entries with ts == 0 */
+    pid_t pid;                  /* current->pid at log time */
+    u32 len;                    /* byte count handed back to cephsan_pagefrag_free() when the slot is reused */
+};
+
+struct ceph_san_tls_logger {
+    size_t head_idx;                   /* running write counter; masked with CEPH_SAN_MAX_LOGS - 1 to pick a slot */
+    struct page *pages;                /* alloc_pages() result backing @logs */
+    struct ceph_san_log_entry *logs;   /* page_address() of @pages, used as an array of CEPH_SAN_MAX_LOGS entries */
+};
+#else /* CONFIG_DEBUG_FS */
+
+/* No-op without debugfs; must accept the same (fmt, ...) arguments as the real macro. */
+#define CEPH_SAN_LOG(fmt, ...) do {} while (0)
+
+static inline void cephsan_cleanup(void) {}
+static inline int __init cephsan_init(void) { return 0; }
+
+#endif /* CONFIG_DEBUG_FS */
+
+#define CEPH_SAN_SET_REQ(req) do { current->journal_info = req; } while (0)
+#define CEPH_SAN_RESET_REQ() do { current->journal_info = NULL; } while (0)
+#define CEPH_SAN_GET_REQ() (current->journal_info)
+
+#endif /* CEPHSAN_H */
index 8802a0c0155d5764e158c278e3cd87d30ff45abc..bbfff0dd9081a72a2e8674201ea0826a63f36c33 100644 (file)
@@ -10,6 +10,7 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
        osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
        striper.o \
        debugfs.o \
+       ceph_san.o \
        auth.o auth_none.o \
        crypto.o armor.o \
        auth_x.o \
diff --git a/net/ceph/ceph_san.c b/net/ceph/ceph_san.c
new file mode 100644 (file)
index 0000000..eb04740
--- /dev/null
@@ -0,0 +1,191 @@
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/jiffies.h>
+#include <linux/ceph/ceph_san.h>
+#include <linux/mm.h>
+
+/* Use per-core TLS logger; no global list or lock needed */
+DEFINE_PER_CPU(struct ceph_san_tls_logger, ceph_san_tls);
+EXPORT_SYMBOL(ceph_san_tls);
+
+DEFINE_PER_CPU(struct cephsan_pagefrag, ceph_san_pagefrag);
+EXPORT_SYMBOL(ceph_san_pagefrag);
+
+
+static inline void *cephsan_pagefrag_get_ptr(struct cephsan_pagefrag *pf, u64 val);
+/* struct ceph_san_log_entry and struct ceph_san_tls_logger are defined in
+ * <linux/ceph/ceph_san.h> (under CONFIG_DEBUG_FS) to avoid duplication.
+ */
+
+/**
+ * log_cephsan - Append one message to the current CPU's log ring.
+ * @buf: NUL-terminated message; one trailing '\n', if present, is stripped
+ *       in place.
+ *
+ * Claims the next slot of the per-CPU ring, releases the pagefrag space held
+ * by the message previously stored in that slot, and stores a copy of @buf
+ * together with the current task's comm, PID and the jiffies timestamp.
+ *
+ * Nothing is logged while the per-CPU buffers are not allocated.  If the
+ * pagefrag has no room, the entry is recorded without message text.
+ */
+void log_cephsan(char *buf)
+{
+       struct ceph_san_tls_logger *tls;
+       struct cephsan_pagefrag *pf;
+       struct ceph_san_log_entry *entry;
+       unsigned long flags;
+       unsigned int len = strlen(buf);
+       u64 buf_idx;
+
+       /* Drop a trailing newline, but never index before the buffer. */
+       if (len && buf[len - 1] == '\n')
+               buf[--len] = '\0';
+       len++;          /* the terminating NUL is stored as well */
+
+       /*
+        * The per-CPU state is updated without a lock, so keep this CPU from
+        * being preempted or re-entering from interrupt context meanwhile.
+        */
+       local_irq_save(flags);
+       tls = this_cpu_ptr(&ceph_san_tls);
+       pf = this_cpu_ptr(&ceph_san_pagefrag);
+
+       /* Buffers not allocated: there is nothing to write into. */
+       if (!tls->logs || !pf->buffer)
+               goto out;
+
+       entry = &tls->logs[tls->head_idx++ & (CEPH_SAN_MAX_LOGS - 1)];
+
+       /* Give back the space held by the message being overwritten. */
+       cephsan_pagefrag_free(pf, entry->len);
+
+       entry->pid = current->pid;
+       entry->ts = jiffies;
+       memcpy(entry->comm, current->comm, TASK_COMM_LEN);
+
+       buf_idx = cephsan_pagefrag_alloc(pf, len);
+       if (buf_idx) {
+               /*
+                * Remember what the allocator reports as consumed (upper 32
+                * bits) so that exactly that amount is freed on reuse.
+                */
+               entry->len = buf_idx >> 32;
+               entry->buf = cephsan_pagefrag_get_ptr(pf, buf_idx);
+               memcpy(entry->buf, buf, len);
+       } else {
+               /* Do not keep the old message's pointer and length around. */
+               entry->len = 0;
+               entry->buf = NULL;
+       }
+out:
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(log_cephsan);
+
+/**
+ * cephsan_cleanup - Release every per-CPU log ring and pagefrag buffer.
+ *
+ * Only non-NULL allocations are freed and the pointers are cleared
+ * afterwards, so CPUs that were never set up are skipped.
+ */
+void cephsan_cleanup(void)
+{
+       int cpu;
+       struct ceph_san_tls_logger *tls;
+       struct cephsan_pagefrag *pf;
+
+       for_each_possible_cpu(cpu) {
+               tls = per_cpu_ptr(&ceph_san_tls, cpu);
+               if (tls->pages) {
+                       /*
+                        * tls->pages is the struct page * returned by
+                        * alloc_pages(), so it goes back via __free_pages();
+                        * free_pages() expects a kernel virtual address.
+                        */
+                       __free_pages(tls->pages, get_order(CEPH_SAN_MAX_LOGS * sizeof(struct ceph_san_log_entry)));
+                       tls->pages = NULL;
+                       tls->logs = NULL;
+               }
+
+               /* The pagefrag buffers come from alloc_pages() as well. */
+               pf = per_cpu_ptr(&ceph_san_pagefrag, cpu);
+               if (pf->pages) {
+                       __free_pages(pf->pages, get_order(CEPHSAN_PAGEFRAG_SIZE));
+                       pf->pages = NULL;
+                       pf->buffer = NULL;
+                       pf->head = pf->tail = 0;
+               }
+       }
+}
+EXPORT_SYMBOL(cephsan_cleanup);
+/**
+ * cephsan_init - Allocate the per-CPU log rings and pagefrag buffers.
+ *
+ * Return: 0 on success, -ENOMEM if any allocation fails; in that case
+ * everything allocated up to that point is released again.
+ */
+int cephsan_init(void)
+{
+       const unsigned int logs_order =
+               get_order(CEPH_SAN_MAX_LOGS * sizeof(struct ceph_san_log_entry));
+       struct ceph_san_tls_logger *tls;
+       struct cephsan_pagefrag *pf;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               tls = per_cpu_ptr(&ceph_san_tls, cpu);
+               /*
+                * Zeroed pages: the debugfs reader skips entries with
+                * ts == 0 and log_cephsan() reads a slot's len before the
+                * slot has ever been written.
+                */
+               tls->pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, logs_order);
+               if (!tls->pages) {
+                       pr_err("Failed to allocate TLS logs for CPU %d\n", cpu);
+                       goto err_free;
+               }
+               tls->logs = (struct ceph_san_log_entry *)page_address(tls->pages);
+       }
+
+       for_each_possible_cpu(cpu) {
+               pf = per_cpu_ptr(&ceph_san_pagefrag, cpu);
+               if (cephsan_pagefrag_init(pf)) {
+                       pr_err("Failed to allocate pagefrag for CPU %d\n", cpu);
+                       goto err_free;
+               }
+       }
+       return 0;
+
+err_free:
+       /* Undo whatever was set up before the failure. */
+       for_each_possible_cpu(cpu) {
+               tls = per_cpu_ptr(&ceph_san_tls, cpu);
+               if (tls->pages) {
+                       tls->logs = NULL;
+                       __free_pages(tls->pages, logs_order);
+                       tls->pages = NULL;
+               }
+               pf = per_cpu_ptr(&ceph_san_pagefrag, cpu);
+               if (pf->pages) {
+                       pf->buffer = NULL;
+                       __free_pages(pf->pages, get_order(CEPHSAN_PAGEFRAG_SIZE));
+                       pf->pages = NULL;
+               }
+       }
+       return -ENOMEM;
+}
+EXPORT_SYMBOL(cephsan_init);
+
+/**
+ * cephsan_pagefrag_init - Set up a pagefrag allocator.
+ * @pf: allocator to initialise
+ *
+ * Allocates a contiguous buffer of CEPHSAN_PAGEFRAG_SIZE bytes and starts
+ * with head and tail both at offset 0.
+ *
+ * Return: 0 on success, -ENOMEM if the pages could not be allocated.
+ */
+int cephsan_pagefrag_init(struct cephsan_pagefrag *pf)
+{
+       struct page *backing;
+
+       backing = alloc_pages(GFP_KERNEL, get_order(CEPHSAN_PAGEFRAG_SIZE));
+       pf->pages = backing;
+       if (backing == NULL)
+               return -ENOMEM;
+
+       pf->head = pf->tail = 0;
+       pf->buffer = page_address(backing);
+       return 0;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_init);
+
+/**
+ * cephsan_pagefrag_alloc - Reserve bytes from the pagefrag ring buffer.
+ * @pf: pagefrag allocator
+ * @n: number of contiguous bytes requested
+ *
+ * The reservation is always contiguous.  When @n bytes do not fit between
+ * head and the end of the buffer, the rest of the buffer is skipped and the
+ * reservation is placed at offset 0, provided it fits in front of tail.
+ *
+ * head is never allowed to land exactly on tail: head == tail is how an
+ * empty buffer is recognised, so a completely full buffer would otherwise
+ * look empty and space still in use would be handed out again.
+ *
+ * Return: 0 if there is not enough space.  Otherwise the low 32 bits hold
+ * the byte offset of the reservation inside the buffer and the high 32 bits
+ * hold the number of bytes consumed, i.e. @n plus any bytes skipped at the
+ * end of the buffer.
+ */
+u64 cephsan_pagefrag_alloc(struct cephsan_pagefrag *pf, unsigned int n)
+{
+       unsigned int offset = pf->head;
+       unsigned int room;
+
+       /* tail > head: the only free space is [head, tail). */
+       if (pf->tail > pf->head) {
+               if (pf->tail - pf->head <= n) {
+                       pr_err("Not enough space in pagefrag buffer\n");
+                       return 0;
+               }
+               pf->head += n;
+               return ((u64)n << 32) | offset;
+       }
+
+       /* tail <= head: free space is [head, SIZE) followed by [0, tail). */
+       room = CEPHSAN_PAGEFRAG_SIZE - pf->head;
+       if (n < room || (n == room && pf->tail != 0)) {
+               /* An exact fit moves head to 0, the same range tail lives in. */
+               pf->head = (pf->head + n) & (CEPHSAN_PAGEFRAG_SIZE - 1);
+               return ((u64)n << 32) | offset;
+       }
+
+       /* Does not fit before the end of the buffer: restart at offset 0. */
+       if (n >= pf->tail) {
+               pr_err("Not enough space for wrap-around allocation\n");
+               return 0;
+       }
+
+       /*
+        * The skipped bytes are computed from the head value before the
+        * wrap (room); they count as consumed by this allocation.
+        */
+       pf->head = n;
+       return ((u64)(n + room) << 32) | 0;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_alloc);
+/**
+ * cephsan_pagefrag_get_ptr - Translate an allocation result into an address.
+ * @pf: pagefrag allocator the value came from
+ * @val: value returned by cephsan_pagefrag_alloc()
+ *
+ * Return: @pf's buffer advanced by the offset in the low 32 bits of @val.
+ */
+static inline void *cephsan_pagefrag_get_ptr(struct cephsan_pagefrag *pf, u64 val)
+{
+       u32 offset = (u32)val;
+
+       return pf->buffer + offset;
+}
+
+/* Upper 32 bits of a cephsan_pagefrag_alloc() return value. */
+#define CEPHSAN_PAGEFRAG_GET_N(val)  ((val) >> 32)
+
+/**
+ * cephsan_pagefrag_free - Release bytes at the tail of the pagefrag ring.
+ * @pf: pagefrag allocator
+ * @n: number of bytes to release
+ *
+ * Moves tail forward by @n, wrapping at CEPHSAN_PAGEFRAG_SIZE.
+ */
+void cephsan_pagefrag_free(struct cephsan_pagefrag *pf, unsigned int n)
+{
+       unsigned int advanced = pf->tail + n;
+
+       pf->tail = advanced % CEPHSAN_PAGEFRAG_SIZE;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_free);
+/**
+ * cephsan_pagefrag_deinit - Tear down a pagefrag allocator.
+ * @pf: pagefrag allocator
+ *
+ * Returns the backing pages to the page allocator and resets the buffer
+ * pointer and the head and tail offsets.  Does nothing harmful if the
+ * allocator holds no pages.
+ */
+void cephsan_pagefrag_deinit(struct cephsan_pagefrag *pf)
+{
+       /*
+        * buffer is page_address() of pages obtained with alloc_pages() in
+        * cephsan_pagefrag_init(); it is not a kmalloc() object, so it must
+        * be released with __free_pages() rather than kfree().
+        */
+       if (pf->pages)
+               __free_pages(pf->pages, get_order(CEPHSAN_PAGEFRAG_SIZE));
+       pf->pages = NULL;
+       pf->buffer = NULL;
+       pf->head = pf->tail = 0;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_deinit);
index bd608ffa06279704b5f4f43e5e369035e3ff032c..12954398720df05fb5c7f7f32c0217364c3cd5d0 100644 (file)
@@ -197,14 +197,14 @@ static int ceph_tcp_send(struct ceph_connection *con)
 {
        int ret;
 
-       dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
-            iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
+       //dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
+       //     iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
        if (con->v2.out_iter_sendpage)
                ret = do_try_sendpage(con->sock, &con->v2.out_iter);
        else
                ret = do_sendmsg(con->sock, &con->v2.out_iter);
-       dout("%s con %p ret %d left %zu\n", __func__, con, ret,
-            iov_iter_count(&con->v2.out_iter));
+       //dout("%s con %p ret %d left %zu\n", __func__, con, ret,
+       //     iov_iter_count(&con->v2.out_iter));
        return ret;
 }
 
@@ -3156,8 +3156,8 @@ int ceph_con_v2_try_read(struct ceph_connection *con)
 {
        int ret;
 
-       dout("%s con %p state %d need %zu\n", __func__, con, con->state,
-            iov_iter_count(&con->v2.in_iter));
+       //dout("%s con %p state %d need %zu\n", __func__, con, con->state,
+       //     iov_iter_count(&con->v2.in_iter));
 
        if (con->state == CEPH_CON_S_PREOPEN)
                return 0;
@@ -3273,7 +3273,7 @@ static void queue_zeros(struct ceph_connection *con)
 
 static void finish_message(struct ceph_connection *con)
 {
-       dout("%s con %p msg %p\n", __func__, con, con->out_msg);
+       //dout("%s con %p msg %p\n", __func__, con, con->out_msg);
 
        /* we end up here both plain and secure modes */
        if (con->v2.out_enc_pages) {
@@ -3296,8 +3296,8 @@ static int populate_out_iter(struct ceph_connection *con)
 {
        int ret;
 
-       dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
-            con->v2.out_state);
+       //dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
+       //     con->v2.out_state);
        WARN_ON(iov_iter_count(&con->v2.out_iter));
 
        if (con->state != CEPH_CON_S_OPEN) {
@@ -3359,13 +3359,13 @@ static int populate_out_iter(struct ceph_connection *con)
 populated:
        if (WARN_ON(!iov_iter_count(&con->v2.out_iter)))
                return -ENODATA;
-       dout("%s con %p populated %zu\n", __func__, con,
-            iov_iter_count(&con->v2.out_iter));
+       //dout("%s con %p populated %zu\n", __func__, con,
+       //     iov_iter_count(&con->v2.out_iter));
        return 1;
 
 nothing_pending:
        WARN_ON(iov_iter_count(&con->v2.out_iter));
-       dout("%s con %p nothing pending\n", __func__, con);
+       //dout("%s con %p nothing pending\n", __func__, con);
        ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING);
        return 0;
 }