#include <linux/ktime.h>
#include <linux/ceph/libceph.h>
+#include <linux/ceph/ceph_san.h>
#include <linux/ceph/mon_client.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>
#include "mds_client.h"
#include "metric.h"
+/*
+ * ceph_san_show - seq_file show callback for the "cephsan" debugfs file.
+ *
+ * For every possible CPU, walks that CPU's ceph_san_tls ring and prints one
+ * line per populated entry: cpu, timestamp (log->ts converted from jiffies),
+ * a per-CPU running index, task comm, pid and the message text.
+ *
+ * The walk covers CEPH_SAN_MAX_LOGS - 1 slots, starting one past the slot
+ * selected by tls->head_idx; the head slot itself is not printed.  Slots
+ * whose ts is 0 are treated as unused and skipped.
+ *
+ * NOTE(review): no synchronisation with log_cephsan() is visible here, so an
+ * entry may be rewritten while it is being printed - confirm this is
+ * acceptable for a debug facility.
+ *
+ * Always returns 0.
+ */
+static int ceph_san_show(struct seq_file *s, void *p)
+{
+	const size_t mask = CEPH_SAN_MAX_LOGS - 1;
+	size_t cpu;
+
+	seq_puts(s, "Ceph SAN logs:\n");
+	seq_printf(s, "%-16s %-8s %-32s\n",
+		   "Task", "PID", "Log");
+	seq_puts(s, "--------------------------------------------------------------\n");
+
+	for_each_possible_cpu(cpu) {
+		struct ceph_san_tls_logger *tls = &per_cpu(ceph_san_tls, cpu);
+		size_t head_idx;
+		size_t step;
+		int idx = 0;
+
+		/* The ring only exists once cephsan_init() has allocated it. */
+		if (!tls->logs)
+			continue;
+
+		head_idx = tls->head_idx & mask;
+
+		for (step = 1; step <= mask; step++) {
+			struct ceph_san_log_entry *log =
+				&tls->logs[(head_idx + step) & mask];
+			struct timespec64 ts;
+
+			if (log->ts == 0)
+				continue;
+
+			jiffies_to_timespec64(log->ts, &ts);
+			seq_printf(s, "%zu:%lld.%09ld:%d) %-16s %-8d:%s\n",
+				   cpu,
+				   (long long)ts.tv_sec,
+				   ts.tv_nsec,
+				   idx++,
+				   log->comm,
+				   log->pid,
+				   log->buf);
+		}
+	}
+
+	return 0;
+}
+
static int mdsmap_show(struct seq_file *s, void *p)
{
int i;
DEFINE_SHOW_ATTRIBUTE(metrics_latency);
DEFINE_SHOW_ATTRIBUTE(metrics_size);
DEFINE_SHOW_ATTRIBUTE(metrics_caps);
+DEFINE_SHOW_ATTRIBUTE(ceph_san);
/*
debugfs_remove(fsc->debugfs_caps);
debugfs_remove(fsc->debugfs_status);
debugfs_remove(fsc->debugfs_mdsc);
+ debugfs_remove(fsc->debugfs_cephsan);
debugfs_remove_recursive(fsc->debugfs_metrics_dir);
doutc(fsc->client, "done\n");
}
void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
{
- char name[100];
+ char name[NAME_MAX];
doutc(fsc->client, "begin\n");
fsc->debugfs_congestion_kb =
fsc->client->debugfs_dir,
fsc,
&status_fops);
+ fsc->debugfs_cephsan = debugfs_create_file("cephsan",
+ 0444,
+ fsc->client->debugfs_dir,
+ fsc,
+ &ceph_san_fops);
fsc->debugfs_metrics_dir = debugfs_create_dir("metrics",
fsc->client->debugfs_dir);
// SPDX-License-Identifier: GPL-2.0-only
+#include "linux/mm_types.h"
#include <linux/ceph/ceph_debug.h>
+#include <linux/ceph/ceph_san.h>
#include <linux/backing-dev.h>
#include <linux/ctype.h>
#include <uapi/linux/magic.h>
+
+// If no header file, declare the function prototype
+// int allocate_and_add_ceph_san(void);
+
static DEFINE_SPINLOCK(ceph_fsc_lock);
static LIST_HEAD(ceph_fsc_list);
goto out;
ceph_flock_init();
+ ret = cephsan_init();
ret = register_filesystem(&ceph_fs_type);
if (ret)
goto out_caches;
+/*
+ * Module exit: unregister the filesystem, destroy the caches and only then
+ * release the Ceph SAN log buffers.
+ */
static void __exit exit_ceph(void)
{
	dout("exit_ceph\n");
	unregister_filesystem(&ceph_fs_type);
	destroy_caches();
+	/*
+	 * cephsan_cleanup() frees the per-CPU log rings, so it runs last:
+	 * anything logged during the teardown steps above must still find
+	 * the rings in place.
+	 */
+	cephsan_cleanup();
}
struct dentry *debugfs_status;
struct dentry *debugfs_mds_sessions;
struct dentry *debugfs_metrics_dir;
+ struct dentry *debugfs_cephsan;
#endif
#ifdef CONFIG_CEPH_FSCACHE
#ifndef _FS_CEPH_DEBUG_H
#define _FS_CEPH_DEBUG_H
+#undef pr_fmt
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/string.h>
+#include <linux/ceph/ceph_san.h>
#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
# if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
# define dout(fmt, ...) \
- pr_debug("%.*s %12.12s:%-4d : " fmt, \
+ do { \
+ pr_debug("%.*s %12.12s:%-4d:" fmt, \
8 - (int)sizeof(KBUILD_MODNAME), " ", \
- kbasename(__FILE__), __LINE__, ##__VA_ARGS__)
+ kbasename(__FILE__), __LINE__, ##__VA_ARGS__); \
+ CEPH_SAN_LOG("%12.12s:%-4d : " fmt, \
+ kbasename(__FILE__), __LINE__, ##__VA_ARGS__); \
+ } while (0)
# define doutc(client, fmt, ...) \
+ do { \
pr_debug("%.*s %12.12s:%-4d : [%pU %llu] " fmt, \
8 - (int)sizeof(KBUILD_MODNAME), " ", \
kbasename(__FILE__), __LINE__, \
&client->fsid, client->monc.auth->global_id, \
- ##__VA_ARGS__)
+ ##__VA_ARGS__); \
+ CEPH_SAN_LOG("%12.12s:%-4d:" fmt, \
+ kbasename(__FILE__), __LINE__, ##__VA_ARGS__); \
+ } while (0)
# else
/* faux printk call just to see any compiler warnings. */
# define dout(fmt, ...) \
--- /dev/null
+#ifndef CEPHSAN_H
+#define CEPHSAN_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+
+
+DECLARE_PER_CPU(struct ceph_san_tls_logger, ceph_san_tls);
+DECLARE_PER_CPU(struct cephsan_pagefrag, ceph_san_pagefrag);
+
+/*
+ * Pagefrag Allocator for ceph_san:
+ * - A contiguous buffer of CEPHSAN_PAGEFRAG_SIZE bytes (4MB) is allocated.
+ * - The allocator maintains two unsigned int indices (head and tail) into the buffer.
+ * - cephsan_pagefrag_alloc(n) returns a pointer to n contiguous bytes (if available) and
+ * advances the head pointer by n bytes (wrapping around at the end).
+ * - cephsan_pagefrag_free(n) advances the tail pointer by n bytes.
+ *
+ * This simple ring-buffer allocator is intended for short-lived allocations in the Ceph SAN code.
+ */
+
+#define CEPHSAN_PAGEFRAG_SIZE (1<<22) /* 4MB */
+
+/*
+ * Pagefrag allocator state.
+ *
+ * One instance backs a single CEPHSAN_PAGEFRAG_SIZE-byte ring; head and tail
+ * are byte offsets into that ring.
+ */
+struct cephsan_pagefrag {
+ /* Pages obtained from alloc_pages() in cephsan_pagefrag_init(). */
+ struct page *pages;
+ /* page_address() of @pages; base address for all returned offsets. */
+ void *buffer;
+ /* Offset where the next allocation starts; moved by cephsan_pagefrag_alloc(). */
+ unsigned int head;
+ /* Offset of the oldest live byte; moved by cephsan_pagefrag_free(). */
+ unsigned int tail;
+};
+
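+/* struct ceph_san_log_entry is defined further down in this header (under
+ * CONFIG_DEBUG_FS) because debugfs.c needs it. Producers should record
+ * messages through log_cephsan() / CEPH_SAN_LOG() below.
+ */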
+
+/* get_cephsan() and alloc_cephsan() have been removed from the public API. */
+
+/* log_cephsan() takes a single NUL-terminated message buffer; use the
+ * CEPH_SAN_LOG() macro below to format a message and record it in one step.
+ */
+
+int cephsan_pagefrag_init(struct cephsan_pagefrag *pf);
+
+
+/**
+ * cephsan_pagefrag_alloc - Allocate bytes from the pagefrag buffer.
+ * @n: number of bytes to allocate.
+ *
+ * Allocates @n bytes if there is sufficient free space in the buffer.
+ * Advances the head pointer by @n bytes (wrapping around if needed).
+ *
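+ * Return: 0 if there is not enough space; otherwise a u64 whose low 32 bits
+ * are the byte offset of the allocation inside the buffer and whose high
+ * 32 bits are the allocated length.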
+ */
+u64 cephsan_pagefrag_alloc(struct cephsan_pagefrag *pf, unsigned int n);
+
+/**
+ * cephsan_pagefrag_free - Free bytes in the pagefrag allocator.
+ * @n: number of bytes to free.
+ *
+ * Advances the tail pointer by @n bytes (wrapping around if needed).
+ */
+void cephsan_pagefrag_free(struct cephsan_pagefrag *pf, unsigned int n);
+/**
+ * cephsan_pagefrag_deinit - Deinitialize the pagefrag allocator.
+ *
+ * Frees the allocated buffer and resets the head and tail pointers.
+ */
+void cephsan_pagefrag_deinit(struct cephsan_pagefrag *pf);
+
+
+#ifdef CONFIG_DEBUG_FS
+#define CEPH_SAN_MAX_LOGS (8192 << 2) //4MB per core
+#define LOG_BUF_SIZE 256
+
+void cephsan_cleanup(void);
+int cephsan_init(void);
+
+void log_cephsan(char *buf);
+/*
+ * CEPH_SAN_LOG - format a message into a LOG_BUF_SIZE-byte stack buffer and
+ * pass it to log_cephsan().
+ *
+ * The scratch buffer has a macro-private name so that a caller passing its
+ * own variable called "buf" as a format argument is not silently redirected
+ * to the scratch buffer.  snprintf() always NUL-terminates, so the buffer
+ * needs no separate zero-initialisation.
+ */
+#define CEPH_SAN_LOG(fmt, ...) do {						\
+	char __ceph_san_buf[LOG_BUF_SIZE];					\
+	snprintf(__ceph_san_buf, sizeof(__ceph_san_buf), fmt, ##__VA_ARGS__);	\
+	log_cephsan(__ceph_san_buf);						\
+} while (0)
+/*
+ * Internal definitions for Ceph SAN logs.
+ * These definitions are not part of the public API but are required by debugfs.c.
+ */
+/* One slot of a per-CPU log ring, filled in by log_cephsan(). */
+struct ceph_san_log_entry {
+ /* Copy of current->comm taken when the message was logged. */
+ char comm[TASK_COMM_LEN];
+ /* Message text; points into the logging CPU's pagefrag buffer. */
+ char *buf;
+ /* jiffies value at log time; ceph_san_show() treats 0 as "slot unused". */
+ u64 ts;
+ /* current->pid taken when the message was logged. */
+ pid_t pid;
+ /* Bytes held for @buf; handed to cephsan_pagefrag_free() when the slot is reused. */
+ u32 len;
+};
+
+/* Per-CPU log ring; one instance per CPU is declared as ceph_san_tls. */
+struct ceph_san_tls_logger {
+ /* Running write counter; masked with CEPH_SAN_MAX_LOGS - 1 to pick a slot. */
+ size_t head_idx;
+ /* Pages obtained from alloc_pages() in cephsan_init(). */
+ struct page *pages;
+ /* page_address() of @pages, used as an array of CEPH_SAN_MAX_LOGS entries. */
+ struct ceph_san_log_entry *logs;
+};
+#else /* CONFIG_DEBUG_FS */
+
+/*
+ * Without CONFIG_DEBUG_FS logging compiles away.  The stub is variadic like
+ * the real macro so that call sites passing a format plus arguments still
+ * build.
+ */
+#define CEPH_SAN_LOG(fmt, ...) do {} while (0)
+
+static inline void cephsan_cleanup(void) {}
+static inline int __init cephsan_init(void) { return 0; }
+
+#endif /* CONFIG_DEBUG_FS */
+
+#define CEPH_SAN_SET_REQ(req) do { current->journal_info = req; } while (0)
+#define CEPH_SAN_RESET_REQ() do { current->journal_info = NULL; } while (0)
+#define CEPH_SAN_GET_REQ() (current->journal_info)
+
+#endif /* CEPHSAN_H */
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
striper.o \
debugfs.o \
+ ceph_san.o \
auth.o auth_none.o \
crypto.o armor.o \
auth_x.o \
--- /dev/null
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/jiffies.h>
+#include <linux/ceph/ceph_san.h>
+#include <linux/mm.h>
+
+/* Use per-core TLS logger; no global list or lock needed */
+DEFINE_PER_CPU(struct ceph_san_tls_logger, ceph_san_tls);
+EXPORT_SYMBOL(ceph_san_tls);
+
+DEFINE_PER_CPU(struct cephsan_pagefrag, ceph_san_pagefrag);
+EXPORT_SYMBOL(ceph_san_pagefrag);
+
+
+static inline void *cephsan_pagefrag_get_ptr(struct cephsan_pagefrag *pf, u64 val);
+/* The definitions of struct ceph_san_log_entry and struct ceph_san_tls_logger
+ * live in <linux/ceph/ceph_san.h> (under CONFIG_DEBUG_FS) to avoid duplication.
+ */
+
+/**
+ * log_cephsan - Record one message in the current CPU's log ring.
+ * @buf: NUL-terminated message.  It is modified in place: its last character
+ *       is replaced by a NUL before the text is copied.
+ *
+ * Picks the next slot of this CPU's ring, returns the bytes held by the entry
+ * being overwritten to the pagefrag allocator, stamps the slot with the
+ * current task's pid/comm and jiffies, and copies the message into freshly
+ * allocated pagefrag space.
+ *
+ * Empty messages are dropped, as are messages logged while the ring or the
+ * pagefrag buffer is not allocated.  If the pagefrag allocation fails the
+ * slot keeps its pid/comm/timestamp but carries no text (buf == NULL,
+ * len == 0), so nothing stale is freed again when the slot is reused.
+ *
+ * NOTE(review): this_cpu_ptr() is used without visibly disabling preemption
+ * or interrupts - confirm the calling contexts make that safe.
+ */
+void log_cephsan(char *buf)
+{
+	struct ceph_san_tls_logger *tls = this_cpu_ptr(&ceph_san_tls);
+	struct cephsan_pagefrag *pf = this_cpu_ptr(&ceph_san_pagefrag);
+	struct ceph_san_log_entry *entry;
+	size_t len = strlen(buf);
+	u64 cookie;
+
+	/* len == 0 would make buf[len - 1] an out-of-bounds write. */
+	if (!len || !tls->logs || !pf->buffer)
+		return;
+
+	entry = &tls->logs[tls->head_idx++ & (CEPH_SAN_MAX_LOGS - 1)];
+
+	/* Give back whatever the overwritten entry was holding. */
+	cephsan_pagefrag_free(pf, entry->len);
+	entry->len = 0;
+	entry->buf = NULL;
+
+	/* Drops the final character (presumably the trailing newline). */
+	buf[len - 1] = '\0';
+	entry->pid = current->pid;
+	entry->ts = jiffies;
+	memcpy(entry->comm, current->comm, TASK_COMM_LEN);
+
+	cookie = cephsan_pagefrag_alloc(pf, len);
+	if (!cookie)
+		return;
+
+	/*
+	 * Remember the length reported by the allocator (high 32 bits) rather
+	 * than the string length: it is the amount that has to be freed when
+	 * this slot is reused.
+	 */
+	entry->len = (u32)(cookie >> 32);
+	entry->buf = cephsan_pagefrag_get_ptr(pf, cookie);
+	memcpy(entry->buf, buf, len);
+}
+EXPORT_SYMBOL(log_cephsan);
+
+/* Release everything cephsan_init() allocated.
+ *
+ * For every possible CPU this frees the log ring and the pagefrag buffer and
+ * clears the pointers to them.  Both were obtained with alloc_pages(), so
+ * they are released with __free_pages() on the struct page pointer;
+ * free_pages() would expect a kernel virtual address instead.
+ *
+ * Safe to call when some or all of the buffers were never allocated.
+ *
+ * NOTE(review): nothing here stops a concurrent log_cephsan() caller;
+ * confirm that logging has quiesced before this is called.
+ */
+void cephsan_cleanup(void)
+{
+	const int log_order =
+		get_order(CEPH_SAN_MAX_LOGS * sizeof(struct ceph_san_log_entry));
+	const int frag_order = get_order(CEPHSAN_PAGEFRAG_SIZE);
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct ceph_san_tls_logger *tls = per_cpu_ptr(&ceph_san_tls, cpu);
+		struct cephsan_pagefrag *pf = per_cpu_ptr(&ceph_san_pagefrag, cpu);
+
+		if (tls->pages) {
+			struct page *pages = tls->pages;
+
+			/* Clear the pointers before the memory goes away. */
+			tls->logs = NULL;
+			tls->pages = NULL;
+			__free_pages(pages, log_order);
+		}
+
+		if (pf->pages) {
+			struct page *pages = pf->pages;
+
+			pf->buffer = NULL;
+			pf->pages = NULL;
+			pf->head = 0;
+			pf->tail = 0;
+			__free_pages(pages, frag_order);
+		}
+	}
+}
+EXPORT_SYMBOL(cephsan_cleanup);
+/* Initialize the Ceph SAN logging infrastructure.
+ *
+ * Allocates, for every possible CPU, a log ring of CEPH_SAN_MAX_LOGS entries
+ * and a pagefrag buffer for the message text.
+ *
+ * The ring is allocated zeroed: ceph_san_show() treats ts == 0 as "unused"
+ * and log_cephsan() reads a slot's len before writing the slot for the first
+ * time, so neither may see uninitialised memory.
+ *
+ * Return: 0 on success.  On failure everything allocated so far is released
+ * again and a negative error code is returned.
+ */
+int cephsan_init(void)
+{
+	const int log_order =
+		get_order(CEPH_SAN_MAX_LOGS * sizeof(struct ceph_san_log_entry));
+	struct ceph_san_tls_logger *tls;
+	struct cephsan_pagefrag *pf;
+	int cpu;
+	int ret;
+
+	for_each_possible_cpu(cpu) {
+		tls = per_cpu_ptr(&ceph_san_tls, cpu);
+		tls->pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, log_order);
+		if (!tls->pages) {
+			pr_err("Failed to allocate TLS logs for CPU %d\n", cpu);
+			ret = -ENOMEM;
+			goto err_free;
+		}
+		tls->head_idx = 0;
+		tls->logs = page_address(tls->pages);
+	}
+
+	for_each_possible_cpu(cpu) {
+		pf = per_cpu_ptr(&ceph_san_pagefrag, cpu);
+		ret = cephsan_pagefrag_init(pf);
+		if (ret) {
+			pr_err("Failed to allocate pagefrag buffer for CPU %d\n", cpu);
+			goto err_free;
+		}
+	}
+	return 0;
+
+err_free:
+	/* Undo the partial setup; CPUs not reached yet still hold NULL. */
+	for_each_possible_cpu(cpu) {
+		tls = per_cpu_ptr(&ceph_san_tls, cpu);
+		if (tls->pages) {
+			tls->logs = NULL;
+			__free_pages(tls->pages, log_order);
+			tls->pages = NULL;
+		}
+
+		pf = per_cpu_ptr(&ceph_san_pagefrag, cpu);
+		if (pf->pages) {
+			pf->buffer = NULL;
+			__free_pages(pf->pages, get_order(CEPHSAN_PAGEFRAG_SIZE));
+			pf->pages = NULL;
+		}
+	}
+	return ret;
+}
+EXPORT_SYMBOL(cephsan_init);
+
+/**
+ * cephsan_pagefrag_init - Set up a pagefrag allocator.
+ * @pf: allocator to initialise
+ *
+ * Allocates a buffer of CEPHSAN_PAGEFRAG_SIZE bytes with alloc_pages(),
+ * records its address in @pf->buffer and resets the head and tail offsets
+ * to 0.  On allocation failure @pf->pages is left NULL and the other fields
+ * are not touched.
+ *
+ * Return: 0 on success, -ENOMEM if the pages could not be allocated.
+ */
+int cephsan_pagefrag_init(struct cephsan_pagefrag *pf)
+{
+	struct page *pg = alloc_pages(GFP_KERNEL,
+				      get_order(CEPHSAN_PAGEFRAG_SIZE));
+
+	pf->pages = pg;
+	if (pg == NULL)
+		return -ENOMEM;
+
+	pf->buffer = page_address(pg);
+	pf->tail = 0;
+	pf->head = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_init);
+
+/**
+ * cephsan_pagefrag_alloc - Reserve bytes in the pagefrag ring.
+ * @pf: pagefrag allocator
+ * @n: number of bytes to reserve
+ *
+ * Reserves @n contiguous bytes at the head of the ring.  If the request does
+ * not fit between the head and the end of the buffer, the unused bytes at
+ * the end are skipped and the reservation is placed at offset 0, provided
+ * it fits in front of the tail.
+ *
+ * The head is never allowed to catch up with the tail: head == tail means
+ * "empty", so a completely full ring would otherwise look empty and live
+ * data would be overwritten by the next reservation.
+ *
+ * Return: 0 on failure.  On success, a cookie whose low 32 bits are the byte
+ * offset of the reservation inside @pf->buffer (see
+ * cephsan_pagefrag_get_ptr()) and whose high 32 bits are the number of bytes
+ * consumed from the ring - @n plus, after a wrap, the skipped bytes at the
+ * end of the buffer.  That count is what has to be passed to
+ * cephsan_pagefrag_free() later.  A request for 0 bytes can produce a cookie
+ * of 0 and thus be reported as a failure.
+ */
+u64 cephsan_pagefrag_alloc(struct cephsan_pagefrag *pf, unsigned int n)
+{
+	unsigned int offset;
+	unsigned int slack;
+
+	/* Also keeps the unsigned additions below from overflowing. */
+	if (n > CEPHSAN_PAGEFRAG_SIZE) {
+		pr_err("Not enough space in pagefrag buffer\n");
+		return 0;
+	}
+
+	/* Case 1: tail > head - the free region is [head, tail). */
+	if (pf->tail > pf->head) {
+		if (pf->tail - pf->head <= n) {
+			pr_err("Not enough space in pagefrag buffer\n");
+			return 0;
+		}
+		offset = pf->head;
+		pf->head += n;
+		return ((u64)n << 32) | offset;
+	}
+
+	/* Case 2: tail <= head - first try the space up to the buffer end. */
+	if (pf->head + n <= CEPHSAN_PAGEFRAG_SIZE) {
+		offset = pf->head;
+		pf->head += n;
+		return ((u64)n << 32) | offset;
+	}
+
+	/* Wrap around: the reservation must fit strictly before the tail. */
+	if (n >= pf->tail) {
+		pr_err("Not enough space for wrap-around allocation\n");
+		return 0;
+	}
+
+	/* The skipped tail end must be sized before head is moved. */
+	slack = CEPHSAN_PAGEFRAG_SIZE - pf->head;
+	pf->head = n;
+	return (u64)(n + slack) << 32;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_alloc);
+/**
+ * cephsan_pagefrag_get_ptr - Translate an allocation cookie into an address.
+ * @pf: pagefrag allocator the cookie came from
+ * @val: value returned by cephsan_pagefrag_alloc()
+ *
+ * Only the low 32 bits of @val (the byte offset) are used.
+ *
+ * Return: @pf->buffer advanced by that offset.
+ */
+static inline void *cephsan_pagefrag_get_ptr(struct cephsan_pagefrag *pf, u64 val)
+{
+	u64 offset = val & 0xFFFFFFFF;
+
+	return pf->buffer + offset;
+}
+
+#define CEPHSAN_PAGEFRAG_GET_N(val) ((val) >> 32)
+
+/**
+ * cephsan_pagefrag_free - Release bytes at the tail of the pagefrag ring.
+ * @pf: pagefrag allocator
+ * @n: number of bytes to release
+ *
+ * Moves the tail offset forward by @n, wrapping at CEPHSAN_PAGEFRAG_SIZE.
+ */
+void cephsan_pagefrag_free(struct cephsan_pagefrag *pf, unsigned int n)
+{
+	const unsigned int wrap_mask = CEPHSAN_PAGEFRAG_SIZE - 1;
+	unsigned int next_tail = pf->tail + n;
+
+	pf->tail = next_tail & wrap_mask;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_free);
+/**
+ * cephsan_pagefrag_deinit - Tear down a pagefrag allocator.
+ * @pf: allocator to tear down
+ *
+ * Releases the buffer and resets all fields.  The buffer comes from
+ * alloc_pages() in cephsan_pagefrag_init(), so it is returned with
+ * __free_pages() rather than kfree().  Calling this on an allocator whose
+ * pages pointer is NULL only resets the fields.
+ */
+void cephsan_pagefrag_deinit(struct cephsan_pagefrag *pf)
+{
+	if (pf->pages)
+		__free_pages(pf->pages, get_order(CEPHSAN_PAGEFRAG_SIZE));
+
+	pf->pages = NULL;
+	pf->buffer = NULL;
+	pf->head = pf->tail = 0;
+}
+EXPORT_SYMBOL(cephsan_pagefrag_deinit);
{
int ret;
- dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
- iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
+ //dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
+ // iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
if (con->v2.out_iter_sendpage)
ret = do_try_sendpage(con->sock, &con->v2.out_iter);
else
ret = do_sendmsg(con->sock, &con->v2.out_iter);
- dout("%s con %p ret %d left %zu\n", __func__, con, ret,
- iov_iter_count(&con->v2.out_iter));
+ //dout("%s con %p ret %d left %zu\n", __func__, con, ret,
+ // iov_iter_count(&con->v2.out_iter));
return ret;
}
{
int ret;
- dout("%s con %p state %d need %zu\n", __func__, con, con->state,
- iov_iter_count(&con->v2.in_iter));
+ //dout("%s con %p state %d need %zu\n", __func__, con, con->state,
+ // iov_iter_count(&con->v2.in_iter));
if (con->state == CEPH_CON_S_PREOPEN)
return 0;
static void finish_message(struct ceph_connection *con)
{
- dout("%s con %p msg %p\n", __func__, con, con->out_msg);
+ //dout("%s con %p msg %p\n", __func__, con, con->out_msg);
/* we end up here both plain and secure modes */
if (con->v2.out_enc_pages) {
{
int ret;
- dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
- con->v2.out_state);
+ //dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
+ // con->v2.out_state);
WARN_ON(iov_iter_count(&con->v2.out_iter));
if (con->state != CEPH_CON_S_OPEN) {
populated:
if (WARN_ON(!iov_iter_count(&con->v2.out_iter)))
return -ENODATA;
- dout("%s con %p populated %zu\n", __func__, con,
- iov_iter_count(&con->v2.out_iter));
+ //dout("%s con %p populated %zu\n", __func__, con,
+ // iov_iter_count(&con->v2.out_iter));
return 1;
nothing_pending:
WARN_ON(iov_iter_count(&con->v2.out_iter));
- dout("%s con %p nothing pending\n", __func__, con);
+ //dout("%s con %p nothing pending\n", __func__, con);
ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING);
return 0;
}