generic: test MADV_POPULATE_READ with IO errors
[xfstests-dev.git] / src / log-writes / log-writes.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/fs.h>
3 #include <sys/types.h>
4 #include <sys/stat.h>
5 #include <sys/ioctl.h>
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <errno.h>
10 #include <unistd.h>
11 #include <string.h>
12 #include "log-writes.h"
13
/*
 * Verbosity level for the diagnostics printed by this file: any non-zero
 * value enables the basic messages, and values greater than 1 additionally
 * enable the per-entry seek messages and the messages for skipped entries.
 */
14 int log_writes_verbose = 0;
15
16 /*
17  * @log: the log to free.
18  *
19  * This will close any open fd's the log has and free up its memory.
20  */
21 void log_free(struct log *log)
22 {
23         if (log->replayfd >= 0)
24                 close(log->replayfd);
25         if (log->logfd >= 0)
26                 close(log->logfd);
27         free(log);
28 }
29
30 static int discard_range(struct log *log, u64 start, u64 len)
31 {
32         u64 range[2] = { start, len };
33
34         if (ioctl(log->replayfd, BLKDISCARD, &range) < 0) {
35                 if (log_writes_verbose)
36                         printf("replay device doesn't support discard, "
37                                "switching to writing zeros\n");
38                 log->flags |= LOG_DISCARD_NOT_SUPP;
39         }
40         return 0;
41 }
42
43 static int zero_range(struct log *log, u64 start, u64 len)
44 {
45         u64 bufsize = len;
46         ssize_t ret;
47         char *buf = NULL;
48
49         if (log->max_zero_size < len) {
50                 if (log_writes_verbose)
51                         printf("discard len %llu larger than max %llu\n",
52                                (unsigned long long)len,
53                                (unsigned long long)log->max_zero_size);
54                 return 0;
55         }
56
57         while (!buf) {
58                 buf = malloc(bufsize);
59                 if (!buf)
60                         bufsize >>= 1;
61                 if (!bufsize) {
62                         fprintf(stderr, "Couldn't allocate zero buffer");
63                         return -1;
64                 }
65         }
66
67         memset(buf, 0, bufsize);
68         while (len) {
69                 if (len < bufsize)
70                         bufsize = len;
71
72                 ret = pwrite(log->replayfd, buf, bufsize, start);
73                 if (ret != bufsize) {
74                         fprintf(stderr, "Error zeroing file: %d\n", errno);
75                         free(buf);
76                         return -1;
77                 }
78                 len -= ret;
79                 start += ret;
80         }
81         free(buf);
82         return 0;
83 }
84
85 /*
86  * @log: the log we are replaying.
87  * @entry: the discard entry.
88  *
89  * Discard the given length.  If the device supports discard we will call that
90  * ioctl, otherwise we will write 0's to emulate discard.  If the discard size
91  * is larger than log->max_zero_size then we will simply skip the zero'ing if
92  * the drive doesn't support discard.
93  */
94 int log_discard(struct log *log, struct log_write_entry *entry)
95 {
96         u64 start = le64_to_cpu(entry->sector) * log->sectorsize;
97         u64 size = le64_to_cpu(entry->nr_sectors) * log->sectorsize;
98         u64 max_chunk = 1 * 1024 * 1024 * 1024;
99
100         if (log->flags & LOG_IGNORE_DISCARD)
101                 return 0;
102
103         while (size) {
104                 u64 len = size > max_chunk ? max_chunk : size;
105                 int ret;
106
107                 /*
108                  * Do this check first in case it is our first discard, that way
109                  * if we return EOPNOTSUPP we will fall back to the 0 method
110                  * automatically.
111                  */
112                 if (!(log->flags & LOG_DISCARD_NOT_SUPP))
113                         ret = discard_range(log, start, len);
114                 if (log->flags & LOG_DISCARD_NOT_SUPP)
115                         ret = zero_range(log, start, len);
116                 if (ret)
117                         return -1;
118                 size -= len;
119                 start += len;
120         }
121         return 0;
122 }
123
124 #define DEFINE_LOG_FLAGS_STR_ENTRY(x)   \
125         {LOG_##x##_FLAG, #x}
126
127 struct flags_to_str_entry {
128         u64 flags;
129         const char *str;
130 } log_flags_table[] = {
131         DEFINE_LOG_FLAGS_STR_ENTRY(FLUSH),
132         DEFINE_LOG_FLAGS_STR_ENTRY(FUA),
133         DEFINE_LOG_FLAGS_STR_ENTRY(DISCARD),
134         DEFINE_LOG_FLAGS_STR_ENTRY(MARK),
135         DEFINE_LOG_FLAGS_STR_ENTRY(METADATA)
136 };
137
138 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
139 #define LOG_FLAGS_BUF_SIZE      128
140 /*
141  * Convert numeric flags to human readable flags.
142  * @flags:      numeric flags
143  * @buf:        output buffer for human readable string.
144  *              must have enough space (LOG_FLAGS_BUF_SIZE) to contain all
145  *              the string
146  */
147 static void entry_flags_to_str(u64 flags, char *buf)
148 {
149         int empty = 1;
150         int left_len;
151         int i;
152
153         buf[0] = '\0';
154         for (i = 0; i < ARRAY_SIZE(log_flags_table); i++) {
155                 if (flags & log_flags_table[i].flags) {
156                         if (!empty)
157                                 strncat(buf, "|", LOG_FLAGS_BUF_SIZE);
158                         empty = 0;
159                         strncat(buf, log_flags_table[i].str, LOG_FLAGS_BUF_SIZE);
160                         flags &= ~log_flags_table[i].flags;
161                 }
162         }
163         if (flags) {
164                 if (!empty)
165                         strncat(buf, "|", LOG_FLAGS_BUF_SIZE);
166                 empty = 0;
167                 left_len = LOG_FLAGS_BUF_SIZE - strnlen(buf,
168                                                         LOG_FLAGS_BUF_SIZE);
169                 if (left_len > 0)
170                         snprintf(buf + strnlen(buf, LOG_FLAGS_BUF_SIZE),
171                                  left_len, "UNKNOWN.0x%llx", flags);
172         }
173         if (empty)
174                 strncpy(buf, "NONE", LOG_FLAGS_BUF_SIZE);
175 }
176
177 /*
178  * @log: the log we are replaying.
179  * @entry: entry to be replayed.
180  *
181  * @return: 0 if we should replay the entry, > 0 if we should skip it.
182  *
183  * Should we skip the entry in our log or replay onto the replay device.
184  */
185 int log_should_skip(struct log *log, struct log_write_entry *entry)
186 {
187         u64 sector = le64_to_cpu(entry->sector);
188         u64 nr_sectors = le64_to_cpu(entry->nr_sectors);
189
190         if (!nr_sectors)
191                 return 0;
192         if (sector + nr_sectors <= log->start_sector ||
193             sector > log->end_sector)
194                 return 1;
195         return 0;
196 }
197
198 /*
199  * @entry: entry to be replayed.
200  *
201  * @return: 1 if the entry is sane, 0 if it is invalid.
202  *
203  * Check if this is a sane log entry.
204  */
205 int log_entry_valid(struct log_write_entry *entry)
206 {
207         u64 flags = le64_to_cpu(entry->flags);
208
209         /* Suspect all zeroes entry */
210         if (!flags && !entry->nr_sectors)
211                 return 0;
212         /* Suspect non zero padded entry */
213         if (flags != LOG_MARK_FLAG && entry->data[0] != 0)
214                 return 0;
215         return 1;
216 }
217
218 /*
219  * @log: the log we are replaying.
220  * @entry: where we put the entry.
221  * @read_data: read the entry data as well, entry must be log->sectorsize sized
222  * if this is set.
223  *
224  * @return: 0 if we replayed, 1 if we are at the end, -1 if there was an error.
225  *
226  * Replay the next entry in our log onto the replay device.
227  */
228 int log_replay_next_entry(struct log *log, struct log_write_entry *entry,
229                           int read_data)
230 {
231         u64 size;
232         u64 flags;
233         size_t read_size = read_data ? log->sectorsize :
234                 sizeof(struct log_write_entry);
235         char *buf;
236         char flags_buf[LOG_FLAGS_BUF_SIZE];
237         ssize_t ret;
238         off_t offset;
239         int skip = 0;
240
241         if (log->cur_entry >= log->nr_entries)
242                 return 1;
243
244         ret = read(log->logfd, entry, read_size);
245         if (ret != read_size) {
246                 fprintf(stderr, "Error reading entry: %d\n", errno);
247                 return -1;
248         }
249         if (!log_entry_valid(entry)) {
250                 fprintf(stderr, "Malformed entry @%llu\n",
251                                 log->cur_pos / log->sectorsize);
252                 return -1;
253         }
254         log->cur_entry++;
255
256         size = le64_to_cpu(entry->nr_sectors) * log->sectorsize;
257         if (read_size < log->sectorsize) {
258                 log->cur_pos = lseek(log->logfd,
259                         log->sectorsize - sizeof(struct log_write_entry), SEEK_CUR);
260                 if (log->cur_pos == (off_t)-1) {
261                         fprintf(stderr, "Error seeking in log: %d\n", errno);
262                         return -1;
263                 }
264         } else {
265                 log->cur_pos += read_size;
266         }
267
268         flags = le64_to_cpu(entry->flags);
269         entry_flags_to_str(flags, flags_buf);
270         skip = log_should_skip(log, entry);
271         if (log_writes_verbose > 1 || (log_writes_verbose && !skip)) {
272                 printf("%s %d@%llu: sector %llu, size %llu, flags 0x%llx(%s)\n",
273                        skip ? "skipping" : "replaying",
274                        (int)log->cur_entry - 1, log->cur_pos / log->sectorsize,
275                        (unsigned long long)le64_to_cpu(entry->sector),
276                        (unsigned long long)size,
277                        (unsigned long long)flags, flags_buf);
278         }
279         if (!size)
280                 return 0;
281
282         if (flags & LOG_DISCARD_FLAG)
283                 return log_discard(log, entry);
284
285         if (skip) {
286                 log->cur_pos = lseek(log->logfd, size, SEEK_CUR);
287                 if (log->cur_pos == (off_t)-1) {
288                         fprintf(stderr, "Error seeking in log: %d\n", errno);
289                         return -1;
290                 }
291                 return 0;
292         }
293
294         buf = malloc(size);
295         if (!buf) {
296                 fprintf(stderr, "Error allocating buffer %llu entry %llu\n", (unsigned long long)size, (unsigned long long)log->cur_entry - 1);
297                 return -1;
298         }
299
300         ret = read(log->logfd, buf, size);
301         if (ret != size) {
302                 fprintf(stderr, "Error reading data: %d\n", errno);
303                 free(buf);
304                 return -1;
305         }
306         log->cur_pos += size;
307
308         offset = le64_to_cpu(entry->sector) * log->sectorsize;
309         ret = pwrite(log->replayfd, buf, size, offset);
310         free(buf);
311         if (ret != size) {
312                 fprintf(stderr, "Error writing data: %d\n", errno);
313                 return -1;
314         }
315
316         return 0;
317 }
318
319 /*
320  * @log: the log we are manipulating.
321  * @entry_num: the entry we want.
322  *
323  * Seek to the given entry in the log, starting at 0 and ending at
324  * log->nr_entries - 1.
325  */
326 int log_seek_entry(struct log *log, u64 entry_num)
327 {
328         u64 i = 0;
329
330         if (entry_num >= log->nr_entries) {
331                 fprintf(stderr, "Invalid entry number\n");
332                 return -1;
333         }
334
335         /* Skip the first sector containing the log super block */
336         log->cur_pos = lseek(log->logfd, log->sectorsize, SEEK_SET);
337         if (log->cur_pos == (off_t)-1) {
338                 fprintf(stderr, "Error seeking in file: %d\n", errno);
339                 return -1;
340         }
341
342         log->cur_entry = 0;
343         for (i = 0; i < entry_num; i++) {
344                 struct log_write_entry entry;
345                 ssize_t ret;
346                 off_t seek_size;
347                 u64 flags;
348
349                 ret = read(log->logfd, &entry, sizeof(entry));
350                 if (ret != sizeof(entry)) {
351                         fprintf(stderr, "Error reading entry: %d\n", errno);
352                         return -1;
353                 }
354                 if (!log_entry_valid(&entry)) {
355                         fprintf(stderr, "Malformed entry @%llu\n",
356                                         log->cur_pos / log->sectorsize);
357                         return -1;
358                 }
359                 if (log_writes_verbose > 1)
360                         printf("seek entry %d@%llu: %llu, size %llu, flags 0x%llx\n",
361                                (int)i, log->cur_pos / log->sectorsize,
362                                (unsigned long long)le64_to_cpu(entry.sector),
363                                (unsigned long long)le64_to_cpu(entry.nr_sectors),
364                                (unsigned long long)le64_to_cpu(entry.flags));
365                 flags = le64_to_cpu(entry.flags);
366                 seek_size = log->sectorsize - sizeof(entry);
367                 if (!(flags & LOG_DISCARD_FLAG))
368                         seek_size += le64_to_cpu(entry.nr_sectors) *
369                                 log->sectorsize;
370                 log->cur_pos = lseek(log->logfd, seek_size, SEEK_CUR);
371                 if (log->cur_pos == (off_t)-1) {
372                         fprintf(stderr, "Error seeking in file: %d\n", errno);
373                         return -1;
374                 }
375                 log->cur_entry++;
376         }
377
378         return 0;
379 }
380
381 /*
382  * @log: the log we are manipulating.
383  * @entry: the entry we read.
384  * @read_data: read the extra data for the entry, your entry must be
385  * log->sectorsize large.
386  *
387  * @return: 1 if we hit the end of the log, 0 we got the next entry, < 0 if
388  * there was an error.
389  *
390  * Seek to the next entry in the log.
391  */
392 int log_seek_next_entry(struct log *log, struct log_write_entry *entry,
393                         int read_data)
394 {
395         size_t read_size = read_data ? log->sectorsize :
396                 sizeof(struct log_write_entry);
397         u64 flags;
398         char flags_buf[LOG_FLAGS_BUF_SIZE];
399         ssize_t ret;
400
401         if (log->cur_entry >= log->nr_entries)
402                 return 1;
403
404         ret = read(log->logfd, entry, read_size);
405         if (ret != read_size) {
406                 fprintf(stderr, "Error reading entry: %d\n", errno);
407                 return -1;
408         }
409         if (!log_entry_valid(entry)) {
410                 fprintf(stderr, "Malformed entry @%llu\n",
411                                 log->cur_pos / log->sectorsize);
412                 return -1;
413         }
414         log->cur_entry++;
415
416         if (read_size < log->sectorsize) {
417                 log->cur_pos = lseek(log->logfd,
418                         log->sectorsize - sizeof(struct log_write_entry), SEEK_CUR);
419                 if (log->cur_pos == (off_t)-1) {
420                         fprintf(stderr, "Error seeking in log: %d\n", errno);
421                         return -1;
422                 }
423         } else {
424                 log->cur_pos += read_size;
425         }
426         flags = le64_to_cpu(entry->flags);
427         entry_flags_to_str(flags, flags_buf);
428         if (log_writes_verbose > 1)
429                 printf("seek entry %d@%llu: %llu, size %llu, flags 0x%llx(%s)\n",
430                        (int)log->cur_entry - 1, log->cur_pos / log->sectorsize,
431                        (unsigned long long)le64_to_cpu(entry->sector),
432                        (unsigned long long)le64_to_cpu(entry->nr_sectors),
433                        (unsigned long long)flags, flags_buf);
434
435         read_size = le64_to_cpu(entry->nr_sectors) * log->sectorsize;
436         if (!read_size || (flags & LOG_DISCARD_FLAG))
437                 return 0;
438
439         log->cur_pos = lseek(log->logfd, read_size, SEEK_CUR);
440         if (log->cur_pos == (off_t)-1) {
441                 fprintf(stderr, "Error seeking in log: %d\n", errno);
442                 return -1;
443         }
444
445         return 0;
446 }
447
448 /*
449  * @logfile: the file that contains the write log.
450  * @replayfile: the file/device to replay onto, can be NULL.
451  *
452  * Opens a logfile and makes sure it is valid and returns a struct log.
453  */
454 struct log *log_open(char *logfile, char *replayfile)
455 {
456         struct log *log;
457         struct log_write_super super;
458         ssize_t ret;
459
460         log = malloc(sizeof(struct log));
461         if (!log) {
462                 fprintf(stderr, "Couldn't alloc log\n");
463                 return NULL;
464         }
465
466         log->replayfd = -1;
467
468         log->logfd = open(logfile, O_RDONLY);
469         if (log->logfd < 0) {
470                 fprintf(stderr, "Couldn't open log %s: %d\n", logfile,
471                         errno);
472                 log_free(log);
473                 return NULL;
474         }
475
476         if (replayfile) {
477                 log->replayfd = open(replayfile, O_WRONLY);
478                 if (log->replayfd < 0) {
479                         fprintf(stderr, "Couldn't open replay file %s: %d\n",
480                                 replayfile, errno);
481                         log_free(log);
482                         return NULL;
483                 }
484         }
485
486         ret = read(log->logfd, &super, sizeof(struct log_write_super));
487         if (ret < sizeof(struct log_write_super)) {
488                 fprintf(stderr, "Error reading super: %d\n", errno);
489                 log_free(log);
490                 return NULL;
491         }
492
493         if (le64_to_cpu(super.magic) != WRITE_LOG_MAGIC) {
494                 fprintf(stderr, "Magic doesn't match\n");
495                 log_free(log);
496                 return NULL;
497         }
498
499         if (le64_to_cpu(super.version) != WRITE_LOG_VERSION) {
500                 fprintf(stderr, "Version mismatch, wanted %d, have %d\n",
501                         WRITE_LOG_VERSION, (int)le64_to_cpu(super.version));
502                 log_free(log);
503                 return NULL;
504         }
505
506         log->sectorsize = le32_to_cpu(super.sectorsize);
507         log->nr_entries = le64_to_cpu(super.nr_entries);
508         log->max_zero_size = 128 * 1024 * 1024;
509
510         log->cur_pos = lseek(log->logfd, log->sectorsize - sizeof(super), SEEK_CUR);
511         if (log->cur_pos == (off_t) -1) {
512                 fprintf(stderr, "Error seeking to first entry: %d\n", errno);
513                 log_free(log);
514                 return NULL;
515         }
516         log->cur_entry = 0;
517
518         return log;
519 }