f1da72fb26ccdaa7ae2587e7552f064621f76693
[xfstests-dev.git] / src / fssum.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2012 STRATO AG.  All rights reserved.
4  */
5 #ifdef __linux__
6 #define _BSD_SOURCE
7 #define _DEFAULT_SOURCE
8 #define _LARGEFILE64_SOURCE
9 #ifndef _GNU_SOURCE
10 #define _GNU_SOURCE
11 #endif
12 #endif
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <unistd.h>
16 #include <string.h>
17 #include <fcntl.h>
18 #include <dirent.h>
19 #include <errno.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/sysmacros.h>
23 #ifdef __SOLARIS__
24 #include <sys/mkdev.h>
25 #endif
26 #include "md5.h"
27 #include <netinet/in.h>
28 #include <inttypes.h>
29 #include <assert.h>
30 #include <endian.h>
31
/* Number of digest bytes in a checksum; printed as CS_SIZE * 2 hex digits. */
#define CS_SIZE 16
/* Growth increment, in entries, of the per-directory name list in sum(). */
#define CHUNKS  128

/* Fallback lseek() whence values for Linux builds whose headers lack them. */
#ifdef __linux__
#ifndef SEEK_DATA
#define SEEK_DATA 3
#define SEEK_HOLE 4
#endif
#endif
41
42 /* TODO: add hardlink recognition */
43 /* TODO: add xattr/acl */
44
/* One -x exclusion: a path prefix together with its cached length. */
struct excludes {
        char *path;     /* prefix compared against the paths built in sum() */
        int len;        /* number of bytes of path that strncmp() compares */
};
49
/* A running MD5 computation plus the digest written by sum_fini(). */
typedef struct _sum {
        MD5_CTX         md5;            /* hashing state fed through sum_add() */
        unsigned char   out[16];        /* digest bytes, filled in by sum_fini() */
} sum_t;

/*
 * Signature shared by the file-data hashing strategies: hash the contents
 * of fd into dst; return 0 on success and a negative value on failure.
 */
typedef int (*sum_file_data_t)(int fd, sum_t *dst);
56
int gen_manifest = 0;           /* -f: emit one manifest line per entry */
int in_manifest = 0;            /* the -r input starts with "Flags: " (manifest) */
char *checksum = NULL;          /* expected checksum taken from the -r input */
struct excludes *excludes;      /* array of -x exclusions */
int n_excludes = 0;             /* number of elements in excludes */
int verbose = 0;                /* incremented once per -v */
FILE *out_fp;                   /* checksum/manifest output: stdout or the -w file */
FILE *in_fp;                    /* -r input file; NULL when -r was not given */
65
/*
 * Indices into flags[]. The order has to match flchar[], which supplies the
 * command-line letter for each index.
 */
enum _flags {
        FLAG_UID,               /* 'u' */
        FLAG_GID,               /* 'g' */
        FLAG_MODE,              /* 'o' */
        FLAG_ATIME,             /* 'a' */
        FLAG_MTIME,             /* 'm' */
        FLAG_CTIME,             /* 'c' */
        FLAG_DATA,              /* 'd' */
        FLAG_OPEN_ERROR,        /* 'e' */
        FLAG_STRUCTURE,         /* 's' */
        NUM_FLAGS
};
78
/* Option letter of each flag, indexed by enum _flags. */
const char flchar[] = "ugoamcdes";
/* Scratch buffer for lines read from the -r input file. */
char line[65536];

/* Current field mask, indexed by enum _flags; nonzero means "included". */
int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 0, 0};
83
/*
 * Read one line from fp into buf (at most size - 1 bytes) and drop every
 * trailing '\n' and '\r'. Returns buf, or NULL when fgets() returns NULL.
 */
char *
getln(char *buf, int size, FILE *fp)
{
        char *end;

        if (fgets(buf, size, fp) == NULL)
                return NULL;

        /* walk back from the terminator while the last byte is a line ending */
        for (end = buf + strlen(buf); end > buf; --end) {
                if (end[-1] != '\n' && end[-1] != '\r')
                        break;
                end[-1] = 0;
        }

        return buf;
}
100
101 void
102 parse_flag(int c)
103 {
104         int i;
105         int is_upper = 0;
106
107         if (c >= 'A' && c <= 'Z') {
108                 is_upper = 1;
109                 c += 'a' - 'A';
110         }
111         for (i = 0; flchar[i]; ++i) {
112                 if (flchar[i] == c) {
113                         flags[i] = is_upper ? 0 : 1;
114                         return;
115                 }
116         }
117         fprintf(stderr, "unrecognized flag %c\n", c);
118         exit(-1);
119 }
120
/* Feed every character of the NUL-terminated string p to parse_flag(). */
void
parse_flags(char *p)
{
        for (; *p != '\0'; ++p)
                parse_flag(*p);
}
127
/*
 * Print the command-line help to stderr and exit with status -1.
 *
 * The include-flag summary lists all nine letters of flchar[]; the 's'
 * flag is accepted by the option parser and described in the list below
 * it, so it belongs in the summary as well.
 */
void
usage(void)
{
        static const char * const help[] = {
                "usage: fssum <options> <path>\n",
                "  options:\n",
                "    -f          : write out a full manifest file\n",
                "    -w <file>   : send output to file\n",
                "    -v          : verbose mode (debugging only)\n",
                "    -r <file>   : read checksum or manifest from file\n",
                "    -[ugoamcdes]: specify which fields to include in checksum calculation.\n",
                "         u      : include uid\n",
                "         g      : include gid\n",
                "         o      : include mode\n",
                "         m      : include mtime\n",
                "         a      : include atime\n",
                "         c      : include ctime\n",
                "         d      : include file data\n",
                "         e      : include open errors (aborts otherwise)\n",
                "         s      : include block structure (holes)\n",
                "    -[UGOAMCDES]: exclude respective field from calculation\n",
                "    -n          : reset all flags\n",
                "    -N          : set all flags\n",
                "    -x path     : exclude path when building checksum (multiple ok)\n",
                "    -h          : this help\n\n",
                "The default field mask is ugoamCdES. If the checksum/manifest is read from a\n",
                "file, the mask is taken from there and the values given on the command line\n",
                "are ignored.\n",
        };
        size_t i;

        for (i = 0; i < sizeof(help) / sizeof(help[0]); ++i)
                fputs(help[i], stderr);
        exit(-1);
}
158
/* Shared I/O buffer: receives file contents from read() and symlink targets from readlink(). */
static char buf[65536];
160
/*
 * malloc() wrapper that never returns NULL: when the allocation fails it
 * prints "malloc failed" to stderr and exits with status -1.
 */
void *
alloc(size_t sz)
{
        void *mem = malloc(sz);

        if (mem != NULL)
                return mem;

        fprintf(stderr, "malloc failed\n");
        exit(-1);
}
173
174 void
175 sum_init(sum_t *cs)
176 {
177         MD5_Init(&cs->md5);
178 }
179
180 void
181 sum_fini(sum_t *cs)
182 {
183         MD5_Final(cs->out, &cs->md5);
184 }
185
186 void
187 sum_add(sum_t *cs, void *buf, int size)
188 {
189         MD5_Update(&cs->md5, buf, size);
190 }
191
192 void
193 sum_add_sum(sum_t *dst, sum_t *src)
194 {
195         sum_add(dst, src->out, sizeof(src->out));
196 }
197
198 void
199 sum_add_u64(sum_t *dst, uint64_t val)
200 {
201         uint64_t v = htobe64(val);
202         sum_add(dst, &v, sizeof(v));
203 }
204
205 void
206 sum_add_time(sum_t *dst, time_t t)
207 {
208         sum_add_u64(dst, t);
209 }
210
211 char *
212 sum_to_string(sum_t *dst)
213 {
214         int i;
215         char *s = alloc(CS_SIZE * 2 + 1);
216
217         for (i = 0; i < CS_SIZE; ++i)
218                 sprintf(s + i * 2, "%02x", dst->out[i]);
219
220         return s;
221 }
222
223 int
224 sum_file_data_permissive(int fd, sum_t *dst)
225 {
226         int ret;
227
228         while (1) {
229                 ret = read(fd, buf, sizeof(buf));
230                 if (ret < 0)
231                         return -errno;
232                 sum_add(dst, buf, ret);
233                 if (ret < sizeof(buf))
234                         break;
235         }
236         return 0;
237 }
238
239 int
240 sum_file_data_strict(int fd, sum_t *dst)
241 {
242         int ret;
243         off_t pos;
244
245         pos = lseek(fd, 0, SEEK_CUR);
246         if (pos == (off_t)-1)
247                 return errno == ENXIO ? 0 : -2;
248
249         while (1) {
250                 pos = lseek(fd, pos, SEEK_DATA);
251                 if (pos == (off_t)-1)
252                         return errno == ENXIO ? 0 : -2;
253                 ret = read(fd, buf, sizeof(buf));
254                 assert(ret); /* eof found by lseek */
255                 if (ret <= 0)
256                         return ret;
257                 if (verbose >= 2)
258                         fprintf(stderr,
259                                 "adding to sum at file offset %llu, %d bytes\n",
260                                 (unsigned long long)pos, ret);
261                 sum_add_u64(dst, (uint64_t)pos);
262                 sum_add(dst, buf, ret);
263                 pos += ret;
264         }
265 }
266
/*
 * Return a copy of in where every byte outside printable ASCII (32..126),
 * and the backslash itself, is replaced by a backslash followed by the
 * byte's two-digit lowercase hex value. The result comes from alloc(); the
 * caller frees it.
 */
char *
escape(char *in)
{
        static const char hex[] = "0123456789abcdef";
        /* worst case: every input byte expands to three output bytes */
        char *out = alloc(strlen(in) * 3 + 1);
        char *w = out;
        const char *r;

        for (r = in; *r != '\0'; ++r) {
                const unsigned char byte = (unsigned char)*r;

                if (byte >= 32 && byte < 127 && byte != '\\') {
                        *w++ = *r;
                        continue;
                }
                *w++ = '\\';
                *w++ = hex[byte >> 4];
                *w++ = hex[byte & 0x0f];
        }
        *w = '\0';

        return out;
}
286
/* Report on stdout an entry that exists locally but not in the manifest. */
void
excess_file(const char *fn)
{
        fprintf(stdout, "only in local fs: %s\n", fn);
}
292
/* Report on stdout an entry that is listed in the manifest but not found locally. */
void
missing_file(const char *fn)
{
        fprintf(stdout, "only in remote fs: %s\n", fn);
}
298
/*
 * Compare two manifest paths in manifest order.
 *
 * Ordering is strcmp() order with one exception: a directory (a path ending
 * in '/') compares greater than every path it is a proper prefix of, because
 * a directory's own line is emitted after the lines of its contents.
 *
 * Both lengths are checked for being non-zero before the last character is
 * inspected, so an empty string (which a malformed manifest line can
 * produce) never causes a read before the start of the buffer.
 *
 * Returns a value <0, 0 or >0 like strcmp().
 */
int
pathcmp(const char *a, const char *b)
{
        size_t len_a = strlen(a);
        size_t len_b = strlen(b);

        /*
         * as the containing directory is sent after the files, it has to
         * come out bigger in the comparison.
         */
        if (len_a > 0 && len_a < len_b && a[len_a - 1] == '/' &&
            strncmp(a, b, len_a) == 0)
                return 1;
        if (len_b > 0 && len_b < len_a && b[len_b - 1] == '/' &&
            strncmp(a, b, len_b) == 0)
                return -1;

        return strcmp(a, b);
}
316
/*
 * Compare the local and remote metadata sums (local_m / remote_m) and data
 * sums (local_c / remote_c) of entry fn and print which of them differ.
 * Nothing is printed when both pairs are equal.
 */
void
check_match(char *fn, char *local_m, char *remote_m,
            char *local_c, char *remote_c)
{
        const int meta_differs = strcmp(local_m, remote_m) != 0;
        const int data_differs = strcmp(local_c, remote_c) != 0;

        if (meta_differs && data_differs)
                printf("metadata and data mismatch in %s\n", fn);
        else if (meta_differs)
                printf("metadata mismatch in %s\n", fn);
        else if (data_differs)
                printf("data mismatch in %s\n", fn);
}
332
333 char *prev_fn;
334 char *prev_m;
335 char *prev_c;
336 void
337 check_manifest(char *fn, char *m, char *c, int last_call)
338 {
339         char *rem_m;
340         char *rem_c;
341         char *l;
342         int cmp;
343
344         if (prev_fn) {
345                 if (last_call)
346                         cmp = -1;
347                 else
348                         cmp = pathcmp(prev_fn, fn);
349                 if (cmp > 0) {
350                         excess_file(fn);
351                         return;
352                 } else if (cmp < 0) {
353                         missing_file(prev_fn);
354                 } else {
355                         check_match(fn, m, prev_m, c, prev_c);
356                 }
357                 free(prev_fn);
358                 free(prev_m);
359                 free(prev_c);
360                 prev_fn = NULL;
361                 prev_m = NULL;
362                 prev_c = NULL;
363                 if (cmp == 0)
364                         return;
365         }
366         while ((l = getln(line, sizeof(line), in_fp))) {
367                 rem_c = strrchr(l, ' ');
368                 if (!rem_c) {
369                         /* final cs */
370                         checksum = strdup(l);
371                         break;
372                 }
373                 if (rem_c == l) {
374 malformed:
375                         fprintf(stderr, "malformed input\n");
376                         exit(-1);
377                 }
378                 *rem_c++ = 0;
379                 rem_m = strrchr(l, ' ');
380                 if (!rem_m)
381                         goto malformed;
382                 *rem_m++ = 0;
383
384                 if (last_call)
385                         cmp = -1;
386                 else
387                         cmp = pathcmp(l, fn);
388                 if (cmp == 0) {
389                         check_match(fn, m, rem_m, c, rem_c);
390                         return;
391                 } else if (cmp > 0) {
392                         excess_file(fn);
393                         prev_fn = strdup(l);
394                         prev_m = strdup(rem_m);
395                         prev_c = strdup(rem_c); 
396                         return;
397                 }
398                 missing_file(l);
399         }
400         if (!last_call)
401                 excess_file(fn);
402 }
403
/*
 * qsort() comparator for an array of C strings: aa and bb point at two
 * elements (each a char *), which are ordered with strcmp().
 */
int
namecmp(const void *aa, const void *bb)
{
        const char *lhs = *(char * const *)aa;
        const char *rhs = *(char * const *)bb;

        return strcmp(lhs, rhs);
}
412
413 void
414 sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
415 {
416         DIR *d;
417         struct dirent *de;
418         char **namelist = NULL;
419         int alloclen = 0;
420         int entries = 0;
421         int i;
422         int ret;
423         int fd;
424         int excl;
425         sum_file_data_t sum_file_data = flags[FLAG_STRUCTURE] ?
426                         sum_file_data_strict : sum_file_data_permissive;
427
428         d = fdopendir(dirfd);
429         if (!d) {
430                 perror("opendir");
431                 exit(-1);
432         }
433         while((de = readdir(d))) {
434                 if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
435                         continue;
436                 if (entries == alloclen) {
437                         alloclen += CHUNKS;
438                         namelist = realloc(namelist,
439                                            alloclen * sizeof(*namelist));
440                         if (!namelist) {
441                                 fprintf(stderr, "malloc failed\n");
442                                 exit(-1);
443                         }
444                 }
445                 namelist[entries] = strdup(de->d_name);
446                 if (!namelist[entries]) {
447                         fprintf(stderr, "malloc failed\n");
448                         exit(-1);
449                 }
450                 ++entries;
451         }
452         qsort(namelist, entries, sizeof(*namelist), namecmp);
453         for (i = 0; i < entries; ++i) {
454                 struct stat64 st;
455                 sum_t cs;
456                 sum_t meta;
457                 char *path;
458
459                 sum_init(&cs);
460                 sum_init(&meta);
461                 path = alloc(strlen(path_in) + strlen(namelist[i]) + 3);
462                 sprintf(path, "%s/%s", path_in, namelist[i]);
463                 for (excl = 0; excl < n_excludes; ++excl) {
464                         if (strncmp(excludes[excl].path, path,
465                             excludes[excl].len) == 0)
466                                 goto next;
467                 }
468
469                 ret = fchdir(dirfd);
470                 if (ret == -1) {
471                         perror("fchdir");
472                         exit(-1);
473                 }
474                 ret = lstat64(namelist[i], &st);
475                 if (ret) {
476                         fprintf(stderr, "stat failed for %s/%s: %s\n",
477                                 path_prefix, path, strerror(errno));
478                         exit(-1);
479                 }
480                 sum_add_u64(&meta, level);
481                 sum_add(&meta, namelist[i], strlen(namelist[i]));
482                 if (!S_ISDIR(st.st_mode))
483                         sum_add_u64(&meta, st.st_nlink);
484                 if (flags[FLAG_UID])
485                         sum_add_u64(&meta, st.st_uid);
486                 if (flags[FLAG_GID])
487                         sum_add_u64(&meta, st.st_gid);
488                 if (flags[FLAG_MODE])
489                         sum_add_u64(&meta, st.st_mode);
490                 if (flags[FLAG_ATIME])
491                         sum_add_time(&meta, st.st_atime);
492                 if (flags[FLAG_MTIME])
493                         sum_add_time(&meta, st.st_mtime);
494                 if (flags[FLAG_CTIME])
495                         sum_add_time(&meta, st.st_ctime);
496                 if (S_ISDIR(st.st_mode)) {
497                         fd = openat(dirfd, namelist[i], 0);
498                         if (fd == -1 && flags[FLAG_OPEN_ERROR]) {
499                                 sum_add_u64(&meta, errno);
500                         } else if (fd == -1) {
501                                 fprintf(stderr, "open failed for %s/%s: %s\n",
502                                         path_prefix, path, strerror(errno));
503                                 exit(-1);
504                         } else {
505                                 sum(fd, level + 1, &cs, path_prefix, path);
506                                 close(fd);
507                         }
508                 } else if (S_ISREG(st.st_mode)) {
509                         sum_add_u64(&meta, st.st_size);
510                         if (flags[FLAG_DATA]) {
511                                 if (verbose)
512                                         fprintf(stderr, "file %s\n",
513                                                 namelist[i]);
514                                 fd = openat(dirfd, namelist[i], 0);
515                                 if (fd == -1 && flags[FLAG_OPEN_ERROR]) {
516                                         sum_add_u64(&meta, errno);
517                                 } else if (fd == -1) {
518                                         fprintf(stderr,
519                                                 "open failed for %s/%s: %s\n",
520                                                 path_prefix, path,
521                                                 strerror(errno));
522                                         exit(-1);
523                                 }
524                                 if (fd != -1) {
525                                         ret = sum_file_data(fd, &cs);
526                                         if (ret < 0) {
527                                                 fprintf(stderr,
528                                                         "read failed for "
529                                                         "%s/%s: %s\n",
530                                                         path_prefix, path,
531                                                         strerror(errno));
532                                                 exit(-1);
533                                         }
534                                         close(fd);
535                                 }
536                         }
537                 } else if (S_ISLNK(st.st_mode)) {
538                         ret = readlink(namelist[i], buf, sizeof(buf));
539                         if (ret == -1) {
540                                 perror("readlink");
541                                 exit(-1);
542                         }
543                         sum_add(&cs, buf, ret);
544                 } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
545                         sum_add_u64(&cs, major(st.st_rdev));
546                         sum_add_u64(&cs, minor(st.st_rdev));
547                 }
548                 sum_fini(&cs);
549                 sum_fini(&meta);
550                 if (gen_manifest || in_manifest) {
551                         char *fn;
552                         char *m;
553                         char *c;
554
555                         if (S_ISDIR(st.st_mode))
556                                 strcat(path, "/");
557                         fn = escape(path);
558                         m = sum_to_string(&meta);
559                         c = sum_to_string(&cs);
560
561                         if (gen_manifest)
562                                 fprintf(out_fp, "%s %s %s\n", fn, m, c);
563                         if (in_manifest)
564                                 check_manifest(fn, m, c, 0);
565                         free(c);
566                         free(m);
567                         free(fn);
568                 }
569                 sum_add_sum(dircs, &cs);
570                 sum_add_sum(dircs, &meta);
571 next:
572                 free(path);
573         }
574 }
575
576 int
577 main(int argc, char *argv[])
578 {
579         extern char *optarg;
580         extern int optind;
581         int     c;
582         char *path;
583         int fd;
584         sum_t cs;
585         char flagstring[sizeof(flchar)];
586         int i;
587         int plen;
588         int elen;
589         int n_flags = 0;
590         const char *allopts = "heEfuUgGoOaAmMcCdDsSnNw:r:vx:";
591
592         out_fp = stdout;
593         while ((c = getopt(argc, argv, allopts)) != EOF) {
594                 switch(c) {
595                 case 'f':
596                         gen_manifest = 1;
597                         break;
598                 case 'u':
599                 case 'U':
600                 case 'g':
601                 case 'G':
602                 case 'o':
603                 case 'O':
604                 case 'a':
605                 case 'A':
606                 case 'm':
607                 case 'M':
608                 case 'c':
609                 case 'C':
610                 case 'd':
611                 case 'D':
612                 case 'e':
613                 case 'E':
614                 case 's':
615                 case 'S':
616                         ++n_flags;
617                         parse_flag(c);
618                         break;
619                 case 'n':
620                         for (i = 0; i < NUM_FLAGS; ++i)
621                                 flags[i] = 0;
622                         break;
623                 case 'N':
624                         for (i = 0; i < NUM_FLAGS; ++i)
625                                 flags[i] = 1;
626                         break;
627                 case 'w':
628                         out_fp = fopen(optarg, "w");
629                         if (!out_fp) {
630                                 fprintf(stderr,
631                                         "failed to open output file: %s\n",
632                                         strerror(errno));
633                                 exit(-1);
634                         }
635                         break;
636                 case 'r':
637                         in_fp = fopen(optarg, "r");
638                         if (!in_fp) {
639                                 fprintf(stderr,
640                                         "failed to open input file: %s\n",
641                                         strerror(errno));
642                                 exit(-1);
643                         }
644                         break;
645                 case 'x':
646                         ++n_excludes;
647                         excludes = realloc(excludes,
648                                            sizeof(*excludes) * n_excludes);
649                         if (!excludes) {
650                                 fprintf(stderr,
651                                         "failed to alloc exclude space\n");
652                                 exit(-1);
653                         }
654                         excludes[n_excludes - 1].path = optarg;
655                         break;
656                 case 'v':
657                         ++verbose;
658                         break;
659                 case 'h':
660                 case '?':
661                         usage();
662                 }
663         }
664
665         if (optind + 1 != argc) {
666                 fprintf(stderr, "missing path\n");
667                 usage();
668         }
669
670         if (in_fp) {
671                 char *l = getln(line, sizeof(line), in_fp);
672                 char *p;
673
674                 if (l == NULL) {
675                         fprintf(stderr, "failed to read line from input\n");
676                         exit(-1);
677                 }
678                 if (strncmp(l, "Flags: ", 7) == 0) {
679                         l += 7;
680                         in_manifest = 1;
681                         parse_flags(l);
682                 } else if ((p = strchr(l, ':'))) {
683                         *p++ = 0;
684                         parse_flags(l);
685                         checksum = strdup(p);
686                 } else {
687                         fprintf(stderr, "invalid input file format\n");
688                         exit(-1);
689                 }
690                 if (n_flags)
691                         fprintf(stderr, "warning: "
692                                 "command line flags ignored in -r mode\n");
693         }
694         strcpy(flagstring, flchar);
695         for (i = 0; i < NUM_FLAGS; ++i) {
696                 if (flags[i] == 0)
697                         flagstring[i] -= 'a' - 'A';
698         }
699
700         path = argv[optind];
701         plen = strlen(path);
702         if (path[plen - 1] == '/') {
703                 --plen;
704                 path[plen] = '\0';
705         }
706
707         for (i = 0; i < n_excludes; ++i) {
708                 if (strncmp(path, excludes[i].path, plen) != 0)
709                         fprintf(stderr,
710                                 "warning: exclude %s outside of path %s\n",
711                                 excludes[i].path, path);
712                 else
713                         excludes[i].path += plen;
714                 elen = strlen(excludes[i].path);
715                 if (excludes[i].path[elen - 1] == '/')
716                         --elen;
717                 excludes[i].path[elen] = '\0';
718                 excludes[i].len = elen;
719         }
720
721         fd = open(path, O_RDONLY);
722         if (fd == -1) {
723                 fprintf(stderr, "failed to open %s: %s\n", path,
724                         strerror(errno));
725                 exit(-1);
726         }
727
728         if (gen_manifest)
729                 fprintf(out_fp, "Flags: %s\n", flagstring);
730
731         sum_init(&cs);
732         sum(fd, 1, &cs, path, "");
733         sum_fini(&cs);
734
735         close(fd);
736         if (in_manifest)
737                 check_manifest("", "", "", 1);
738
739         if (!checksum) {
740                 if (in_manifest) {
741                         fprintf(stderr, "malformed input\n");
742                         exit(-1);
743                 }
744                 if (!gen_manifest)
745                         fprintf(out_fp, "%s:", flagstring);
746
747                 fprintf(out_fp, "%s\n", sum_to_string(&cs));
748         } else {
749                 if (strcmp(checksum, sum_to_string(&cs)) == 0) {
750                         printf("OK\n");
751                         exit(0);
752                 } else {
753                         printf("FAIL\n");
754                         exit(1);
755                 }
756         }
757
758         exit(0);
759 }