diff --git a/src/bin/pg_basebackup/astreamer_inject.c b/src/bin/pg_basebackup/astreamer_inject.c index 15334e458ad..3fb3d4ecfd5 100644 --- a/src/bin/pg_basebackup/astreamer_inject.c +++ b/src/bin/pg_basebackup/astreamer_inject.c @@ -224,8 +224,9 @@ astreamer_inject_file(astreamer *streamer, char *pathname, char *data, strlcpy(member.pathname, pathname, MAXPGPATH); member.size = len; member.mode = pg_file_create_mode; + member.is_regular = true; member.is_directory = false; - member.is_link = false; + member.is_symlink = false; member.linktarget[0] = '\0'; /* diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 8456992c33c..4293e20b20e 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -42,6 +42,7 @@ #include "pg_backup_archiver.h" #include "pg_backup_db.h" #include "pg_backup_utils.h" +#include "pgtar.h" #define TEXT_DUMP_HEADER "--\n-- PostgreSQL database dump\n--\n\n" #define TEXT_DUMPALL_HEADER "--\n-- PostgreSQL database cluster dump\n--\n\n" @@ -2349,7 +2350,7 @@ _discoverArchiveFormat(ArchiveHandle *AH) } if (!isValidTarHeader(AH->lookahead)) - pg_fatal("input file does not appear to be a valid archive"); + pg_fatal("input file does not appear to be a valid tar archive"); AH->format = archTar; } diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index 325b53fc9bd..c01d450697f 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -464,8 +464,6 @@ extern void InitArchiveFmt_Null(ArchiveHandle *AH); extern void InitArchiveFmt_Directory(ArchiveHandle *AH); extern void InitArchiveFmt_Tar(ArchiveHandle *AH); -extern bool isValidTarHeader(char *header); - extern void ReconnectToServer(ArchiveHandle *AH, const char *dbname); extern void IssueCommandPerBlob(ArchiveHandle *AH, TocEntry *te, const char *cmdBegin, const char *cmdEnd); diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c index b5ba3b46dd9..ec42a2cb19d 100644 --- a/src/bin/pg_dump/pg_backup_tar.c +++ b/src/bin/pg_dump/pg_backup_tar.c @@ -984,31 +984,6 @@ tarPrintf(TAR_MEMBER *th, const char *fmt,...) return (int) cnt; } -bool -isValidTarHeader(char *header) -{ - int sum; - int chk = tarChecksum(header); - - sum = read_tar_number(&header[TAR_OFFSET_CHECKSUM], 8); - - if (sum != chk) - return false; - - /* POSIX tar format */ - if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar\0", 6) == 0 && - memcmp(&header[TAR_OFFSET_VERSION], "00", 2) == 0) - return true; - /* GNU tar format */ - if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar \0", 8) == 0) - return true; - /* not-quite-POSIX format written by pre-9.3 pg_dump */ - if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar00\0", 8) == 0) - return true; - - return false; -} - /* Given the member, write the TAR header & copy the file */ static void _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th) diff --git a/src/bin/pg_verifybackup/astreamer_verify.c b/src/bin/pg_verifybackup/astreamer_verify.c index 33cf67670a7..ba49a0520e2 100644 --- a/src/bin/pg_verifybackup/astreamer_verify.c +++ b/src/bin/pg_verifybackup/astreamer_verify.c @@ -165,7 +165,7 @@ member_verify_header(astreamer *streamer, astreamer_member *member) char pathname[MAXPGPATH]; /* We are only interested in normal files. */ - if (member->is_directory || member->is_link) + if (!member->is_regular) return; /* diff --git a/src/fe_utils/astreamer_file.c b/src/fe_utils/astreamer_file.c index b6cbd343f99..e1b339ecc8b 100644 --- a/src/fe_utils/astreamer_file.c +++ b/src/fe_utils/astreamer_file.c @@ -228,9 +228,13 @@ astreamer_extractor_content(astreamer *streamer, astreamer_member *member, mystreamer->filename[fnamelen - 1] = '\0'; /* Dispatch based on file type. */ - if (member->is_directory) + if (member->is_regular) + mystreamer->file = + create_file_for_extract(mystreamer->filename, + member->mode); + else if (member->is_directory) extract_directory(mystreamer->filename, member->mode); - else if (member->is_link) + else if (member->is_symlink) { const char *linktarget = member->linktarget; @@ -238,10 +242,6 @@ astreamer_extractor_content(astreamer *streamer, astreamer_member *member, linktarget = mystreamer->link_map(linktarget); extract_link(mystreamer->filename, linktarget); } - else - mystreamer->file = - create_file_for_extract(mystreamer->filename, - member->mode); /* Report output file change. */ if (mystreamer->report_output_file) diff --git a/src/fe_utils/astreamer_tar.c b/src/fe_utils/astreamer_tar.c index 8390c0b49f1..ba446553e12 100644 --- a/src/fe_utils/astreamer_tar.c +++ b/src/fe_utils/astreamer_tar.c @@ -237,12 +237,16 @@ astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member, /* * We've seen an end-of-archive indicator, so anything more is - * buffered and sent as part of the archive trailer. But we - * don't expect more than 2 blocks. + * buffered and sent as part of the archive trailer. + * + * Per POSIX, the last physical block of a tar archive is + * always full-sized, so there may be undefined data after the + * two zero blocks that mark end-of-archive. GNU tar, for + * example, zero-pads to a 10kB boundary by default. We just + * buffer whatever we receive and pass it along at finalize + * time. */ astreamer_buffer_bytes(streamer, &data, &len, len); - if (len > 2 * TAR_BLOCK_SIZE) - pg_fatal("tar file trailer exceeds 2 blocks"); return; default: @@ -256,7 +260,8 @@ astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member, * Parse a file header within a tar stream. * * The return value is true if we found a file header and passed it on to the - * next astreamer; it is false if we have reached the archive trailer. + * next astreamer; it is false if we have found the archive trailer. + * We throw error if we see invalid data. */ static bool astreamer_tar_header(astreamer_tar_parser *mystreamer) @@ -268,6 +273,9 @@ astreamer_tar_header(astreamer_tar_parser *mystreamer) Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE); + /* Zero out fields of *member, just for consistency. */ + memset(member, 0, sizeof(astreamer_member)); + /* Check whether we've got a block of all zero bytes. */ for (i = 0; i < TAR_BLOCK_SIZE; ++i) { @@ -285,6 +293,12 @@ astreamer_tar_header(astreamer_tar_parser *mystreamer) if (!has_nonzero_byte) return false; + /* + * Verify that we have a reasonable-looking header. + */ + if (!isValidTarHeader(buffer)) + pg_fatal("input file does not appear to be a valid tar archive"); + /* * Parse key fields out of the header. */ @@ -295,12 +309,28 @@ astreamer_tar_header(astreamer_tar_parser *mystreamer) member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8); member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8); member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8); - member->is_directory = - (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY); - member->is_link = - (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK); - if (member->is_link) - strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100); + + switch (buffer[TAR_OFFSET_TYPEFLAG]) + { + case TAR_FILETYPE_PLAIN: + case TAR_FILETYPE_PLAIN_OLD: + member->is_regular = true; + break; + case TAR_FILETYPE_DIRECTORY: + member->is_directory = true; + break; + case TAR_FILETYPE_SYMLINK: + member->is_symlink = true; + strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100); + break; + case TAR_FILETYPE_PAX_EXTENDED: + case TAR_FILETYPE_PAX_EXTENDED_GLOBAL: + pg_fatal("pax extensions to tar format are not supported"); + break; + default: + /* For special filetypes, set none of the three is_xxx flags */ + break; + } /* Compute number of padding bytes. */ mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size); diff --git a/src/include/fe_utils/astreamer.h b/src/include/fe_utils/astreamer.h index 0e0031741fa..005141ea8b3 100644 --- a/src/include/fe_utils/astreamer.h +++ b/src/include/fe_utils/astreamer.h @@ -83,8 +83,10 @@ typedef struct mode_t mode; uid_t uid; gid_t gid; + /* note: special filetypes will set none of these flags */ + bool is_regular; bool is_directory; - bool is_link; + bool is_symlink; char linktarget[MAXPGPATH]; } astreamer_member; diff --git a/src/include/pgtar.h b/src/include/pgtar.h index b2677578a3d..84c98dd3f6e 100644 --- a/src/include/pgtar.h +++ b/src/include/pgtar.h @@ -55,11 +55,15 @@ enum tarHeaderOffset /* last 12 bytes of the 512-byte block are unassigned */ }; +/* See POSIX (not all the standard file type codes are listed here) */ enum tarFileType { TAR_FILETYPE_PLAIN = '0', + TAR_FILETYPE_PLAIN_OLD = '\0', /* backwards compatibility, per POSIX */ TAR_FILETYPE_SYMLINK = '2', TAR_FILETYPE_DIRECTORY = '5', + TAR_FILETYPE_PAX_EXTENDED = 'x', + TAR_FILETYPE_PAX_EXTENDED_GLOBAL = 'g', }; extern enum tarError tarCreateHeader(char *h, const char *filename, @@ -68,7 +72,8 @@ extern enum tarError tarCreateHeader(char *h, const char *filename, time_t mtime); extern uint64 read_tar_number(const char *s, int len); extern void print_tar_number(char *s, int len, uint64 val); -extern int tarChecksum(char *header); +extern int tarChecksum(const char *header); +extern bool isValidTarHeader(const char *header); /* * Compute the number of padding bytes required for an entry in a tar diff --git a/src/port/tar.c b/src/port/tar.c index 592b4fb7b0f..fee9dbbf5be 100644 --- a/src/port/tar.c +++ b/src/port/tar.c @@ -87,7 +87,7 @@ read_tar_number(const char *s, int len) * be 512 bytes, per the tar standard. */ int -tarChecksum(char *header) +tarChecksum(const char *header) { int i, sum; @@ -95,15 +95,44 @@ tarChecksum(char *header) /* * Per POSIX, the checksum is the simple sum of all bytes in the header, * treating the bytes as unsigned, and treating the checksum field (at - * offset 148) as though it contained 8 spaces. + * offset TAR_OFFSET_CHECKSUM) as though it contained 8 spaces. */ sum = 8 * ' '; /* presumed value for checksum field */ - for (i = 0; i < 512; i++) - if (i < 148 || i >= 156) + for (i = 0; i < TAR_BLOCK_SIZE; i++) + if (i < TAR_OFFSET_CHECKSUM || i >= TAR_OFFSET_CHECKSUM + 8) sum += 0xFF & header[i]; return sum; } +/* + * Check validity of a tar header (assumed to be 512 bytes long). + * We verify the checksum and the magic number / version. + */ +bool +isValidTarHeader(const char *header) +{ + int sum; + int chk = tarChecksum(header); + + sum = read_tar_number(&header[TAR_OFFSET_CHECKSUM], 8); + + if (sum != chk) + return false; + + /* POSIX tar format */ + if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar\0", 6) == 0 && + memcmp(&header[TAR_OFFSET_VERSION], "00", 2) == 0) + return true; + /* GNU tar format */ + if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar \0", 8) == 0) + return true; + /* not-quite-POSIX format written by pre-9.3 pg_dump */ + if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar00\0", 8) == 0) + return true; + + return false; +} + /* * Fill in the buffer pointed to by h with a tar format header. This buffer