diff --git a/borg/archive.py b/borg/archive.py index 31e3c0571..0bb65e099 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -1,4 +1,3 @@ -from binascii import hexlify from datetime import datetime, timezone from getpass import getuser from itertools import groupby @@ -19,8 +18,8 @@ from . import xattr from .compress import Compressor, COMPR_BUFFER from .constants import * # NOQA from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \ - parse_timestamp, to_localtime, format_time, format_timedelta, \ - Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, \ + parse_timestamp, to_localtime, format_time, format_timedelta, safe_encode, safe_decode, \ + Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \ ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \ PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume from .repository import Repository @@ -176,7 +175,7 @@ class Archive: self.id = id self.metadata = self._load_meta(self.id) decode_dict(self.metadata, ARCHIVE_TEXT_KEYS) - self.metadata[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in self.metadata[b'cmdline']] + self.metadata[b'cmdline'] = [safe_decode(arg) for arg in self.metadata[b'cmdline']] self.name = self.metadata[b'name'] @property @@ -194,7 +193,7 @@ class Archive: @property def fpr(self): - return hexlify(self.id).decode('ascii') + return bin_to_hex(self.id) @property def duration(self): @@ -567,7 +566,7 @@ Number of files: {0.stats.nfiles}'''.format( return status else: self.hard_links[st.st_ino, st.st_dev] = safe_path - path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape')) + path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path))) first_run = not cache.files ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode) if first_run: @@ -795,7 +794,7 @@ class ArchiveChecker: for chunk_id, size, csize in item[b'chunks']: if chunk_id not in self.chunks: # If a file chunk is missing, create an all empty replacement chunk - logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size)) + logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(safe_decode(item[b'path']), offset, offset + size)) self.error_found = True data = bytes(size) chunk_id = self.key.id_hash(data) @@ -823,7 +822,7 @@ class ArchiveChecker: return _state def report(msg, chunk_id, chunk_no): - cid = hexlify(chunk_id).decode('ascii') + cid = bin_to_hex(chunk_id) msg += ' [chunk: %06d_%s]' % (chunk_no, cid) # see debug-dump-archive-items self.error_found = True logger.error(msg) @@ -882,7 +881,7 @@ class ArchiveChecker: if archive[b'version'] != 1: raise Exception('Unknown archive metadata version') decode_dict(archive, ARCHIVE_TEXT_KEYS) - archive[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in archive[b'cmdline']] + archive[b'cmdline'] = [safe_decode(arg) for arg in archive[b'cmdline']] items_buffer = ChunkBuffer(self.key) items_buffer.write_chunk = add_callback for item in robust_iterator(archive): @@ -1187,10 +1186,10 @@ class ArchiveRecreater: logger.info('Found %s, will resume interrupted operation', target_name) old_target = self.open_archive(target_name) resume_id = old_target.metadata[b'recreate_source_id'] - resume_args = [arg.decode('utf-8', 'surrogateescape') for arg in old_target.metadata[b'recreate_args']] + resume_args = [safe_decode(arg) for arg in old_target.metadata[b'recreate_args']] if resume_id != archive.id: logger.warning('Source archive changed, will discard %s and start over', target_name) - logger.warning('Saved fingerprint: %s', hexlify(resume_id).decode('ascii')) + logger.warning('Saved fingerprint: %s', bin_to_hex(resume_id)) logger.warning('Current fingerprint: %s', archive.fpr) old_target.delete(Statistics(), progress=self.progress) return None, None # can't resume diff --git a/borg/archiver.py b/borg/archiver.py index eea3f70aa..e0b52bc83 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -1,4 +1,4 @@ -from binascii import hexlify, unhexlify +from binascii import unhexlify from datetime import datetime from itertools import zip_longest from operator import attrgetter @@ -19,7 +19,7 @@ import traceback from . import __version__ from .helpers import Error, location_validator, archivename_validator, format_time, format_file_size, \ parse_pattern, PathPrefixPattern, to_localtime, timestamp, \ - get_cache_dir, prune_within, prune_split, \ + get_cache_dir, prune_within, prune_split, bin_to_hex, safe_encode, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \ log_multi, PatternMatcher, ItemFormatter @@ -739,7 +739,7 @@ class Archiver: else: write = sys.stdout.buffer.write for item in archive.iter_items(lambda item: matcher.match(item[b'path'])): - write(formatter.format_item(item).encode('utf-8', errors='surrogateescape')) + write(safe_encode(formatter.format_item(item))) else: for archive_info in manifest.list_archive_infos(sort_by='ts'): if args.prefix and not archive_info.name.startswith(args.prefix): @@ -759,7 +759,7 @@ class Archiver: stats = archive.calc_stats(cache) print('Name:', archive.name) - print('Fingerprint: %s' % hexlify(archive.id).decode('ascii')) + print('Fingerprint: %s' % archive.fpr) print('Comment:', archive.metadata.get(b'comment', '')) print('Hostname:', archive.metadata[b'hostname']) print('Username:', archive.metadata[b'username']) @@ -901,7 +901,7 @@ class Archiver: archive = Archive(repository, key, manifest, args.location.archive) for i, item_id in enumerate(archive.metadata[b'items']): _, data = key.decrypt(item_id, repository.get(item_id)) - filename = '%06d_%s.items' % (i, hexlify(item_id).decode('ascii')) + filename = '%06d_%s.items' % (i, bin_to_hex(item_id)) print('Dumping', filename) with open(filename, 'wb') as fd: fd.write(data) diff --git a/borg/cache.py b/borg/cache.py index 5902365c2..fff2dc33f 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -3,14 +3,14 @@ from .remote import cache_if_remote from collections import namedtuple import os import stat -from binascii import hexlify, unhexlify +from binascii import unhexlify import shutil from .key import PlaintextKey from .logger import create_logger logger = create_logger() from .helpers import Error, get_cache_dir, decode_dict, int_to_bigint, \ - bigint_to_int, format_file_size, yes + bigint_to_int, bin_to_hex, format_file_size, yes from .locking import UpgradableLock from .hashindex import ChunkIndex, ChunkIndexEntry @@ -37,13 +37,13 @@ class Cache: @staticmethod def break_lock(repository, path=None): - path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii')) + path = path or os.path.join(get_cache_dir(), repository.id_str) UpgradableLock(os.path.join(path, 'lock'), exclusive=True).break_lock() @staticmethod def destroy(repository, path=None): """destroy the cache for ``repository`` or at ``path``""" - path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii')) + path = path or os.path.join(get_cache_dir(), repository.id_str) config = os.path.join(path, 'config') if os.path.exists(config): os.remove(config) # kill config first @@ -64,7 +64,7 @@ class Cache: self.repository = repository self.key = key self.manifest = manifest - self.path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii')) + self.path = path or os.path.join(get_cache_dir(), repository.id_str) self.do_files = do_files # Warn user before sending data to a never seen before unencrypted repository if not os.path.exists(self.path): @@ -134,7 +134,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" config = configparser.ConfigParser(interpolation=None) config.add_section('cache') config.set('cache', 'version', '1') - config.set('cache', 'repository', hexlify(self.repository.id).decode('ascii')) + config.set('cache', 'repository', self.repository.id_str) config.set('cache', 'manifest', '') with open(os.path.join(self.path, 'config'), 'w') as fd: config.write(fd) @@ -214,7 +214,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" entry = FileCacheEntry(*msgpack.unpackb(item)) if entry.age < 10 and bigint_to_int(entry.mtime) < self._newest_mtime: msgpack.pack((path_hash, entry), fd) - self.config.set('cache', 'manifest', hexlify(self.manifest.id).decode('ascii')) + self.config.set('cache', 'manifest', self.manifest.id_str) self.config.set('cache', 'timestamp', self.manifest.timestamp) self.config.set('cache', 'key_type', str(self.key.TYPE)) self.config.set('cache', 'previous_location', self.repository._location.canonical_path()) @@ -257,7 +257,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" archive_path = os.path.join(self.path, 'chunks.archive.d') def mkpath(id, suffix=''): - id_hex = hexlify(id).decode('ascii') + id_hex = bin_to_hex(id) path = os.path.join(archive_path, id_hex + suffix) return path.encode('utf-8') diff --git a/borg/helpers.py b/borg/helpers.py index 9259c8f81..994ad9a3e 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -95,6 +95,10 @@ class Manifest: self.key = key self.repository = repository + @property + def id_str(self): + return bin_to_hex(self.id) + @classmethod def load(cls, repository, key=None): from .key import key_factory @@ -658,7 +662,7 @@ def format_archive(archive): return '%-36s %s [%s]' % ( archive.name, format_time(to_localtime(archive.ts)), - hexlify(archive.id).decode('ascii'), + bin_to_hex(archive.id), ) @@ -731,6 +735,10 @@ def safe_encode(s, coding='utf-8', errors='surrogateescape'): return s.encode(coding, errors) +def bin_to_hex(binary): + return hexlify(binary).decode('ascii') + + class Location: """Object representing a repository / archive location """ diff --git a/borg/key.py b/borg/key.py index 124376d0e..ad960b796 100644 --- a/borg/key.py +++ b/borg/key.py @@ -1,4 +1,4 @@ -from binascii import hexlify, a2b_base64, b2a_base64 +from binascii import a2b_base64, b2a_base64 import configparser import getpass import os @@ -7,7 +7,7 @@ import textwrap from hmac import compare_digest from hashlib import sha256, pbkdf2_hmac -from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes +from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes, bin_to_hex from .logger import create_logger logger = create_logger() @@ -203,7 +203,7 @@ class Passphrase(str): passphrase.encode('ascii') except UnicodeEncodeError: print('Your passphrase (UTF-8 encoding in hex): %s' % - hexlify(passphrase.encode('utf-8')).decode('ascii'), + bin_to_hex(passphrase.encode('utf-8')), file=sys.stderr) print('As you have a non-ASCII passphrase, it is recommended to keep the UTF-8 encoding in hex together with the passphrase at a safe place.', file=sys.stderr) @@ -397,13 +397,12 @@ class KeyfileKey(KeyfileKeyBase): FILE_ID = 'BORG_KEY' def find_key(self): - id = hexlify(self.repository.id).decode('ascii') keys_dir = get_keys_dir() for name in os.listdir(keys_dir): filename = os.path.join(keys_dir, name) with open(filename, 'r') as fd: line = fd.readline().strip() - if line.startswith(self.FILE_ID) and line[len(self.FILE_ID) + 1:] == id: + if line.startswith(self.FILE_ID) and line[len(self.FILE_ID) + 1:] == self.repository.id_str: return filename raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir()) @@ -427,7 +426,7 @@ class KeyfileKey(KeyfileKeyBase): def save(self, target, passphrase): key_data = self._save(passphrase) with open(target, 'w') as fd: - fd.write('%s %s\n' % (self.FILE_ID, hexlify(self.repository_id).decode('ascii'))) + fd.write('%s %s\n' % (self.FILE_ID, bin_to_hex(self.repository_id))) fd.write(key_data) fd.write('\n') self.target = target diff --git a/borg/remote.py b/borg/remote.py index 051b9c0e8..5444f05bf 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -10,7 +10,7 @@ import tempfile from . import __version__ -from .helpers import Error, IntegrityError, get_home_dir, sysinfo +from .helpers import Error, IntegrityError, get_home_dir, sysinfo, bin_to_hex from .repository import Repository import msgpack @@ -191,6 +191,10 @@ class RemoteRepository: self.rollback() self.close() + @property + def id_str(self): + return bin_to_hex(self.id) + def borg_cmd(self, args, testing): """return a borg serve command line""" # give some args/options to "borg serve" process as they were given to us diff --git a/borg/repository.py b/borg/repository.py index 7a57e7cc1..3f8d5d68d 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -1,5 +1,5 @@ from configparser import ConfigParser -from binascii import hexlify, unhexlify +from binascii import unhexlify from datetime import datetime from itertools import islice import errno @@ -13,7 +13,7 @@ from zlib import crc32 import msgpack from .constants import * # NOQA -from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent +from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent, bin_to_hex from .hashindex import NSIndex from .locking import UpgradableLock, LockError, LockErrorT from .lrucache import LRUCache @@ -83,6 +83,10 @@ class Repository: self.rollback() self.close() + @property + def id_str(self): + return bin_to_hex(self.id) + def create(self, path): """Create a new empty repository at `path` """ @@ -99,7 +103,7 @@ class Repository: config.set('repository', 'segments_per_dir', str(DEFAULT_SEGMENTS_PER_DIR)) config.set('repository', 'max_segment_size', str(DEFAULT_MAX_SEGMENT_SIZE)) config.set('repository', 'append_only', '0') - config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii')) + config.set('repository', 'id', bin_to_hex(os.urandom(32))) self.save_config(path, config) def save_config(self, path, config): diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 115e14736..77fb44277 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -1,4 +1,3 @@ -from binascii import hexlify from configparser import ConfigParser import errno import os @@ -23,7 +22,7 @@ from ..archiver import Archiver from ..cache import Cache from ..constants import * # NOQA from ..crypto import bytes_to_long, num_aes_blocks -from ..helpers import Chunk, Manifest, EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR +from ..helpers import Chunk, Manifest, EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, bin_to_hex from ..key import KeyfileKeyBase from ..remote import RemoteRepository, PathNotAllowed from ..repository import Repository @@ -377,7 +376,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): def _set_repository_id(self, path, id): config = ConfigParser(interpolation=None) config.read(os.path.join(path, 'config')) - config.set('repository', 'id', hexlify(id).decode('ascii')) + config.set('repository', 'id', bin_to_hex(id)) with open(os.path.join(path, 'config'), 'w') as fd: config.write(fd) with Repository(self.repository_path) as repository: diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 51d344818..c0b9a049a 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -13,7 +13,7 @@ import time from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, \ prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \ yes, TRUISH, FALSISH, DEFAULTISH, \ - StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, Chunk, \ + StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, CompressionSpec, ChunkerParams, Chunk, \ ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \ PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format, ChunkIteratorFileWrapper from . import BaseTestCase, environment_variable, FakeInputs @@ -31,6 +31,11 @@ class BigIntTestCase(BaseTestCase): self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70) +def test_bin_to_hex(): + assert bin_to_hex(b'') == '' + assert bin_to_hex(b'\x00\x01\xff') == '0001ff' + + class TestLocationWithoutEnv: def test_ssh(self, monkeypatch): monkeypatch.delenv('BORG_REPO', raising=False) diff --git a/borg/testsuite/key.py b/borg/testsuite/key.py index 34d2d9a10..9e01103ad 100644 --- a/borg/testsuite/key.py +++ b/borg/testsuite/key.py @@ -6,7 +6,7 @@ from binascii import hexlify, unhexlify from ..crypto import bytes_to_long, num_aes_blocks from ..key import PlaintextKey, PassphraseKey, KeyfileKey -from ..helpers import Location, Chunk +from ..helpers import Location, Chunk, bin_to_hex from . import BaseTestCase @@ -44,6 +44,7 @@ class KeyTestCase(BaseTestCase): _location = _Location() id = bytes(32) + id_str = bin_to_hex(id) def test_plaintext(self): key = PlaintextKey.create(None, None) diff --git a/borg/upgrader.py b/borg/upgrader.py index f50b3f9e5..d0ea9680d 100644 --- a/borg/upgrader.py +++ b/borg/upgrader.py @@ -1,4 +1,3 @@ -from binascii import hexlify import datetime import logging logger = logging.getLogger(__name__) @@ -189,8 +188,8 @@ class AtticRepositoryUpgrader(Repository): attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', os.path.join(get_home_dir(), '.cache', 'attic')) - attic_cache_dir = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii')) - borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) + attic_cache_dir = os.path.join(attic_cache_dir, self.id_str) + borg_cache_dir = os.path.join(get_cache_dir(), self.id_str) def copy_cache_file(path): """copy the given attic cache path into the borg directory @@ -264,7 +263,6 @@ class AtticKeyfileKey(KeyfileKey): assume the repository has been opened by the archiver yet """ get_keys_dir = cls.get_keys_dir - id = hexlify(repository.id).decode('ascii') keys_dir = get_keys_dir() if not os.path.exists(keys_dir): raise KeyfileNotFoundError(repository.path, keys_dir) @@ -272,7 +270,7 @@ class AtticKeyfileKey(KeyfileKey): filename = os.path.join(keys_dir, name) with open(filename, 'r') as fd: line = fd.readline().strip() - if line and line.startswith(cls.FILE_ID) and line[10:] == id: + if line and line.startswith(cls.FILE_ID) and line[10:] == repository.id_str: return filename raise KeyfileNotFoundError(repository.path, keys_dir) @@ -314,7 +312,6 @@ class Borg0xxKeyfileKey(KeyfileKey): @classmethod def find_key_file(cls, repository): get_keys_dir = cls.get_keys_dir - id = hexlify(repository.id).decode('ascii') keys_dir = get_keys_dir() if not os.path.exists(keys_dir): raise KeyfileNotFoundError(repository.path, keys_dir) @@ -322,6 +319,6 @@ class Borg0xxKeyfileKey(KeyfileKey): filename = os.path.join(keys_dir, name) with open(filename, 'r') as fd: line = fd.readline().strip() - if line and line.startswith(cls.FILE_ID) and line[len(cls.FILE_ID) + 1:] == id: + if line and line.startswith(cls.FILE_ID) and line[len(cls.FILE_ID) + 1:] == repository.id_str: return filename raise KeyfileNotFoundError(repository.path, keys_dir)