From 36ebc827488aa8a42548d78b573a812b6925af11 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sat, 9 Jul 2016 21:07:56 +0200 Subject: [PATCH 1/5] Add platform.SaveFile --- src/borg/platform/__init__.py | 2 +- src/borg/platform/base.py | 44 +++++++++++++++++++++++++++++++++-- src/borg/platform/linux.pyx | 4 ++-- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/borg/platform/__init__.py b/src/borg/platform/__init__.py index 29a97fc43..e1772c5e1 100644 --- a/src/borg/platform/__init__.py +++ b/src/borg/platform/__init__.py @@ -8,7 +8,7 @@ Public APIs are documented in platform.base. from .base import acl_get, acl_set from .base import set_flags, get_flags -from .base import SyncFile, sync_dir, fdatasync +from .base import SaveFile, SyncFile, sync_dir, fdatasync from .base import swidth, API_VERSION if sys.platform.startswith('linux'): # pragma: linux only diff --git a/src/borg/platform/base.py b/src/borg/platform/base.py index ef8853e31..9580f06ba 100644 --- a/src/borg/platform/base.py +++ b/src/borg/platform/base.py @@ -80,8 +80,11 @@ class SyncFile: TODO: A Windows implementation should use CreateFile with FILE_FLAG_WRITE_THROUGH. """ - def __init__(self, path): - self.fd = open(path, 'xb') + def __init__(self, path, binary=True): + mode = 'x' + if binary: + mode += 'b' + self.fd = open(path, mode) self.fileno = self.fd.fileno() def __enter__(self): @@ -112,6 +115,43 @@ class SyncFile: platform.sync_dir(os.path.dirname(self.fd.name)) +class SaveFile: + """ + Update file contents atomically. + + Must be used as a context manager (defining the scope of the transaction). + + On a journaling file system the file contents are always updated + atomically and won't become corrupted, even on pure failures or + crashes (for caveats see SyncFile). + """ + + SUFFIX = '.tmp' + + def __init__(self, path, binary=True): + self.binary = binary + self.path = path + self.tmppath = self.path + self.SUFFIX + + def __enter__(self): + from .. import platform + try: + os.unlink(self.tmppath) + except OSError: + pass + self.fd = platform.SyncFile(self.tmppath, self.binary) + return self.fd + + def __exit__(self, exc_type, exc_val, exc_tb): + from .. import platform + self.fd.close() + if exc_type is not None: + os.unlink(self.tmppath) + return + os.rename(self.tmppath, self.path) + platform.sync_dir(os.path.dirname(self.path)) + + def swidth(s): """terminal output width of string diff --git a/src/borg/platform/linux.pyx b/src/borg/platform/linux.pyx index 4bbdcc356..7dea321a0 100644 --- a/src/borg/platform/linux.pyx +++ b/src/borg/platform/linux.pyx @@ -228,8 +228,8 @@ class SyncFile(BaseSyncFile): disk in the immediate future. """ - def __init__(self, path): - super().__init__(path) + def __init__(self, path, binary=True): + super().__init__(path, binary) self.offset = 0 self.write_window = (16 * 1024 ** 2) & ~PAGE_MASK self.last_sync = 0 From f4be2b352313bd008707d48bacea6e70d3272294 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sat, 9 Jul 2016 21:10:46 +0200 Subject: [PATCH 2/5] Use platform.SaveFile for repository, cache and key files Fixes #1060 --- src/borg/cache.py | 9 +++++---- src/borg/key.py | 3 ++- src/borg/repository.py | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/borg/cache.py b/src/borg/cache.py index 4dc4c2181..6dcd88d2e 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -19,6 +19,7 @@ from .helpers import yes from .item import Item from .key import PlaintextKey from .locking import UpgradableLock +from .platform import SaveFile from .remote import cache_if_remote ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize') @@ -141,11 +142,11 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" config.set('cache', 'version', '1') config.set('cache', 'repository', self.repository.id_str) config.set('cache', 'manifest', '') - with open(os.path.join(self.path, 'config'), 'w') as fd: + with SaveFile(os.path.join(self.path, 'config'), binary=False) as fd: config.write(fd) ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8')) os.makedirs(os.path.join(self.path, 'chunks.archive.d')) - with open(os.path.join(self.path, 'files'), 'wb') as fd: + with SaveFile(os.path.join(self.path, 'files')) as fd: pass # empty file def _do_open(self): @@ -212,7 +213,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" if not self.txn_active: return if self.files is not None: - with open(os.path.join(self.path, 'files'), 'wb') as fd: + with SaveFile(os.path.join(self.path, 'files')) as fd: for path_hash, item in self.files.items(): # Discard cached files with the newest mtime to avoid # issues with filesystem snapshots and mtime precision @@ -223,7 +224,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" self.config.set('cache', 'timestamp', self.manifest.timestamp) self.config.set('cache', 'key_type', str(self.key.TYPE)) self.config.set('cache', 'previous_location', self.repository._location.canonical_path()) - with open(os.path.join(self.path, 'config'), 'w') as fd: + with SaveFile(os.path.join(self.path, 'config'), binary=False) as fd: self.config.write(fd) self.chunks.write(os.path.join(self.path, 'chunks').encode('utf-8')) os.rename(os.path.join(self.path, 'txn.active'), diff --git a/src/borg/key.py b/src/borg/key.py index 6965ae737..2fd4ad3f1 100644 --- a/src/borg/key.py +++ b/src/borg/key.py @@ -22,6 +22,7 @@ from .helpers import get_keys_dir from .helpers import bin_to_hex from .helpers import CompressionDecider2, CompressionSpec from .item import Key, EncryptedKey +from .platform import SaveFile PREFIX = b'\0' * 8 @@ -470,7 +471,7 @@ class KeyfileKey(KeyfileKeyBase): def save(self, target, passphrase): key_data = self._save(passphrase) - with open(target, 'w') as fd: + with SaveFile(target, binary=False) as fd: fd.write('%s %s\n' % (self.FILE_ID, bin_to_hex(self.repository_id))) fd.write(key_data) fd.write('\n') diff --git a/src/borg/repository.py b/src/borg/repository.py index 6af3f87d9..9d9dc3fbf 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -23,7 +23,7 @@ from .helpers import ProgressIndicatorPercent from .helpers import bin_to_hex from .locking import UpgradableLock, LockError, LockErrorT from .lrucache import LRUCache -from .platform import SyncFile, sync_dir +from .platform import SaveFile, SyncFile, sync_dir MAX_OBJECT_SIZE = 20 * 1024 * 1024 MAGIC = b'BORG_SEG' @@ -160,7 +160,7 @@ class Repository: def save_config(self, path, config): config_path = os.path.join(path, 'config') - with open(config_path, 'w') as fd: + with SaveFile(config_path, binary=False) as fd: config.write(fd) def save_key(self, keydata): From dec671d8ff63878c0977b229338f4d959ed0e0bd Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Tue, 26 Jul 2016 22:39:45 +0200 Subject: [PATCH 3/5] SaveFile: os.replace instead of rename --- src/borg/platform/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/borg/platform/base.py b/src/borg/platform/base.py index 9580f06ba..05739062e 100644 --- a/src/borg/platform/base.py +++ b/src/borg/platform/base.py @@ -148,7 +148,7 @@ class SaveFile: if exc_type is not None: os.unlink(self.tmppath) return - os.rename(self.tmppath, self.path) + os.replace(self.tmppath, self.path) platform.sync_dir(os.path.dirname(self.path)) From 863ab66908a41ad121021f317d229898a509060c Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Tue, 26 Jul 2016 22:40:23 +0200 Subject: [PATCH 4/5] SaveFile: unlink(tmppath): only ignore FileNotFoundError --- src/borg/platform/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/borg/platform/base.py b/src/borg/platform/base.py index 05739062e..2e894289b 100644 --- a/src/borg/platform/base.py +++ b/src/borg/platform/base.py @@ -137,7 +137,7 @@ class SaveFile: from .. import platform try: os.unlink(self.tmppath) - except OSError: + except FileNotFoundError: pass self.fd = platform.SyncFile(self.tmppath, self.binary) return self.fd From 2e3fc9ddfc55e3c1350e890e9d1ba3c672a59bdc Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Tue, 26 Jul 2016 22:49:25 +0200 Subject: [PATCH 5/5] SyncFile/SaveFile: default binary=False, just like open() --- src/borg/cache.py | 8 ++++---- src/borg/key.py | 2 +- src/borg/platform/base.py | 10 ++++------ src/borg/platform/linux.pyx | 2 +- src/borg/repository.py | 4 ++-- 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/borg/cache.py b/src/borg/cache.py index 6dcd88d2e..473f156b2 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -142,11 +142,11 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" config.set('cache', 'version', '1') config.set('cache', 'repository', self.repository.id_str) config.set('cache', 'manifest', '') - with SaveFile(os.path.join(self.path, 'config'), binary=False) as fd: + with SaveFile(os.path.join(self.path, 'config')) as fd: config.write(fd) ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8')) os.makedirs(os.path.join(self.path, 'chunks.archive.d')) - with SaveFile(os.path.join(self.path, 'files')) as fd: + with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd: pass # empty file def _do_open(self): @@ -213,7 +213,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" if not self.txn_active: return if self.files is not None: - with SaveFile(os.path.join(self.path, 'files')) as fd: + with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd: for path_hash, item in self.files.items(): # Discard cached files with the newest mtime to avoid # issues with filesystem snapshots and mtime precision @@ -224,7 +224,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" self.config.set('cache', 'timestamp', self.manifest.timestamp) self.config.set('cache', 'key_type', str(self.key.TYPE)) self.config.set('cache', 'previous_location', self.repository._location.canonical_path()) - with SaveFile(os.path.join(self.path, 'config'), binary=False) as fd: + with SaveFile(os.path.join(self.path, 'config')) as fd: self.config.write(fd) self.chunks.write(os.path.join(self.path, 'chunks').encode('utf-8')) os.rename(os.path.join(self.path, 'txn.active'), diff --git a/src/borg/key.py b/src/borg/key.py index 2fd4ad3f1..b122b638c 100644 --- a/src/borg/key.py +++ b/src/borg/key.py @@ -471,7 +471,7 @@ class KeyfileKey(KeyfileKeyBase): def save(self, target, passphrase): key_data = self._save(passphrase) - with SaveFile(target, binary=False) as fd: + with SaveFile(target) as fd: fd.write('%s %s\n' % (self.FILE_ID, bin_to_hex(self.repository_id))) fd.write(key_data) fd.write('\n') diff --git a/src/borg/platform/base.py b/src/borg/platform/base.py index 2e894289b..da8d3bc0c 100644 --- a/src/borg/platform/base.py +++ b/src/borg/platform/base.py @@ -80,10 +80,8 @@ class SyncFile: TODO: A Windows implementation should use CreateFile with FILE_FLAG_WRITE_THROUGH. """ - def __init__(self, path, binary=True): - mode = 'x' - if binary: - mode += 'b' + def __init__(self, path, binary=False): + mode = 'xb' if binary else 'x' self.fd = open(path, mode) self.fileno = self.fd.fileno() @@ -122,13 +120,13 @@ class SaveFile: Must be used as a context manager (defining the scope of the transaction). On a journaling file system the file contents are always updated - atomically and won't become corrupted, even on pure failures or + atomically and won't become corrupted, even on power failures or crashes (for caveats see SyncFile). """ SUFFIX = '.tmp' - def __init__(self, path, binary=True): + def __init__(self, path, binary=False): self.binary = binary self.path = path self.tmppath = self.path + self.SUFFIX diff --git a/src/borg/platform/linux.pyx b/src/borg/platform/linux.pyx index 7dea321a0..d35b28ac2 100644 --- a/src/borg/platform/linux.pyx +++ b/src/borg/platform/linux.pyx @@ -228,7 +228,7 @@ class SyncFile(BaseSyncFile): disk in the immediate future. """ - def __init__(self, path, binary=True): + def __init__(self, path, binary=False): super().__init__(path, binary) self.offset = 0 self.write_window = (16 * 1024 ** 2) & ~PAGE_MASK diff --git a/src/borg/repository.py b/src/borg/repository.py index 9d9dc3fbf..ab65dfea3 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -160,7 +160,7 @@ class Repository: def save_config(self, path, config): config_path = os.path.join(path, 'config') - with SaveFile(config_path, binary=False) as fd: + with SaveFile(config_path) as fd: config.write(fd) def save_key(self, keydata): @@ -731,7 +731,7 @@ class LoggedIO: if not os.path.exists(dirname): os.mkdir(dirname) sync_dir(os.path.join(self.path, 'data')) - self._write_fd = SyncFile(self.segment_filename(self.segment)) + self._write_fd = SyncFile(self.segment_filename(self.segment), binary=True) self._write_fd.write(MAGIC) self.offset = MAGIC_LEN return self._write_fd