Merge branch '1.0-maint'

Thomas Waldmann 2016-04-16 16:58:43 +02:00
commit 4de14fdd28
12 changed files with 481 additions and 182 deletions


@@ -84,12 +84,12 @@ Easy to use
Initialize a new backup repository and create a backup archive::
$ borg init /mnt/backup
$ borg create /mnt/backup::Saturday1 ~/Documents
$ borg init /path/to/repo
$ borg create /path/to/repo::Saturday1 ~/Documents
Now doing another backup, just to show off the great deduplication::
$ borg create -v --stats /mnt/backup::Saturday2 ~/Documents
$ borg create -v --stats /path/to/repo::Saturday2 ~/Documents
-----------------------------------------------------------------------------
Archive name: Saturday2
Archive fingerprint: 622b7c53c...

Vagrantfile

@@ -132,7 +132,7 @@ def packages_netbsd
touch /etc/openssl/openssl.cnf # avoids a flood of "can't open ..."
mozilla-rootcerts install
pkg_add pkg-config # avoids some "pkg-config missing" error msg, even without fuse
# pkg_add fuse # llfuse 0.41.1 supports netbsd, but is still buggy.
# pkg_add fuse # llfuse supports netbsd, but is still buggy.
# https://bitbucket.org/nikratio/python-llfuse/issues/70/perfuse_open-setsockopt-no-buffer-space
pkg_add python34 py34-setuptools
ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python
@@ -200,9 +200,8 @@ def install_borg(boxname)
rm -f borg/*.so borg/*.cpy*
rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c
rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__
pip install 'llfuse<0.41' # 0.41.1 throws UnicodeDecodeError at install time:
# https://bitbucket.org/nikratio/python-llfuse/issues/69/unicode-exception-at-install-time
pip install -r requirements.d/development.txt
pip install -r requirements.d/fuse.txt
pip install -e .
EOF
end


@@ -41,6 +41,7 @@ static uint32_t table_base[] =
#define BARREL_SHIFT(v, shift) ( ((v) << shift) | ((v) >> (32 - shift)) )
size_t pagemask;
static uint32_t *
buzhash_init_table(uint32_t seed)
@@ -130,6 +131,7 @@ chunker_fill(Chunker *c)
{
ssize_t n;
off_t offset, length;
int overshoot;
PyObject *data;
memmove(c->data, c->data + c->last, c->position + c->remaining - c->last);
c->position -= c->last;
@@ -157,14 +159,33 @@ chunker_fill(Chunker *c)
}
length = c->bytes_read - offset;
#if ( ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L ) && defined(POSIX_FADV_DONTNEED) )
// Only do it once per run.
if (pagemask == 0)
pagemask = getpagesize() - 1;
// We tell the OS that we do not need the data that we just read any more
// (that it may still have in the cache). This keeps us from spoiling the
// whole cache with data that we read only once and thereby evicting (due
// to the cache size limit) data that might still be useful for the OS or
// other processes.
// We roll the initial offset back to the start of its page, so the first
// page is covered in full and gets evicted too, instead of being skipped
// as a partial page.
if (length > 0) {
posix_fadvise(c->fh, offset, length, POSIX_FADV_DONTNEED);
}
// Linux kernels prior to 4.7 have a bug where they truncate the
// last partial page of a POSIX_FADV_DONTNEED request, so we need
// to page-align it ourselves. We'll need the rest of this page
// on the next read (assuming this was not EOF).
overshoot = (offset + length) & pagemask;
} else {
// For length == 0 we set overshoot to 0, so length - overshoot below
// is also 0, which posix_fadvise takes to mean "until end of file".
// This drops the final page and is not part of the above workaround.
overshoot = 0;
}
posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED);
#endif
}
else {

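The mask arithmetic above is the whole workaround: round the start of the
fadvise range down to a page boundary, and trim the trailing partial page
except at EOF. A standalone Python restatement (page size of 4096 assumed
for illustration; the C code asks getpagesize())::

    PAGESIZE = 4096            # assumed page size, for illustration only
    pagemask = PAGESIZE - 1

    def fadvise_range(offset, length):
        """Mirror the C logic above: return the (start, count) pair that is
        actually passed to posix_fadvise(..., POSIX_FADV_DONTNEED)."""
        if length > 0:
            # keep the trailing partial page: buggy pre-4.7 kernels would
            # drop it entirely, and we still need it on the next read
            overshoot = (offset + length) & pagemask
        else:
            # at EOF, count == 0 means "until end of file", which flushes
            # the final partial page on purpose
            overshoot = 0
        return offset & ~pagemask, length - overshoot

    # a read of 10000 bytes at offset 5000: start rounds down to 4096,
    # and the 2712 bytes past the last page boundary stay cached
    assert fadvise_range(5000, 10000) == (4096, 7288)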

@@ -439,51 +439,3 @@ hashindex_get_size(HashIndex *index)
{
return index->num_entries;
}
static void
hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
long long *total_unique_size, long long *total_unique_csize,
long long *total_unique_chunks, long long *total_chunks)
{
int64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0, chunks = 0, unique_chunks = 0;
const int32_t *values;
void *key = NULL;
while((key = hashindex_next_key(index, key))) {
values = key + index->key_size;
unique_chunks++;
chunks += values[0];
unique_size += values[1];
unique_csize += values[2];
size += (int64_t) values[0] * values[1];
csize += (int64_t) values[0] * values[2];
}
*total_size = size;
*total_csize = csize;
*total_unique_size = unique_size;
*total_unique_csize = unique_csize;
*total_unique_chunks = unique_chunks;
*total_chunks = chunks;
}
static void
hashindex_add(HashIndex *index, const void *key, int32_t *other_values)
{
int32_t *my_values = (int32_t *)hashindex_get(index, key);
if(my_values == NULL) {
hashindex_set(index, key, other_values);
} else {
*my_values += *other_values;
}
}
static void
hashindex_merge(HashIndex *index, HashIndex *other)
{
int32_t key_size = index->key_size;
void *key = NULL;
while((key = hashindex_next_key(other, key))) {
hashindex_add(index, key, key + key_size);
}
}
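These three helpers were not simply dropped; they moved into hashindex.pyx
(further down in this diff), where every value passes through
_le32toh()/_htole32() and refcount addition saturates instead of overflowing.
The merge semantics in brief, as a plain-Python sketch::

    MAX_VALUE = 2**32 - 1025   # same constant as in hashindex.pyx below

    def merge_refcounts(a, b):
        """Saturating refcount addition used when merging chunk indexes."""
        assert a <= MAX_VALUE and b <= MAX_VALUE
        return min(a + b, MAX_VALUE)

    # two half-range counts still add exactly ...
    half = MAX_VALUE // 2
    assert merge_refcounts(half, half) == MAX_VALUE - 1
    # ... while anything past MAX_VALUE pins at the limit:
    assert merge_refcounts(half + 2, half) == MAX_VALUE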


@@ -786,8 +786,7 @@ class ArchiveChecker:
def add_reference(id_, size, csize, cdata=None):
try:
count, _, _ = self.chunks[id_]
self.chunks[id_] = count + 1, size, csize
self.chunks.incref(id_)
except KeyError:
assert cdata is not None
self.chunks[id_] = 1, size, csize


@@ -306,7 +306,7 @@ class Archiver:
if (st.st_ino, st.st_dev) in skip_inodes:
return
# Entering a new filesystem?
if restrict_dev and st.st_dev != restrict_dev:
if restrict_dev is not None and st.st_dev != restrict_dev:
return
status = None
# Ignore if nodump flag is set
@@ -628,22 +628,23 @@ class Archiver:
cache.commit()
return self.exit_code
@with_repository(exclusive=True, cache=True)
def do_delete(self, args, repository, manifest, key, cache):
@with_repository(exclusive=True)
def do_delete(self, args, repository, manifest, key):
"""Delete an existing repository or archive"""
if args.location.archive:
archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
stats = Statistics()
archive.delete(stats, progress=args.progress)
manifest.write()
repository.commit(save_space=args.save_space)
cache.commit()
logger.info("Archive deleted.")
if args.stats:
log_multi(DASHES,
stats.summary.format(label='Deleted data:', stats=stats),
str(cache),
DASHES)
with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
stats = Statistics()
archive.delete(stats, progress=args.progress)
manifest.write()
repository.commit(save_space=args.save_space)
cache.commit()
logger.info("Archive deleted.")
if args.stats:
log_multi(DASHES,
stats.summary.format(label='Deleted data:', stats=stats),
str(cache),
DASHES)
else:
if not args.cache_only:
msg = []
@@ -658,7 +659,7 @@ class Archiver:
return self.exit_code
repository.destroy()
logger.info("Repository deleted.")
cache.destroy()
Cache.destroy(repository)
logger.info("Cache deleted.")
return self.exit_code

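The shape of this refactoring is easy to miss in the diff: a Cache is now
constructed only in the branch that actually needs one (archive deletion),
while repository deletion calls the new staticmethod Cache.destroy(), which
never instantiates a Cache at all. A toy model (not borg's real class) of
why that matters for #874::

    import os
    import shutil

    class ToyCache:
        def __init__(self, path):
            # a real Cache validates its config on open and raises if the
            # cache is missing or corrupt - which would make "just delete
            # the cache" fail on exactly the caches that most need deleting
            with open(os.path.join(path, 'config')) as fd:
                self.config = fd.read()

        @staticmethod
        def destroy(path):
            """Remove the cache directory without ever opening it."""
            config = os.path.join(path, 'config')
            if os.path.exists(config):
                os.remove(config)  # kill config first: an interrupted
                                   # delete must not leave a valid-looking
                                   # cache behind
            shutil.rmtree(path)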

@@ -37,6 +37,15 @@ class Cache:
path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii'))
UpgradableLock(os.path.join(path, 'lock'), exclusive=True).break_lock()
@staticmethod
def destroy(repository, path=None):
"""destroy the cache for ``repository`` or at ``path``"""
path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii'))
config = os.path.join(path, 'config')
if os.path.exists(config):
os.remove(config) # kill config first
shutil.rmtree(path)
def __init__(self, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True,
lock_wait=None):
"""
@@ -131,13 +140,6 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
with open(os.path.join(self.path, 'files'), 'wb') as fd:
pass # empty file
def destroy(self):
"""destroy the cache at `self.path`
"""
self.close()
os.remove(os.path.join(self.path, 'config')) # kill config first
shutil.rmtree(self.path)
def _do_open(self):
self.config = configparser.ConfigParser(interpolation=None)
config_path = os.path.join(self.path, 'config')
@@ -389,21 +391,19 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
def chunk_incref(self, id, stats):
if not self.txn_active:
self.begin_txn()
count, size, csize = self.chunks[id]
self.chunks[id] = (count + 1, size, csize)
count, size, csize = self.chunks.incref(id)
stats.update(size, csize, False)
return id, size, csize
def chunk_decref(self, id, stats):
if not self.txn_active:
self.begin_txn()
count, size, csize = self.chunks[id]
if count == 1:
count, size, csize = self.chunks.decref(id)
if count == 0:
del self.chunks[id]
self.repository.delete(id, wait=False)
stats.update(-size, -csize, True)
else:
self.chunks[id] = (count - 1, size, csize)
stats.update(-size, -csize, False)
def file_known_and_unchanged(self, path_hash, st, ignore_inode=False):


@@ -1,6 +1,9 @@
# -*- coding: utf-8 -*-
import os
cimport cython
from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t
API_VERSION = 2
@@ -11,9 +14,6 @@ cdef extern from "_hashindex.c":
HashIndex *hashindex_read(char *path)
HashIndex *hashindex_init(int capacity, int key_size, int value_size)
void hashindex_free(HashIndex *index)
void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
long long *unique_size, long long *unique_csize,
long long *total_unique_chunks, long long *total_chunks)
void hashindex_merge(HashIndex *index, HashIndex *other)
void hashindex_add(HashIndex *index, void *key, void *value)
int hashindex_get_size(HashIndex *index)
@@ -22,13 +22,34 @@ cdef extern from "_hashindex.c":
void *hashindex_next_key(HashIndex *index, void *key)
int hashindex_delete(HashIndex *index, void *key)
int hashindex_set(HashIndex *index, void *key, void *value)
int _htole32(int v)
int _le32toh(int v)
uint32_t _htole32(uint32_t v)
uint32_t _le32toh(uint32_t v)
cdef _NoDefault = object()
cimport cython
"""
The HashIndex is *not* a general purpose data structure. The value size must be at least 4 bytes, and these
first bytes are used for in-band signalling in the data structure itself.
The constant MAX_VALUE defines the valid range for these 4 bytes when interpreted as a uint32_t from 0
to MAX_VALUE (inclusive). The following reserved values beyond MAX_VALUE are currently in use
(byte order is LE)::
0xffffffff marks empty entries in the hashtable
0xfffffffe marks deleted entries in the hashtable
None of the publicly available classes in this module will accept or return a reserved value;
AssertionError is raised instead.
"""
assert UINT32_MAX == 2**32-1
# module-level constant because cdef's in classes can't have default values
cdef uint32_t _MAX_VALUE = 2**32-1025
MAX_VALUE = _MAX_VALUE
assert _MAX_VALUE % 2 == 1
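Spelled out, the value layout the docstring describes is (a plain-Python
restatement, not part of the module)::

    UINT32_MAX = 2**32 - 1
    MAX_VALUE = 2**32 - 1025   # 0xfffffbff, the largest regular value
    DELETED = 0xfffffffe       # marks deleted entries in the hashtable
    EMPTY = 0xffffffff         # marks empty entries in the hashtable
    # everything in (MAX_VALUE, UINT32_MAX] is reserved for in-band signalling
    assert MAX_VALUE < DELETED < EMPTY == UINT32_MAX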
@cython.internal
cdef class IndexBase:
@@ -101,22 +122,30 @@ cdef class NSIndex(IndexBase):
def __getitem__(self, key):
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
data = <uint32_t *>hashindex_get(self.index, <char *>key)
if not data:
raise KeyError
return _le32toh(data[0]), _le32toh(data[1])
raise KeyError(key)
cdef uint32_t segment = _le32toh(data[0])
assert segment <= _MAX_VALUE, "maximum number of segments reached"
return segment, _le32toh(data[1])
def __setitem__(self, key, value):
assert len(key) == self.key_size
cdef int[2] data
data[0] = _htole32(value[0])
cdef uint32_t[2] data
cdef uint32_t segment = value[0]
assert segment <= _MAX_VALUE, "maximum number of segments reached"
data[0] = _htole32(segment)
data[1] = _htole32(value[1])
if not hashindex_set(self.index, <char *>key, data):
raise Exception('hashindex_set failed')
def __contains__(self, key):
cdef uint32_t segment
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
data = <uint32_t *>hashindex_get(self.index, <char *>key)
if data != NULL:
segment = _le32toh(data[0])
assert segment <= _MAX_VALUE, "maximum number of segments reached"
return data != NULL
def iteritems(self, marker=None):
@@ -149,25 +178,46 @@ cdef class NSKeyIterator:
self.key = hashindex_next_key(self.index, <char *>self.key)
if not self.key:
raise StopIteration
cdef int *value = <int *>(self.key + self.key_size)
return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
cdef uint32_t *value = <uint32_t *>(self.key + self.key_size)
cdef uint32_t segment = _le32toh(value[0])
assert segment <= _MAX_VALUE, "maximum number of segments reached"
return (<char *>self.key)[:self.key_size], (segment, _le32toh(value[1]))
cdef class ChunkIndex(IndexBase):
"""
Mapping of 32 byte keys to (refcount, size, csize), which are all 32-bit unsigned.
The reference count cannot overflow. If an overflow would occur, the refcount
is fixed to MAX_VALUE and is neither increased nor decreased by incref(), decref()
or add().
Prior signed 32-bit overflow is handled correctly for most cases: all values
from MAX_VALUE (exclusive) up to UINT32_MAX (2**32-1, inclusive) are reserved and either
cause silent data loss (-1, -2) or will raise an AssertionError when accessed.
Other values are handled correctly. Note that previously the refcount could also reach
0 by *increasing* it.
Assigning refcounts in this reserved range is an invalid operation and raises AssertionError.
"""
value_size = 12
def __getitem__(self, key):
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
data = <uint32_t *>hashindex_get(self.index, <char *>key)
if not data:
raise KeyError
return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
raise KeyError(key)
cdef uint32_t refcount = _le32toh(data[0])
assert refcount <= _MAX_VALUE
return refcount, _le32toh(data[1]), _le32toh(data[2])
def __setitem__(self, key, value):
assert len(key) == self.key_size
cdef int[3] data
data[0] = _htole32(value[0])
cdef uint32_t[3] data
cdef uint32_t refcount = value[0]
assert refcount <= _MAX_VALUE, "invalid reference count"
data[0] = _htole32(refcount)
data[1] = _htole32(value[1])
data[2] = _htole32(value[2])
if not hashindex_set(self.index, <char *>key, data):
@@ -175,9 +225,38 @@ cdef class ChunkIndex(IndexBase):
def __contains__(self, key):
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
data = <uint32_t *>hashindex_get(self.index, <char *>key)
if data != NULL:
assert data[0] <= _MAX_VALUE
return data != NULL
def incref(self, key):
"""Increase refcount for 'key', return (refcount, size, csize)"""
assert len(key) == self.key_size
data = <uint32_t *>hashindex_get(self.index, <char *>key)
if not data:
raise KeyError(key)
cdef uint32_t refcount = _le32toh(data[0])
assert refcount <= _MAX_VALUE, "invalid reference count"
if refcount != _MAX_VALUE:
refcount += 1
data[0] = _htole32(refcount)
return refcount, _le32toh(data[1]), _le32toh(data[2])
def decref(self, key):
"""Decrease refcount for 'key', return (refcount, size, csize)"""
assert len(key) == self.key_size
data = <uint32_t *>hashindex_get(self.index, <char *>key)
if not data:
raise KeyError(key)
cdef uint32_t refcount = _le32toh(data[0])
# Never decrease a reference count of zero
assert 0 < refcount <= _MAX_VALUE, "invalid reference count"
if refcount != _MAX_VALUE:
refcount -= 1
data[0] = _htole32(refcount)
return refcount, _le32toh(data[1]), _le32toh(data[2])
def iteritems(self, marker=None):
cdef const void *key
iter = ChunkKeyIterator(self.key_size)
@@ -191,22 +270,56 @@ cdef class ChunkIndex(IndexBase):
return iter
def summarize(self):
cdef long long total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
hashindex_summarize(self.index, &total_size, &total_csize,
&unique_size, &unique_csize,
&total_unique_chunks, &total_chunks)
return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
cdef uint64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0, chunks = 0, unique_chunks = 0
cdef uint32_t *values
cdef uint32_t refcount
cdef void *key = NULL
while True:
key = hashindex_next_key(self.index, key)
if not key:
break
unique_chunks += 1
values = <uint32_t*> (key + self.key_size)
refcount = _le32toh(values[0])
assert refcount <= MAX_VALUE, "invalid reference count"
chunks += refcount
unique_size += _le32toh(values[1])
unique_csize += _le32toh(values[2])
size += <uint64_t> _le32toh(values[1]) * _le32toh(values[0])
csize += <uint64_t> _le32toh(values[2]) * _le32toh(values[0])
return size, csize, unique_size, unique_csize, unique_chunks, chunks
def add(self, key, refs, size, csize):
assert len(key) == self.key_size
cdef int[3] data
cdef uint32_t[3] data
data[0] = _htole32(refs)
data[1] = _htole32(size)
data[2] = _htole32(csize)
hashindex_add(self.index, <char *>key, data)
self._add(<char*> key, data)
cdef _add(self, void *key, uint32_t *data):
cdef uint64_t refcount1, refcount2, result64
values = <uint32_t*> hashindex_get(self.index, key)
if values:
refcount1 = _le32toh(values[0])
refcount2 = _le32toh(data[0])
assert refcount1 <= _MAX_VALUE
assert refcount2 <= _MAX_VALUE
result64 = refcount1 + refcount2
values[0] = _htole32(min(result64, _MAX_VALUE))
else:
hashindex_set(self.index, key, data)
def merge(self, ChunkIndex other):
hashindex_merge(self.index, other.index)
cdef void *key = NULL
while True:
key = hashindex_next_key(other.index, key)
if not key:
break
self._add(key, <uint32_t*> (key + self.key_size))
cdef class ChunkKeyIterator:
@@ -226,5 +339,7 @@ cdef class ChunkKeyIterator:
self.key = hashindex_next_key(self.index, <char *>self.key)
if not self.key:
raise StopIteration
cdef int *value = <int *>(self.key + self.key_size)
return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
cdef uint32_t *value = <uint32_t *>(self.key + self.key_size)
cdef uint32_t refcount = _le32toh(value[0])
assert refcount <= MAX_VALUE, "invalid reference count"
return (<char *>self.key)[:self.key_size], (refcount, _le32toh(value[1]), _le32toh(value[2]))
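Taken together, the incref()/decref() methods above implement a sticky
saturation: once a refcount reaches MAX_VALUE it is pinned there in both
directions, which errs on the side of never freeing a chunk that might still
be referenced. The count handling alone, as a plain-Python sketch::

    MAX_VALUE = 2**32 - 1025

    def incref(count):
        assert count <= MAX_VALUE, "invalid reference count"
        return count + 1 if count != MAX_VALUE else MAX_VALUE

    def decref(count):
        # never decrease a reference count of zero
        assert 0 < count <= MAX_VALUE, "invalid reference count"
        return count - 1 if count != MAX_VALUE else MAX_VALUE

    # a pinned count stays pinned
    assert incref(MAX_VALUE) == decref(MAX_VALUE) == MAX_VALUE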


@@ -1,8 +1,13 @@
import base64
import hashlib
import os
import struct
import tempfile
import zlib
import pytest
from ..hashindex import NSIndex, ChunkIndex
from .. import hashindex
from . import BaseTestCase
@@ -100,3 +105,174 @@ class HashIndexTestCase(BaseTestCase):
assert idx1[H(2)] == (7, 200, 200)
assert idx1[H(3)] == (3, 300, 300)
assert idx1[H(4)] == (6, 400, 400)
def test_chunkindex_summarize(self):
idx = ChunkIndex()
idx[H(1)] = 1, 1000, 100
idx[H(2)] = 2, 2000, 200
idx[H(3)] = 3, 3000, 300
size, csize, unique_size, unique_csize, unique_chunks, chunks = idx.summarize()
assert size == 1000 + 2 * 2000 + 3 * 3000
assert csize == 100 + 2 * 200 + 3 * 300
assert unique_size == 1000 + 2000 + 3000
assert unique_csize == 100 + 200 + 300
assert chunks == 1 + 2 + 3
assert unique_chunks == 3
class HashIndexRefcountingTestCase(BaseTestCase):
def test_chunkindex_limit(self):
idx = ChunkIndex()
idx[H(1)] = hashindex.MAX_VALUE - 1, 1, 2
# 5 is arbitrary, any number of incref/decrefs shouldn't move it once it's limited
for i in range(5):
# first incref to move it to the limit
refcount, *_ = idx.incref(H(1))
assert refcount == hashindex.MAX_VALUE
for i in range(5):
refcount, *_ = idx.decref(H(1))
assert refcount == hashindex.MAX_VALUE
def _merge(self, refcounta, refcountb):
def merge(refcount1, refcount2):
idx1 = ChunkIndex()
idx1[H(1)] = refcount1, 1, 2
idx2 = ChunkIndex()
idx2[H(1)] = refcount2, 1, 2
idx1.merge(idx2)
refcount, *_ = idx1[H(1)]
return refcount
result = merge(refcounta, refcountb)
# check for commutativity
assert result == merge(refcountb, refcounta)
return result
def test_chunkindex_merge_limit1(self):
# Check that it does *not* limit at MAX_VALUE - 1
# (MAX_VALUE is odd)
half = hashindex.MAX_VALUE // 2
assert self._merge(half, half) == hashindex.MAX_VALUE - 1
def test_chunkindex_merge_limit2(self):
# 3000000000 + 2000000000 > MAX_VALUE
assert self._merge(3000000000, 2000000000) == hashindex.MAX_VALUE
def test_chunkindex_merge_limit3(self):
# Crossover point: both addition and limit semantics will yield the same result
half = hashindex.MAX_VALUE // 2
assert self._merge(half + 1, half) == hashindex.MAX_VALUE
def test_chunkindex_merge_limit4(self):
# Beyond crossover, the result of plain addition would exceed MAX_VALUE
half = hashindex.MAX_VALUE // 2
assert self._merge(half + 2, half) == hashindex.MAX_VALUE
assert self._merge(half + 1, half + 1) == hashindex.MAX_VALUE
def test_chunkindex_add(self):
idx1 = ChunkIndex()
idx1.add(H(1), 5, 6, 7)
assert idx1[H(1)] == (5, 6, 7)
idx1.add(H(1), 1, 0, 0)
assert idx1[H(1)] == (6, 6, 7)
def test_incref_limit(self):
idx1 = ChunkIndex()
idx1[H(1)] = (hashindex.MAX_VALUE, 6, 7)
idx1.incref(H(1))
refcount, *_ = idx1[H(1)]
assert refcount == hashindex.MAX_VALUE
def test_decref_limit(self):
idx1 = ChunkIndex()
idx1[H(1)] = hashindex.MAX_VALUE, 6, 7
idx1.decref(H(1))
refcount, *_ = idx1[H(1)]
assert refcount == hashindex.MAX_VALUE
def test_decref_zero(self):
idx1 = ChunkIndex()
idx1[H(1)] = 0, 0, 0
with pytest.raises(AssertionError):
idx1.decref(H(1))
def test_incref_decref(self):
idx1 = ChunkIndex()
idx1.add(H(1), 5, 6, 7)
assert idx1[H(1)] == (5, 6, 7)
idx1.incref(H(1))
assert idx1[H(1)] == (6, 6, 7)
idx1.decref(H(1))
assert idx1[H(1)] == (5, 6, 7)
def test_setitem_raises(self):
idx1 = ChunkIndex()
with pytest.raises(AssertionError):
idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0
def test_keyerror(self):
idx = ChunkIndex()
with pytest.raises(KeyError):
idx.incref(H(1))
with pytest.raises(KeyError):
idx.decref(H(1))
with pytest.raises(KeyError):
idx[H(1)]
with pytest.raises(OverflowError):
idx.add(H(1), -1, 0, 0)
class HashIndexDataTestCase(BaseTestCase):
# This bytestring was created with 1.0-maint at c2f9533
HASHINDEX = b'eJzt0L0NgmAUhtHLT0LDEI6AuAEhMVYmVnSuYefC7AB3Aj9KNedJbnfyFne6P67P27w0EdG1Eac+Cm1ZybAsy7Isy7Isy7Isy7I' \
b'sy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7Isy7LsL9nhc+cqTZ' \
b'3XlO2Ys++Du5fX+l1/YFmWZVmWZVmWZVmWZVmWZVmWZVmWZVmWZVmWZVmWZVmWZVmWZVmWZVmWZVmWZVmWZVn2/+0O2rYccw=='
def _serialize_hashindex(self, idx):
with tempfile.TemporaryDirectory() as tempdir:
file = os.path.join(tempdir, 'idx')
idx.write(file)
with open(file, 'rb') as f:
return self._pack(f.read())
def _deserialize_hashindex(self, bytestring):
with tempfile.TemporaryDirectory() as tempdir:
file = os.path.join(tempdir, 'idx')
with open(file, 'wb') as f:
f.write(self._unpack(bytestring))
return ChunkIndex.read(file)
def _pack(self, bytestring):
return base64.b64encode(zlib.compress(bytestring))
def _unpack(self, bytestring):
return zlib.decompress(base64.b64decode(bytestring))
def test_identical_creation(self):
idx1 = ChunkIndex()
idx1[H(1)] = 1, 2, 3
idx1[H(2)] = 2**31 - 1, 0, 0
idx1[H(3)] = 4294962296, 0, 0 # 4294962296 is -5000 interpreted as a uint32_t
assert self._serialize_hashindex(idx1) == self.HASHINDEX
def test_read_known_good(self):
idx1 = self._deserialize_hashindex(self.HASHINDEX)
assert idx1[H(1)] == (1, 2, 3)
assert idx1[H(2)] == (2**31 - 1, 0, 0)
assert idx1[H(3)] == (4294962296, 0, 0)
idx2 = ChunkIndex()
idx2[H(3)] = 2**32 - 123456, 6, 7
idx1.merge(idx2)
assert idx1[H(3)] == (hashindex.MAX_VALUE, 0, 0)
def test_nsindex_segment_limit():
idx = NSIndex()
with pytest.raises(AssertionError):
idx[H(1)] = hashindex.MAX_VALUE + 1, 0
assert H(1) not in idx
idx[H(2)] = hashindex.MAX_VALUE, 0
assert H(2) in idx


@@ -1,6 +1,42 @@
Changelog
=========
Version 1.0.2
-------------
Bug fixes:
- fix malfunction and potential corruption on (nowadays rather rare) big-endian
architectures or bi-endian archs in (rare) BE mode. #886, #889
cache resync / index merge were malfunctioning due to this, potentially
leading to data loss. borg info had cosmetic issues (displayed wrong values).
note: all (widespread) little-endian archs (like x86/x64) and bi-endian archs
in (widespread) LE mode (like ARMEL, MIPSEL, ...) were NOT affected.
(see the byte-order sketch at the end of this section)
- add overflow and range checks for 1st (special) uint32 of the hashindex
values, switch from int32 to uint32.
- fix so that the refcount will never overflow, but just sticks to the max. value
after an overflow would have occurred.
- borg delete: fix --cache-only for broken caches, #874
Makes --cache-only idempotent: it won't fail if the cache is already deleted.
- fixed borg create --one-file-system erroneously traversing into other
filesystems (if starting fs device number was 0), #873
- work around a bug in Linux fadvise FADV_DONTNEED, #907
Other changes:
- better test coverage for hashindex, incl. overflow testing, checking correct
computations so endianness issues would be discovered.
- reproducible doc for ProgressIndicator*, make the build reproducible.
- use latest llfuse for vagrant machines
- docs:
- use /path/to/repo in examples, fixes #901
- fix confusing usage of "repo" as archive name (use "arch")
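The byte-order sketch referenced above: the fix keeps every on-disk value
little-endian and converts only at the boundary, which is what the
_htole32()/_le32toh() calls in hashindex.pyx do. Roughly, in Python terms::

    import struct

    def htole32(v):   # host int -> little-endian on-disk bytes
        return struct.pack('<I', v)

    def le32toh(b):   # little-endian on-disk bytes -> host int
        return struct.unpack('<I', b)[0]

    # on a big-endian host, interpreting the raw bytes natively would
    # scramble every refcount and size; an explicit LE round-trip does not
    assert le32toh(htole32(0xfffffbff)) == 0xfffffbff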
Version 1.0.1
-------------


@@ -37,16 +37,16 @@ A step by step example
1. Before a backup can be made a repository has to be initialized::
$ borg init /mnt/backup
$ borg init /path/to/repo
2. Backup the ``~/src`` and ``~/Documents`` directories into an archive called
*Monday*::
$ borg create /mnt/backup::Monday ~/src ~/Documents
$ borg create /path/to/repo::Monday ~/src ~/Documents
3. The next day create a new archive called *Tuesday*::
$ borg create -v --stats /mnt/backup::Tuesday ~/src ~/Documents
$ borg create -v --stats /path/to/repo::Tuesday ~/src ~/Documents
This backup will be a lot quicker and a lot smaller since only new,
never-before-seen data is stored. The ``--stats`` option causes |project_name| to
@@ -72,24 +72,24 @@ A step by step example
4. List all archives in the repository::
$ borg list /mnt/backup
$ borg list /path/to/repo
Monday Mon, 2016-02-15 19:14:44
Tuesday Tue, 2016-02-16 19:15:11
5. List the contents of the *Monday* archive::
$ borg list /mnt/backup::Monday
$ borg list /path/to/repo::Monday
drwxr-xr-x user group 0 Mon, 2016-02-15 18:22:30 home/user/Documents
-rw-r--r-- user group 7961 Mon, 2016-02-15 18:22:30 home/user/Documents/Important.doc
...
6. Restore the *Monday* archive::
$ borg extract /mnt/backup::Monday
$ borg extract /path/to/repo::Monday
7. Recover disk space by manually deleting the *Monday* archive::
$ borg delete /mnt/backup::Monday
$ borg delete /path/to/repo::Monday
.. Note::
Borg is quiet by default (it works on WARNING log level).
@@ -134,17 +134,17 @@ or high compression:
If you have fast repo storage and want some compression: ::
$ borg create --compression lz4 /mnt/backup::repo ~
$ borg create --compression lz4 /path/to/repo::arch ~
If you have slower repo storage and want a bit more compression (N=0..9,
0 means no compression, 9 means high compression): ::
$ borg create --compression zlib,N /mnt/backup::repo ~
$ borg create --compression zlib,N /path/to/repo::arch ~
If you have very slow repo storage and want high compression (N=0..9, 0 means
low compression, 9 means high compression): ::
$ borg create --compression lzma,N /mnt/backup::repo ~
$ borg create --compression lzma,N /path/to/repo::arch ~
You'll need to experiment a bit to find the best compression for your use case.
Keep an eye on CPU load and throughput.
@@ -208,23 +208,23 @@ Remote repositories
host is accessible using SSH. This is fastest and easiest when |project_name|
is installed on the remote host, in which case the following syntax is used::
$ borg init user@hostname:/mnt/backup
$ borg init user@hostname:/path/to/repo
or::
$ borg init ssh://user@hostname:port//mnt/backup
$ borg init ssh://user@hostname:port//path/to/repo
Remote operations over SSH can be automated with SSH keys. You can restrict the
use of the SSH keypair by prepending a forced command to the SSH public key in
the remote server's `authorized_keys` file. This example will start |project_name|
in server mode and limit it to a specific filesystem path::
command="borg serve --restrict-to-path /mnt/backup",no-pty,no-agent-forwarding,no-port-forwarding,no-X11-forwarding,no-user-rc ssh-rsa AAAAB3[...]
command="borg serve --restrict-to-path /path/to/repo",no-pty,no-agent-forwarding,no-port-forwarding,no-X11-forwarding,no-user-rc ssh-rsa AAAAB3[...]
If it is not possible to install |project_name| on the remote host,
it is still possible to use the remote host to store a repository by
mounting the remote filesystem, for example, using sshfs::
$ sshfs user@hostname:/mnt /mnt
$ borg init /mnt/backup
$ fusermount -u /mnt
$ sshfs user@hostname:/path/to /path/to
$ borg init /path/to/repo
$ fusermount -u /path/to


@@ -207,10 +207,10 @@ Examples
::
# Local repository (default is to use encryption in repokey mode)
$ borg init /mnt/backup
$ borg init /path/to/repo
# Local repository (no encryption)
$ borg init --encryption=none /mnt/backup
$ borg init --encryption=none /path/to/repo
# Remote repository (accesses a remote borg via ssh)
$ borg init user@hostname:backup
@@ -265,54 +265,54 @@ Examples
::
# Backup ~/Documents into an archive named "my-documents"
$ borg create /mnt/backup::my-documents ~/Documents
$ borg create /path/to/repo::my-documents ~/Documents
# same, but verbosely list all files as we process them
$ borg create -v --list /mnt/backup::my-documents ~/Documents
$ borg create -v --list /path/to/repo::my-documents ~/Documents
# Backup ~/Documents and ~/src but exclude pyc files
$ borg create /mnt/backup::my-files \
$ borg create /path/to/repo::my-files \
~/Documents \
~/src \
--exclude '*.pyc'
# Backup home directories excluding image thumbnails (i.e. only
# /home/*/.thumbnails is excluded, not /home/*/*/.thumbnails)
$ borg create /mnt/backup::my-files /home \
$ borg create /path/to/repo::my-files /home \
--exclude 're:^/home/[^/]+/\.thumbnails/'
# Do the same using a shell-style pattern
$ borg create /mnt/backup::my-files /home \
$ borg create /path/to/repo::my-files /home \
--exclude 'sh:/home/*/.thumbnails'
# Backup the root filesystem into an archive named "root-YYYY-MM-DD"
# use zlib compression (good, but slow) - default is no compression
$ borg create -C zlib,6 /mnt/backup::root-{now:%Y-%m-%d} / --one-file-system
$ borg create -C zlib,6 /path/to/repo::root-{now:%Y-%m-%d} / --one-file-system
# Make a big effort in fine granular deduplication (big chunk management
# overhead, needs a lot of RAM and disk space, see formula in internals
# docs - same parameters as borg < 1.0 or attic):
$ borg create --chunker-params 10,23,16,4095 /mnt/backup::small /smallstuff
$ borg create --chunker-params 10,23,16,4095 /path/to/repo::small /smallstuff
# Backup a raw device (must not be active/in use/mounted at that time)
$ dd if=/dev/sdx bs=10M | borg create /mnt/backup::my-sdx -
$ dd if=/dev/sdx bs=10M | borg create /path/to/repo::my-sdx -
# No compression (default)
$ borg create /mnt/backup::repo ~
$ borg create /path/to/repo::arch ~
# Super fast, low compression
$ borg create --compression lz4 /mnt/backup::repo ~
$ borg create --compression lz4 /path/to/repo::arch ~
# Less fast, higher compression (N = 0..9)
$ borg create --compression zlib,N /mnt/backup::repo ~
$ borg create --compression zlib,N /path/to/repo::arch ~
# Even slower, even higher compression (N = 0..9)
$ borg create --compression lzma,N /mnt/backup::repo ~
$ borg create --compression lzma,N /path/to/repo::arch ~
# Format tags available for archive name:
# {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}
# add short hostname, backup username and current unixtime (seconds from epoch)
$ borg create /mnt/backup::{hostname}-{user}-{now:%s} ~
$ borg create /path/to/repo::{hostname}-{user}-{now:%s} ~
.. include:: usage/extract.rst.inc
@@ -321,19 +321,19 @@ Examples
::
# Extract entire archive
$ borg extract /mnt/backup::my-files
$ borg extract /path/to/repo::my-files
# Extract entire archive and list files while processing
$ borg extract -v --list /mnt/backup::my-files
$ borg extract -v --list /path/to/repo::my-files
# Extract the "src" directory
$ borg extract /mnt/backup::my-files home/USERNAME/src
$ borg extract /path/to/repo::my-files home/USERNAME/src
# Extract the "src" directory but exclude object files
$ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o'
$ borg extract /path/to/repo::my-files home/USERNAME/src --exclude '*.o'
# Restore a raw device (must not be active/in use/mounted at that time)
$ borg extract --stdout /mnt/backup::my-sdx | dd of=/dev/sdx bs=10M
$ borg extract --stdout /path/to/repo::my-sdx | dd of=/dev/sdx bs=10M
.. Note::
@@ -349,12 +349,12 @@ Examples
~~~~~~~~
::
$ borg create /mnt/backup::archivename ~
$ borg list /mnt/backup
$ borg create /path/to/repo::archivename ~
$ borg list /path/to/repo
archivename Mon, 2016-02-15 19:50:19
$ borg rename /mnt/backup::archivename newname
$ borg list /mnt/backup
$ borg rename /path/to/repo::archivename newname
$ borg list /path/to/repo
newname Mon, 2016-02-15 19:50:19
@@ -364,14 +364,14 @@ Examples
~~~~~~~~
::
$ borg list /mnt/backup
$ borg list /path/to/repo
Monday Mon, 2016-02-15 19:15:11
repo Mon, 2016-02-15 19:26:54
root-2016-02-15 Mon, 2016-02-15 19:36:29
newname Mon, 2016-02-15 19:50:19
...
$ borg list /mnt/backup::root-2016-02-15
$ borg list /path/to/repo::root-2016-02-15
drwxr-xr-x root root 0 Mon, 2016-02-15 17:44:27 .
drwxrwxr-x root root 0 Mon, 2016-02-15 19:04:49 bin
-rwxr-xr-x root root 1029624 Thu, 2014-11-13 00:08:51 bin/bash
@@ -379,7 +379,7 @@ Examples
-rwxr-xr-x root root 2140 Fri, 2015-03-27 20:24:22 bin/bzdiff
...
$ borg list /mnt/backup::archiveA --list-format="{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}"
$ borg list /path/to/repo::archiveA --list-format="{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}"
drwxrwxr-x user user 0 Sun, 2015-02-01 11:00:00 .
drwxrwxr-x user user 0 Sun, 2015-02-01 11:00:00 code
drwxrwxr-x user user 0 Sun, 2015-02-01 11:00:00 code/myproject
@@ -387,8 +387,8 @@ Examples
...
# see what is changed between archives, based on file modification time, size and file path
$ borg list /mnt/backup::archiveA --list-format="{mtime:%s}{TAB}{size}{TAB}{path}{LF}" |sort -n > /tmp/list.archiveA
$ borg list /mnt/backup::archiveB --list-format="{mtime:%s}{TAB}{size}{TAB}{path}{LF}" |sort -n > /tmp/list.archiveB
$ borg list /path/to/repo::archiveA --list-format="{mtime:%s}{TAB}{size}{TAB}{path}{LF}" |sort -n > /tmp/list.archiveA
$ borg list /path/to/repo::archiveB --list-format="{mtime:%s}{TAB}{size}{TAB}{path}{LF}" |sort -n > /tmp/list.archiveB
$ diff -y /tmp/list.archiveA /tmp/list.archiveB
1422781200 0 . 1422781200 0 .
1422781200 0 code 1422781200 0 code
@@ -442,10 +442,10 @@ Examples
::
# delete a single backup archive:
$ borg delete /mnt/backup::Monday
$ borg delete /path/to/repo::Monday
# delete the whole repository and the related local cache:
$ borg delete /mnt/backup
$ borg delete /path/to/repo
You requested to completely DELETE the repository *including* all archives it contains:
repo Mon, 2016-02-15 19:26:54
root-2016-02-15 Mon, 2016-02-15 19:36:29
@@ -473,18 +473,18 @@ will see what it would do without it actually doing anything.
# Keep 7 end of day and 4 additional end of week archives.
# Do a dry-run without actually deleting anything.
$ borg prune --dry-run --keep-daily=7 --keep-weekly=4 /mnt/backup
$ borg prune --dry-run --keep-daily=7 --keep-weekly=4 /path/to/repo
# Same as above but only apply to archive names starting with "foo":
$ borg prune --keep-daily=7 --keep-weekly=4 --prefix=foo /mnt/backup
$ borg prune --keep-daily=7 --keep-weekly=4 --prefix=foo /path/to/repo
# Keep 7 end of day, 4 additional end of week archives,
# and an end of month archive for every month:
$ borg prune --keep-daily=7 --keep-weekly=4 --keep-monthly=-1 /mnt/backup
$ borg prune --keep-daily=7 --keep-weekly=4 --keep-monthly=-1 /path/to/repo
# Keep all backups in the last 10 days, 4 additional end of week archives,
# and an end of month archive for every month:
$ borg prune --keep-within=10d --keep-weekly=4 --keep-monthly=-1 /mnt/backup
$ borg prune --keep-within=10d --keep-weekly=4 --keep-monthly=-1 /path/to/repo
.. include:: usage/info.rst.inc
@@ -493,14 +493,14 @@ Examples
~~~~~~~~
::
$ borg info /mnt/backup::root-2016-02-15
$ borg info /path/to/repo::root-2016-02-15
Name: root-2016-02-15
Fingerprint: 57c827621f21b000a8d363c1e163cc55983822b3afff3a96df595077a660be50
Hostname: myhostname
Username: root
Time (start): Mon, 2016-02-15 19:36:29
Time (end): Mon, 2016-02-15 19:39:26
Command line: /usr/local/bin/borg create -v --list -C zlib,6 /mnt/backup::root-2016-02-15 / --one-file-system
Command line: /usr/local/bin/borg create -v --list -C zlib,6 /path/to/repo::root-2016-02-15 / --one-file-system
Number of files: 38100
Original size Compressed size Deduplicated size
@@ -519,7 +519,7 @@ borg mount/borgfs
+++++++++++++++++
::
$ borg mount /mnt/backup::root-2016-02-15 /tmp/mymountpoint
$ borg mount /path/to/repo::root-2016-02-15 /tmp/mymountpoint
$ ls /tmp/mymountpoint
bin boot etc home lib lib64 lost+found media mnt opt root sbin srv tmp usr var
$ fusermount -u /tmp/mymountpoint
@@ -551,8 +551,8 @@ Examples
::
# Create a key file protected repository
$ borg init --encryption=keyfile -v /mnt/backup
Initializing repository at "/mnt/backup"
$ borg init --encryption=keyfile -v /path/to/repo
Initializing repository at "/path/to/repo"
Enter new passphrase:
Enter same passphrase again:
Remember your passphrase. Your data will be inaccessible without it.
@@ -563,7 +563,7 @@ Examples
Done.
# Change key file passphrase
$ borg change-passphrase -v /mnt/backup
$ borg change-passphrase -v /path/to/repo
Enter passphrase for key /root/.config/borg/keys/mnt_backup:
Enter new passphrase:
Enter same passphrase again:
@@ -586,11 +586,11 @@ forced command. That way, other options given by the client (like ``--info`` or
::
# Allow an SSH keypair to only run borg, and only have access to /mnt/backup.
# Allow an SSH keypair to only run borg, and only have access to /path/to/repo.
# Use key options to disable unneeded and potentially dangerous SSH functionality.
# This will help to secure an automated remote backup system.
$ cat ~/.ssh/authorized_keys
command="borg serve --restrict-to-path /mnt/backup",no-pty,no-agent-forwarding,no-port-forwarding,no-X11-forwarding,no-user-rc ssh-rsa AAAAB3[...]
command="borg serve --restrict-to-path /path/to/repo",no-pty,no-agent-forwarding,no-port-forwarding,no-X11-forwarding,no-user-rc ssh-rsa AAAAB3[...]
.. include:: usage/upgrade.rst.inc
@@ -600,11 +600,11 @@ Examples
::
# Upgrade the borg repository to the most recent version.
$ borg upgrade -v /mnt/backup
making a hardlink copy in /mnt/backup.upgrade-2016-02-15-20:51:55
$ borg upgrade -v /path/to/repo
making a hardlink copy in /path/to/repo.upgrade-2016-02-15-20:51:55
opening attic repository with borg and converting
no key file found for repository
converting repo index /mnt/backup/index.0
converting repo index /path/to/repo/index.0
converting 1 segments...
converting borg 0.xx to borg current
no key file found for repository
@@ -802,16 +802,16 @@ After the backup has completed, you remove the snapshots again. ::
$ # create snapshots here
$ lvdisplay > lvdisplay.txt
$ borg create --read-special /mnt/backup::repo lvdisplay.txt /dev/vg0/*-snapshot
$ borg create --read-special /path/to/repo::arch lvdisplay.txt /dev/vg0/*-snapshot
$ # remove snapshots here
Now, let's see how to restore some LVs from such a backup. ::
$ borg extract /mnt/backup::repo lvdisplay.txt
$ borg extract /path/to/repo::arch lvdisplay.txt
$ # create empty LVs with correct sizes here (look into lvdisplay.txt).
$ # we assume that you created an empty root and home LV and overwrite it now:
$ borg extract --stdout /mnt/backup::repo dev/vg0/root-snapshot > /dev/vg0/root
$ borg extract --stdout /mnt/backup::repo dev/vg0/home-snapshot > /dev/vg0/home
$ borg extract --stdout /path/to/repo::arch dev/vg0/root-snapshot > /dev/vg0/root
$ borg extract --stdout /path/to/repo::arch dev/vg0/home-snapshot > /dev/vg0/home
Append-only mode