mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-08 16:23:42 -04:00
Memory usage improvements
This commit is contained in:
parent
84b4b08309
commit
198b3f90fc
4 changed files with 20 additions and 42 deletions
|
|
@ -27,8 +27,6 @@ class Archive(object):
|
|||
self.keychain = keychain
|
||||
self.store = store
|
||||
self.items = []
|
||||
self.chunks = []
|
||||
self.chunk_idx = {}
|
||||
self.hard_links = {}
|
||||
if name:
|
||||
self.load(self.keychain.id_hash(name))
|
||||
|
|
@ -53,11 +51,10 @@ class Archive(object):
|
|||
assert items['version'] == 1
|
||||
assert self.metadata['items_hash'] == items_hash
|
||||
self.items = items['items']
|
||||
for i, chunk in enumerate(self.chunks):
|
||||
self.chunk_idx[i] = chunk[0]
|
||||
|
||||
def save(self, name):
|
||||
def save(self, name, cache):
|
||||
self.id = self.keychain.id_hash(name)
|
||||
self.chunks = [(id, size) for (id, (count, size)) in cache.chunk_counts.iteritems() if count > 1000000]
|
||||
chunks = {'version': 1, 'chunks': self.chunks}
|
||||
data, chunks_hash = self.keychain.encrypt_create(msgpack.packb(chunks))
|
||||
self.store.put(NS_ARCHIVE_CHUNKS, self.id, data)
|
||||
|
|
@ -124,12 +121,11 @@ class Archive(object):
|
|||
os.link(source, path)
|
||||
else:
|
||||
with open(path, 'wb') as fd:
|
||||
for chunk in item['chunks']:
|
||||
id = self.chunk_idx[chunk]
|
||||
for id in item['chunks']:
|
||||
try:
|
||||
data, hash = self.keychain.decrypt(self.store.get(NS_CHUNK, id))
|
||||
if self.keychain.id_hash(data) != id:
|
||||
raise IntegrityError('chunk id did not match')
|
||||
raise IntegrityError('chunk hash did not match')
|
||||
fd.write(data)
|
||||
except ValueError:
|
||||
raise Exception('Invalid chunk checksum')
|
||||
|
|
@ -161,8 +157,7 @@ class Archive(object):
|
|||
os.utime(path, (item['atime'], item['mtime']))
|
||||
|
||||
def verify_file(self, item):
|
||||
for chunk in item['chunks']:
|
||||
id = self.chunk_idx[chunk]
|
||||
for id in item['chunks']:
|
||||
try:
|
||||
data, hash = self.keychain.decrypt(self.store.get(NS_CHUNK, id))
|
||||
if self.keychain.id_hash(data) != id:
|
||||
|
|
@ -239,45 +234,22 @@ class Archive(object):
|
|||
ids = None
|
||||
break
|
||||
else:
|
||||
chunks = [self.process_chunk2(id, cache) for id in ids]
|
||||
for id in ids:
|
||||
cache.chunk_incref(id)
|
||||
# Only chunkify the file if needed
|
||||
if ids is None:
|
||||
fd = open(path, 'rb')
|
||||
with open(path, 'rb') as fd:
|
||||
size = 0
|
||||
ids = []
|
||||
chunks = []
|
||||
for chunk in chunkify(fd, CHUNK_SIZE, WINDOW_SIZE,
|
||||
self.keychain.get_chunkify_seed()):
|
||||
id = self.keychain.id_hash(chunk)
|
||||
ids.append(id)
|
||||
try:
|
||||
chunks.append(self.chunk_idx[id])
|
||||
except KeyError:
|
||||
chunks.append(self.process_chunk(id, chunk, cache))
|
||||
ids.append(cache.add_chunk(self.keychain.id_hash(chunk), chunk))
|
||||
size += len(chunk)
|
||||
cache.memorize_file_chunks(path_hash, st, ids)
|
||||
item = {'path': safe_path, 'chunks': chunks, 'size': size}
|
||||
item = {'path': safe_path, 'chunks': ids, 'size': size}
|
||||
item.update(self.stat_attrs(st, path))
|
||||
self.items.append(item)
|
||||
|
||||
def process_chunk2(self, id, cache):
|
||||
try:
|
||||
return self.chunk_idx[id]
|
||||
except KeyError:
|
||||
idx = len(self.chunks)
|
||||
id, size = cache.chunk_incref(id)
|
||||
self.chunks.append((id, size))
|
||||
self.chunk_idx[id] = idx
|
||||
return idx
|
||||
|
||||
def process_chunk(self, id, data, cache):
|
||||
idx = len(self.chunks)
|
||||
id, size = cache.add_chunk(id, data)
|
||||
self.chunks.append((id, size))
|
||||
self.chunk_idx[id] = idx
|
||||
return idx
|
||||
|
||||
@staticmethod
|
||||
def list_archives(store, keychain):
|
||||
for id in list(store.list(NS_ARCHIVE_METADATA)):
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ class Archiver(object):
|
|||
pass
|
||||
for path in args.paths:
|
||||
self._process(archive, cache, args.patterns, unicode(path))
|
||||
archive.save(args.archive.archive)
|
||||
archive.save(args.archive.archive, cache)
|
||||
cache.save()
|
||||
return self.exit_code
|
||||
|
||||
|
|
|
|||
|
|
@ -60,6 +60,10 @@ class Cache(object):
|
|||
yield key, (value[0] + 1,) + value[1:]
|
||||
|
||||
def save(self):
|
||||
for id, (count, size) in self.chunk_counts.iteritems():
|
||||
if count > 1000000:
|
||||
self.chunk_counts[id] = count - 1000000, size
|
||||
|
||||
cache = {'version': 1,
|
||||
'tid': self.store.tid,
|
||||
'chunk_counts': self.chunk_counts,
|
||||
|
|
@ -78,16 +82,17 @@ class Cache(object):
|
|||
data, hash = self.keychain.encrypt_read(data)
|
||||
csize = len(data)
|
||||
self.store.put(NS_CHUNK, id, data)
|
||||
self.chunk_counts[id] = (1, csize)
|
||||
return id, csize
|
||||
self.chunk_counts[id] = (1000001, csize)
|
||||
return id
|
||||
|
||||
def seen_chunk(self, id):
|
||||
return self.chunk_counts.get(id, (0, 0))[0]
|
||||
|
||||
def chunk_incref(self, id):
|
||||
count, size = self.chunk_counts[id]
|
||||
self.chunk_counts[id] = (count + 1, size)
|
||||
return id, size
|
||||
if count < 1000000:
|
||||
self.chunk_counts[id] = (count + 1000001, size)
|
||||
return id
|
||||
|
||||
def chunk_decref(self, id):
|
||||
count, size = self.chunk_counts[id]
|
||||
|
|
|
|||
|
|
@ -97,6 +97,7 @@ class Test(unittest.TestCase):
|
|||
os.symlink('somewhere', os.path.join(self.input_path, 'link1'))
|
||||
os.mkfifo(os.path.join(self.input_path, 'fifo1'))
|
||||
self.darc('create', self.store_path + '::test', 'input')
|
||||
self.darc('create', self.store_path + '::test.2', 'input')
|
||||
self.darc('extract', self.store_path + '::test', 'output')
|
||||
self.diff_dirs('input', 'output/input')
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue