From 5aa74abedffcc6862e28228bf0b7af56fb2f4e69 Mon Sep 17 00:00:00 2001 From: Abogical Date: Fri, 17 Feb 2017 14:28:39 +0200 Subject: [PATCH 1/6] Add dsize and dcsize keys These keys show the deduplicated size and the deduplicated compressed size of each file in the archive. --- src/borg/helpers.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/borg/helpers.py b/src/borg/helpers.py index d6c71d054..a96049fbb 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -1425,12 +1425,14 @@ class ItemFormatter(BaseFormatter): 'source': 'link target for links (identical to linktarget)', 'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links', 'csize': 'compressed size', + 'dsize': 'deduplicated size', + 'dcsize': 'deduplicated compressed size', 'num_chunks': 'number of chunks in this file', 'unique_chunks': 'number of unique chunks in this file', } KEY_GROUPS = ( ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'flags'), - ('size', 'csize', 'num_chunks', 'unique_chunks'), + ('size', 'csize', 'dsize', 'dcsize', 'num_chunks', 'unique_chunks'), ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'), tuple(sorted(hashlib.algorithms_guaranteed)), ('archiveid', 'archivename', 'extra'), @@ -1479,6 +1481,8 @@ class ItemFormatter(BaseFormatter): self.call_keys = { 'size': self.calculate_size, 'csize': self.calculate_csize, + 'dsize': self.calculate_dsize, + 'dcsize': self.calculate_dcsize, 'num_chunks': self.calculate_num_chunks, 'unique_chunks': self.calculate_unique_chunks, 'isomtime': partial(self.format_time, 'mtime'), @@ -1540,6 +1544,14 @@ class ItemFormatter(BaseFormatter): def calculate_csize(self, item): return sum(c.csize for c in item.get('chunks', [])) + def calculate_dsize(self, item): + chunk_index = self.archive.cache.chunks + return sum(c.size for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1) + + def calculate_dcsize(self, item): + 
chunk_index = self.archive.cache.chunks + return sum(c.csize for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1) + def hash_item(self, hash_function, item): if 'chunks' not in item: return "" From 59571115a17c54b4724384466c4d04eeb9793d06 Mon Sep 17 00:00:00 2001 From: Abogical Date: Fri, 17 Feb 2017 15:26:14 +0200 Subject: [PATCH 2/6] Add tests for dsize and dcsize --- src/borg/testsuite/archiver.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index d720af46f..03e647c21 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -1298,9 +1298,12 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('init', self.repository_location) test_archive = self.repository_location + '::test' self.cmd('create', '-C', 'lz4', test_archive, 'input') - output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', test_archive) - size, csize, path = output.split("\n")[1].split(" ") + output = self.cmd('list', '--format', '{size} {csize} {dsize} {dcsize} {path}{NL}', test_archive) + size, csize, dsize, dcsize, path = output.split("\n")[1].split(" ") assert int(csize) < int(size) + assert int(dcsize) < int(dsize) + assert int(dsize) <= int(size) + assert int(dcsize) <= int(csize) def _get_sizes(self, compression, compressible, size=10000): if compressible: From 6ed0746934a95b0cfd8504fdfe1354169598d674 Mon Sep 17 00:00:00 2001 From: Abogical Date: Fri, 17 Feb 2017 17:26:02 +0200 Subject: [PATCH 3/6] Count non-unique chunks deduplicated sizes --- src/borg/helpers.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/borg/helpers.py b/src/borg/helpers.py index a96049fbb..502ab1e6f 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -19,7 +19,7 @@ import time import unicodedata import uuid from binascii import hexlify -from collections import namedtuple, deque, abc +from collections import namedtuple, deque, 
abc, Counter from datetime import datetime, timezone, timedelta from fnmatch import translate from functools import wraps, partial, lru_cache @@ -1546,11 +1546,15 @@ class ItemFormatter(BaseFormatter): def calculate_dsize(self, item): chunk_index = self.archive.cache.chunks - return sum(c.size for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1) + chunks = item.get('chunks', []) + chunks_counter = Counter(c.id for c in chunks) + return sum(c.size for c in chunks if chunk_index[c.id].refcount == chunks_counter[c.id]) def calculate_dcsize(self, item): chunk_index = self.archive.cache.chunks - return sum(c.csize for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1) + chunks = item.get('chunks', []) + chunks_counter = Counter(c.id for c in chunks) + return sum(c.csize for c in chunks if chunk_index[c.id].refcount == chunks_counter[c.id]) def hash_item(self, hash_function, item): if 'chunks' not in item: From 31f3ddf50303f603c9ab38354cd9b102bd401942 Mon Sep 17 00:00:00 2001 From: Abogical Date: Fri, 17 Feb 2017 19:12:01 +0200 Subject: [PATCH 4/6] Join the hall of fame --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index bc7a8c7ce..9f469cd55 100644 --- a/AUTHORS +++ b/AUTHORS @@ -10,6 +10,7 @@ Borg authors ("The Borg Collective") - Marian Beermann - Daniel Reichelt - Lauri Niskanen +- Abdel-Rahman A. (Abogical) Borg is a fork of Attic. 
From cd3cbee962f9b6d761e1123ce293337a68d32f15 Mon Sep 17 00:00:00 2001 From: Abogical Date: Sat, 18 Feb 2017 01:56:18 +0200 Subject: [PATCH 5/6] Refactor unique chunks summing --- src/borg/helpers.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/borg/helpers.py b/src/borg/helpers.py index 502ab1e6f..d45ae887e 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -1481,8 +1481,8 @@ class ItemFormatter(BaseFormatter): self.call_keys = { 'size': self.calculate_size, 'csize': self.calculate_csize, - 'dsize': self.calculate_dsize, - 'dcsize': self.calculate_dcsize, + 'dsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.size), + 'dcsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.csize), 'num_chunks': self.calculate_num_chunks, 'unique_chunks': self.calculate_unique_chunks, 'isomtime': partial(self.format_time, 'mtime'), @@ -1531,6 +1531,20 @@ class ItemFormatter(BaseFormatter): item_data[key] = self.call_keys[key](item) return item_data + def sum_unique_chunks_metadata(self, metadata_func, item): + """ + sum unique chunks metadata, a unique chunk is a chunk which is referenced globally as often as it is in the + item + + item: The item to sum its unique chunks' metadata + metadata_func: A function that takes a parameter of type ChunkIndexEntry and returns a number, used to return + the metadata needed from the chunk + """ + chunk_index = self.archive.cache.chunks + chunks = item.get('chunks', []) + chunks_counter = Counter(c.id for c in chunks) + return sum(metadata_func(c) for c in chunks if chunk_index[c.id].refcount == chunks_counter[c.id]) + def calculate_num_chunks(self, item): return len(item.get('chunks', [])) @@ -1544,18 +1558,6 @@ class ItemFormatter(BaseFormatter): def calculate_csize(self, item): return sum(c.csize for c in item.get('chunks', [])) - def calculate_dsize(self, item): - chunk_index = self.archive.cache.chunks - chunks = item.get('chunks', []) - 
chunks_counter = Counter(c.id for c in chunks) - return sum(c.size for c in chunks if chunk_index[c.id].refcount == chunks_counter[c.id]) - - def calculate_dcsize(self, item): - chunk_index = self.archive.cache.chunks - chunks = item.get('chunks', []) - chunks_counter = Counter(c.id for c in chunks) - return sum(c.csize for c in chunks if chunk_index[c.id].refcount == chunks_counter[c.id]) - def hash_item(self, hash_function, item): if 'chunks' not in item: return "" From 38e4817b48e4a0c212934331974478dc72fea779 Mon Sep 17 00:00:00 2001 From: Abogical Date: Sat, 18 Feb 2017 01:58:24 +0200 Subject: [PATCH 6/6] Correct calculation of unique chunks --- src/borg/helpers.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/borg/helpers.py b/src/borg/helpers.py index d45ae887e..550d7bc1f 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -1484,7 +1484,7 @@ class ItemFormatter(BaseFormatter): 'dsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.size), 'dcsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.csize), 'num_chunks': self.calculate_num_chunks, - 'unique_chunks': self.calculate_unique_chunks, + 'unique_chunks': partial(self.sum_unique_chunks_metadata, lambda chunk: 1), 'isomtime': partial(self.format_time, 'mtime'), 'isoctime': partial(self.format_time, 'ctime'), 'isoatime': partial(self.format_time, 'atime'), @@ -1548,10 +1548,6 @@ class ItemFormatter(BaseFormatter): def calculate_num_chunks(self, item): return len(item.get('chunks', [])) - def calculate_unique_chunks(self, item): - chunk_index = self.archive.cache.chunks - return sum(1 for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1) - def calculate_size(self, item): return sum(c.size for c in item.get('chunks', []))