diff --git a/CHANGES b/CHANGES index 5f20080b4..e54ff83ec 100644 --- a/CHANGES +++ b/CHANGES @@ -8,6 +8,7 @@ Version 0.13 (feature release, released on X) +- Reduced memory usage when backing up many small files (#69) - Experimental Linux and FreeBSD ACL support (#66) - Added support for backup and restore of BSDFlags (OSX, FreeBSD) (#56) - Fix bug where xattrs on symlinks were not correctly restored diff --git a/attic/cache.py b/attic/cache.py index 65362ff39..63680056a 100644 --- a/attic/cache.py +++ b/attic/cache.py @@ -12,6 +12,8 @@ from .hashindex import ChunkIndex class Cache(object): """Client Side cache """ + # Do not cache file metadata for files of this size (in bytes) or smaller + FILE_MIN_SIZE = 4096 class RepositoryReplay(Error): """Cache is newer than repository, refusing to continue""" @@ -81,9 +83,10 @@ class Cache(object): if not data: break u.feed(data) - for hash, item in u: + for path_hash, item in u: + if item[2] > self.FILE_MIN_SIZE: item[0] += 1 - self.files[hash] = item + self.files[path_hash] = item def begin_txn(self): # Initialize transaction snapshot @@ -218,7 +221,8 @@ class Cache(object): return None def memorize_file(self, path_hash, st, ids): - # Entry: Age, inode, size, mtime, chunk ids - mtime_ns = st_mtime_ns(st) - self.files[path_hash] = 0, st.st_ino, st.st_size, mtime_ns, ids - self._newest_mtime = max(self._newest_mtime, mtime_ns) + if st.st_size > self.FILE_MIN_SIZE: + # Entry: Age, inode, size, mtime, chunk ids + mtime_ns = st_mtime_ns(st) + self.files[path_hash] = 0, st.st_ino, st.st_size, mtime_ns, ids + self._newest_mtime = max(self._newest_mtime, mtime_ns)