mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-10 17:32:13 -04:00
Modified Item.pyx to include diffs in ctime and mtime (#7335)
diff: include changes in ctime and mtime, fixes #7248. Also: sort JSON output alphabetically; add --content-only to ignore metadata changes. Co-authored-by: Michael Deyaso <mdeyaso@fusioniq.io>
This commit is contained in:
parent
bba91e4b8b
commit
2c232449b0
6 changed files with 119 additions and 50 deletions
|
|
@ -1096,7 +1096,7 @@ Duration: {0.duration}
|
|||
logger.warning("borg check --repair is required to free all space.")
|
||||
|
||||
@staticmethod
|
||||
def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False):
|
||||
def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False, content_only=False):
|
||||
"""
|
||||
Yields tuples with a path and an ItemDiff instance describing changes/indicating equality.
|
||||
|
||||
|
|
@ -1111,6 +1111,7 @@ Duration: {0.duration}
|
|||
archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])]),
|
||||
archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])]),
|
||||
can_compare_chunk_ids=can_compare_chunk_ids,
|
||||
content_only=content_only,
|
||||
)
|
||||
|
||||
orphans_archive1 = OrderedDict()
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from ..archive import Archive
|
|||
from ..constants import * # NOQA
|
||||
from ..helpers import archivename_validator
|
||||
from ..manifest import Manifest
|
||||
from ..helpers.parseformat import BorgJsonEncoder
|
||||
|
||||
from ..logger import create_logger
|
||||
|
||||
|
|
@ -19,7 +20,7 @@ class DiffMixIn:
|
|||
"""Diff contents of two archives"""
|
||||
|
||||
def print_json_output(diff, path):
|
||||
print(json.dumps({"path": path, "changes": [j for j, str in diff]}))
|
||||
print(json.dumps({"path": path, "changes": [j for j, str in diff]}, sort_keys=True, cls=BorgJsonEncoder))
|
||||
|
||||
def print_text_output(diff, path):
|
||||
print("{:<19} {}".format(" ".join([str for j, str in diff]), path))
|
||||
|
|
@ -42,7 +43,9 @@ class DiffMixIn:
|
|||
|
||||
matcher = build_matcher(args.patterns, args.paths)
|
||||
|
||||
diffs = Archive.compare_archives_iter(archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids)
|
||||
diffs = Archive.compare_archives_iter(
|
||||
archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids, content_only=args.content_only
|
||||
)
|
||||
# Conversion to string and filtering for diff.equal to save memory if sorting
|
||||
diffs = ((path, diff.changes()) for path, diff in diffs if not diff.equal)
|
||||
|
||||
|
|
@ -105,6 +108,11 @@ class DiffMixIn:
|
|||
)
|
||||
subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.")
|
||||
subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines. ")
|
||||
subparser.add_argument(
|
||||
"--content-only",
|
||||
action="store_true",
|
||||
help="Only compare differences in content (exclude metadata differences)",
|
||||
)
|
||||
subparser.add_argument("name", metavar="ARCHIVE1", type=archivename_validator, help="ARCHIVE1 name")
|
||||
subparser.add_argument("other_name", metavar="ARCHIVE2", type=archivename_validator, help="ARCHIVE2 name")
|
||||
subparser.add_argument(
|
||||
|
|
|
|||
|
|
@ -876,7 +876,7 @@ class ItemFormatter(BaseFormatter):
|
|||
self.used_call_keys = set(self.call_keys) & self.format_keys
|
||||
|
||||
def format_item_json(self, item):
|
||||
return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + "\n"
|
||||
return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder, sort_keys=True) + "\n"
|
||||
|
||||
def get_item_data(self, item):
|
||||
item_data = {}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from .constants import ITEM_KEYS, ARCHIVE_KEYS
|
|||
from .helpers import StableDict
|
||||
from .helpers import format_file_size
|
||||
from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp
|
||||
from .helpers.time import OutputTimestamp, safe_timestamp
|
||||
|
||||
|
||||
cdef extern from "_item.c":
|
||||
|
|
@ -626,9 +627,10 @@ class ItemDiff:
|
|||
It does not include extended attributes in the comparison; ctime and mtime are compared unless content_only is set.
|
||||
"""
|
||||
|
||||
def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False):
|
||||
def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False, content_only=False):
|
||||
self._item1 = item1
|
||||
self._item2 = item2
|
||||
self._content_only = content_only
|
||||
self._numeric_ids = numeric_ids
|
||||
self._can_compare_chunk_ids = can_compare_chunk_ids
|
||||
self.equal = self._equal(chunk_iterator1, chunk_iterator2)
|
||||
|
|
@ -652,9 +654,11 @@ class ItemDiff:
|
|||
if self._item1.is_fifo() or self._item2.is_fifo():
|
||||
changes.append(self._presence_diff('fifo'))
|
||||
|
||||
if not (self._item1.get('deleted') or self._item2.get('deleted')):
|
||||
changes.append(self._owner_diff())
|
||||
changes.append(self._mode_diff())
|
||||
if not self._content_only:
|
||||
if not (self._item1.get('deleted') or self._item2.get('deleted')):
|
||||
changes.append(self._owner_diff())
|
||||
changes.append(self._mode_diff())
|
||||
changes.extend(self._time_diffs())
|
||||
|
||||
# filter out empty changes
|
||||
self._changes = [ch for ch in changes if ch]
|
||||
|
|
@ -672,8 +676,12 @@ class ItemDiff:
|
|||
if self._item1.get('deleted') and self._item2.get('deleted'):
|
||||
return True
|
||||
|
||||
attr_list = ['deleted', 'mode', 'target']
|
||||
attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group']
|
||||
attr_list = ['deleted', 'target']
|
||||
|
||||
if not self._content_only:
|
||||
attr_list += ['mode', 'ctime', 'mtime']
|
||||
attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group']
|
||||
|
||||
for attr in attr_list:
|
||||
if self._item1.get(attr) != self._item2.get(attr):
|
||||
return False
|
||||
|
|
@ -736,6 +744,16 @@ class ItemDiff:
|
|||
mode2 = stat.filemode(self._item2.mode)
|
||||
return ({"type": "mode", "old_mode": mode1, "new_mode": mode2}, '[{} -> {}]'.format(mode1, mode2))
|
||||
|
||||
def _time_diffs(self):
|
||||
changes = []
|
||||
attrs = ["ctime", "mtime"]
|
||||
for attr in attrs:
|
||||
if attr in self._item1 and attr in self._item2 and self._item1.get(attr) != self._item2.get(attr):
|
||||
ts1 = OutputTimestamp(safe_timestamp(self._item1.get(attr)))
|
||||
ts2 = OutputTimestamp(safe_timestamp(self._item2.get(attr)))
|
||||
changes.append(({"type": attr, f"old_{attr}": ts1, f"new_{attr}": ts2}, '[{}: {} -> {}]'.format(attr, ts1, ts2)))
|
||||
return changes
|
||||
|
||||
def _content_equal(self, chunk_iterator1, chunk_iterator2):
|
||||
if self._can_compare_chunk_ids:
|
||||
return self._item1.chunks == self._item2.chunks
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ try:
|
|||
except ImportError:
|
||||
posix = None
|
||||
|
||||
import re
|
||||
import stat
|
||||
import sys
|
||||
import sysconfig
|
||||
|
|
@ -187,6 +188,9 @@ class BaseTestCase(unittest.TestCase):
|
|||
diff = filecmp.dircmp(dir1, dir2)
|
||||
self._assert_dirs_equal_cmp(diff, **kwargs)
|
||||
|
||||
def assert_line_exists(self, lines, expected_regexpr):
    """Assert that at least one entry of *lines* matches *expected_regexpr*.

    Matching uses ``re.search``, so the pattern may match anywhere in a line.
    On failure the assertion message names the pattern and the lines searched.
    """
    matched = False
    for candidate in lines:
        if re.search(expected_regexpr, candidate):
            matched = True
            break
    assert matched, f"no match for {expected_regexpr} in {lines}"
|
||||
|
||||
def _assert_dirs_equal_cmp(self, diff, ignore_flags=False, ignore_xattrs=False, ignore_ns=False):
|
||||
self.assert_equal(diff.left_only, [])
|
||||
self.assert_equal(diff.right_only, [])
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import json
|
||||
import os
|
||||
import stat
|
||||
import time
|
||||
import unittest
|
||||
|
||||
from ...constants import * # NOQA
|
||||
|
|
@ -70,18 +71,19 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.cmd(f"--repo={self.repository_location}", "create", "test1a", "input")
|
||||
self.cmd(f"--repo={self.repository_location}", "create", "test1b", "input", "--chunker-params", "16,18,17,4095")
|
||||
|
||||
def do_asserts(output, can_compare_ids):
|
||||
def do_asserts(output, can_compare_ids, content_only=False):
|
||||
# File contents changed (deleted and replaced with a new file)
|
||||
change = "B" if can_compare_ids else "{:<19}".format("modified")
|
||||
lines = output.splitlines()
|
||||
assert "file_replaced" in output # added to debug #3494
|
||||
assert f"{change} input/file_replaced" in output
|
||||
self.assert_line_exists(lines, f"{change}.*input/file_replaced")
|
||||
|
||||
# File unchanged
|
||||
assert "input/file_unchanged" not in output
|
||||
|
||||
# Directory replaced with a regular file
|
||||
if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32:
|
||||
assert "[drwxr-xr-x -> -rwxr-xr-x] input/dir_replaced_with_file" in output
|
||||
if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
|
||||
self.assert_line_exists(lines, "drwxr-xr-x -> -rwxr-xr-x.*input/dir_replaced_with_file")
|
||||
|
||||
# Basic directory cases
|
||||
assert "added directory input/dir_added" in output
|
||||
|
|
@ -89,13 +91,13 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
|
||||
if are_symlinks_supported():
|
||||
# Basic symlink cases
|
||||
assert "changed link input/link_changed" in output
|
||||
assert "added link input/link_added" in output
|
||||
assert "removed link input/link_removed" in output
|
||||
self.assert_line_exists(lines, "changed link.*input/link_changed")
|
||||
self.assert_line_exists(lines, "added link.*input/link_added")
|
||||
self.assert_line_exists(lines, "removed link.*input/link_removed")
|
||||
|
||||
# Symlink replacing or being replaced
|
||||
assert "] input/dir_replaced_with_link" in output
|
||||
assert "] input/link_replaced_by_file" in output
|
||||
assert "input/dir_replaced_with_link" in output
|
||||
assert "input/link_replaced_by_file" in output
|
||||
|
||||
# Symlink target removed. Should not affect the symlink at all.
|
||||
assert "input/link_target_removed" not in output
|
||||
|
|
@ -104,9 +106,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
# should notice the changes in both links. However, the symlink
|
||||
# pointing to the file is not changed.
|
||||
change = "0 B" if can_compare_ids else "{:<19}".format("modified")
|
||||
assert f"{change} input/empty" in output
|
||||
self.assert_line_exists(lines, f"{change}.*input/empty")
|
||||
if are_hardlinks_supported():
|
||||
assert f"{change} input/hardlink_contents_changed" in output
|
||||
self.assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed")
|
||||
if are_symlinks_supported():
|
||||
assert "input/link_target_contents_changed" not in output
|
||||
|
||||
|
|
@ -125,18 +127,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
if are_hardlinks_supported():
|
||||
assert "removed 256 B input/hardlink_removed" in output
|
||||
|
||||
# Another link (marked previously as the source in borg) to the
|
||||
# same inode was removed. This should not change this link at all.
|
||||
if are_hardlinks_supported():
|
||||
if are_hardlinks_supported() and content_only:
|
||||
# Another link (marked previously as the source in borg) to the
|
||||
# same inode was removed. This should only change the ctime since removing
|
||||
# the link would result in the decrementation of the inode's hard-link count.
|
||||
assert "input/hardlink_target_removed" not in output
|
||||
|
||||
# Another link (marked previously as the source in borg) to the
|
||||
# same inode was replaced with a new regular file. This should not
|
||||
# change this link at all.
|
||||
if are_hardlinks_supported():
|
||||
# Another link (marked previously as the source in borg) to the
|
||||
# same inode was replaced with a new regular file. This should only change
|
||||
# its ctime. This should not be reflected in the output if content-only is set
|
||||
assert "input/hardlink_target_replaced" not in output
|
||||
|
||||
def do_json_asserts(output, can_compare_ids):
|
||||
def do_json_asserts(output, can_compare_ids, content_only=False):
|
||||
def get_changes(filename, data):
|
||||
chgsets = [j["changes"] for j in data if j["path"] == filename]
|
||||
assert len(chgsets) < 2
|
||||
|
|
@ -154,7 +156,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
assert not any(get_changes("input/file_unchanged", joutput))
|
||||
|
||||
# Directory replaced with a regular file
|
||||
if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32:
|
||||
if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
|
||||
assert {"type": "mode", "old_mode": "drwxr-xr-x", "new_mode": "-rwxr-xr-x"} in get_changes(
|
||||
"input/dir_replaced_with_file", joutput
|
||||
)
|
||||
|
|
@ -170,14 +172,16 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
assert {"type": "removed link"} in get_changes("input/link_removed", joutput)
|
||||
|
||||
# Symlink replacing or being replaced
|
||||
assert any(
|
||||
chg["type"] == "mode" and chg["new_mode"].startswith("l")
|
||||
for chg in get_changes("input/dir_replaced_with_link", joutput)
|
||||
)
|
||||
assert any(
|
||||
chg["type"] == "mode" and chg["old_mode"].startswith("l")
|
||||
for chg in get_changes("input/link_replaced_by_file", joutput)
|
||||
)
|
||||
|
||||
if not content_only:
|
||||
assert any(
|
||||
chg["type"] == "mode" and chg["new_mode"].startswith("l")
|
||||
for chg in get_changes("input/dir_replaced_with_link", joutput)
|
||||
), get_changes("input/dir_replaced_with_link", joutput)
|
||||
assert any(
|
||||
chg["type"] == "mode" and chg["old_mode"].startswith("l")
|
||||
for chg in get_changes("input/link_replaced_by_file", joutput)
|
||||
), get_changes("input/link_replaced_by_file", joutput)
|
||||
|
||||
# Symlink target removed. Should not affect the symlink at all.
|
||||
assert not any(get_changes("input/link_target_removed", joutput))
|
||||
|
|
@ -207,21 +211,56 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
if are_hardlinks_supported():
|
||||
assert {"type": "removed", "size": 256} in get_changes("input/hardlink_removed", joutput)
|
||||
|
||||
# Another link (marked previously as the source in borg) to the
|
||||
# same inode was removed. This should not change this link at all.
|
||||
if are_hardlinks_supported():
|
||||
if are_hardlinks_supported() and content_only:
|
||||
# Another link (marked previously as the source in borg) to the
|
||||
# same inode was removed. This should only change the ctime since removing
|
||||
# the link would result in the decrementation of the inode's hard-link count.
|
||||
assert not any(get_changes("input/hardlink_target_removed", joutput))
|
||||
|
||||
# Another link (marked previously as the source in borg) to the
|
||||
# same inode was replaced with a new regular file. This should not
|
||||
# change this link at all.
|
||||
if are_hardlinks_supported():
|
||||
# Another link (marked previously as the source in borg) to the
|
||||
# same inode was replaced with a new regular file. This should only change
|
||||
# its ctime. This should not be reflected in the output if content-only is set
|
||||
assert not any(get_changes("input/hardlink_target_replaced", joutput))
|
||||
|
||||
do_asserts(self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1a"), True)
|
||||
output = self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1a")
|
||||
do_asserts(output, True)
|
||||
# We expect exit_code=1 due to the chunker params warning
|
||||
do_asserts(self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1b", exit_code=1), False)
|
||||
do_json_asserts(self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1a", "--json-lines"), True)
|
||||
output = self.cmd(
|
||||
f"--repo={self.repository_location}", "diff", "test0", "test1b", "--content-only", exit_code=1
|
||||
)
|
||||
do_asserts(output, False, content_only=True)
|
||||
|
||||
output = self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1a", "--json-lines")
|
||||
do_json_asserts(output, True)
|
||||
|
||||
output = self.cmd(
|
||||
f"--repo={self.repository_location}", "diff", "test0", "test1a", "--json-lines", "--content-only"
|
||||
)
|
||||
do_json_asserts(output, True, content_only=True)
|
||||
|
||||
def test_time_diffs(self):
    """Check that ``borg diff`` reports ctime/mtime changes between archives.

    Step 1 replaces the file with a new one of a different size and expects
    both ``mtime`` and ``ctime`` in the diff output.
    Step 2 only changes the file mode and expects no ``mtime`` entry; ``ctime``
    is expected everywhere except on Windows.
    """
    self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
    self.create_regular_file("test_file", size=10)
    self.cmd(f"--repo={self.repository_location}", "create", "archive1", "input")
    time.sleep(0.1)
    os.unlink("input/test_file")
    if is_win32:
        # Sleeping for 15s because Windows doesn't refresh ctime if file is deleted and recreated within 15 seconds.
        time.sleep(15)
    self.create_regular_file("test_file", size=15)
    self.cmd(f"--repo={self.repository_location}", "create", "archive2", "input")
    output = self.cmd(f"--repo={self.repository_location}", "diff", "archive1", "archive2")
    self.assert_in("mtime", output)
    self.assert_in("ctime", output)  # Should show up on windows as well since it is a new file.

    # The mode must be an octal literal: decimal 777 equals 0o1411, which
    # clears the owner's write bit instead of granting rwx to everyone.
    os.chmod("input/test_file", 0o777)
    self.cmd(f"--repo={self.repository_location}", "create", "archive3", "input")
    output = self.cmd(f"--repo={self.repository_location}", "diff", "archive2", "archive3")
    self.assert_not_in("mtime", output)
    # Checking platform because ctime should not be shown on windows since it wasn't recreated.
    if not is_win32:
        self.assert_in("ctime", output)
    else:
        self.assert_not_in("ctime", output)
|
||||
|
||||
def test_sort_option(self):
|
||||
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
|
||||
|
|
@ -242,7 +281,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.create_regular_file("d_file_added", size=256)
|
||||
self.cmd(f"--repo={self.repository_location}", "create", "test1", "input")
|
||||
|
||||
output = self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1", "--sort")
|
||||
output = self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1", "--sort", "--content-only")
|
||||
expected = [
|
||||
"a_file_removed",
|
||||
"b_file_added",
|
||||
|
|
@ -251,7 +290,6 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
"e_file_changed",
|
||||
"f_file_removed",
|
||||
]
|
||||
|
||||
assert all(x in line for x, line in zip(expected, output.splitlines()))
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue