From c095cc88a647a56f45fa45e84ed6c7935b392187 Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Sat, 21 Feb 2026 06:38:25 +0530 Subject: [PATCH] cache: add seek()/tell() to SyncFile, use SaveFile in _write_files_cache, fixes #9390 --- src/borg/cache.py | 55 ++++++++++--------- src/borg/platform/base.py | 9 +++ .../testsuite/crypto/file_integrity_test.py | 19 ++++++- src/borg/testsuite/platform/all_test.py | 28 +++++++++- 4 files changed, 83 insertions(+), 28 deletions(-) diff --git a/src/borg/cache.py b/src/borg/cache.py index 6d08a92927..b51ae67c08 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -582,36 +582,39 @@ def _write_files_cache(self, files): discard_after = min(newest_cmtime, start_backup_time) ttl = int(os.environ.get("BORG_FILES_CACHE_TTL", 2)) files_cache_logger.debug("FILES-CACHE-SAVE: starting...") - # TODO: use something like SaveFile here, but that didn't work due to SyncFile missing .seek(). - with IntegrityCheckedFile(path=str(self.path / self.files_cache_name()), write=True) as fd: - entries = 0 - age_discarded = 0 - race_discarded = 0 - for path_hash, entry in files.items(): - entry = self.decompress_entry(entry) - if entry.age == 0: # current entries - if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after: - # Only keep files seen in this backup that old enough not to suffer race conditions relating - # to filesystem snapshots and ctime/mtime granularity or being modified while we read them. - keep = True - else: - keep = False - race_discarded += 1 - else: # old entries - if entry.age < ttl: - # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet. - keep = True - else: - keep = False - age_discarded += 1 - if keep: - msgpack.pack((path_hash, entry), fd) - entries += 1 + cache_path = str(self.path / self.files_cache_name()) + with SaveFile(cache_path, binary=True) as sync_file: + with IntegrityCheckedFile(path=cache_path, write=True, override_fd=sync_file) as fd: + entries = 0 + age_discarded = 0 + race_discarded = 0 + for path_hash, entry in files.items(): + entry = self.decompress_entry(entry) + if entry.age == 0: # current entries + if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after: + # Only keep files seen in this backup that old enough not to suffer race conditions + # relating to filesystem snapshots and ctime/mtime granularity or being modified + # while we read them. + keep = True + else: + keep = False + race_discarded += 1 + else: # old entries + if entry.age < ttl: + # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet. + keep = True + else: + keep = False + age_discarded += 1 + if keep: + msgpack.pack((path_hash, entry), fd) + entries += 1 + integrity_data = fd.integrity_data files_cache_logger.debug(f"FILES-CACHE-KILL: removed {age_discarded} entries with age >= TTL [{ttl}]") t_str = datetime.fromtimestamp(discard_after / 1e9, timezone.utc).isoformat() files_cache_logger.debug(f"FILES-CACHE-KILL: removed {race_discarded} entries with ctime/mtime >= {t_str}") files_cache_logger.debug(f"FILES-CACHE-SAVE: finished, {entries} remaining entries saved.") - return fd.integrity_data + return integrity_data def file_known_and_unchanged(self, hashed_path, path_hash, st): """ diff --git a/src/borg/platform/base.py b/src/borg/platform/base.py index 06fcab206a..161c0fae3b 100644 --- a/src/borg/platform/base.py +++ b/src/borg/platform/base.py @@ -1,4 +1,5 @@ import errno +import io import os import socket import unicodedata @@ -180,6 +181,12 @@ def __exit__(self, exc_type, exc_val, exc_tb): def write(self, data): self.f.write(data) + def seek(self, offset, whence=io.SEEK_SET): + return self.f.seek(offset, whence) + + def tell(self): + return self.f.tell() + def sync(self): """ Synchronize file contents. Everything written prior to sync() must become durable before anything written @@ -195,6 +202,8 @@ def sync(self): def close(self): """sync() and close.""" + if self.f.closed: + return from .. import platform dirname = None diff --git a/src/borg/testsuite/crypto/file_integrity_test.py b/src/borg/testsuite/crypto/file_integrity_test.py index 052bad81ce..6dd64a3aeb 100644 --- a/src/borg/testsuite/crypto/file_integrity_test.py +++ b/src/borg/testsuite/crypto/file_integrity_test.py @@ -1,6 +1,7 @@ import pytest -from ...crypto.file_integrity import DetachedIntegrityCheckedFile, FileIntegrityError +from ...crypto.file_integrity import DetachedIntegrityCheckedFile, FileIntegrityError, IntegrityCheckedFile +from ...platform import SyncFile class TestReadIntegrityFile: @@ -130,3 +131,19 @@ def test_part_independence(self, integrity_protected_file, partial_read): if not partial_read: fd.read() # But overall it explodes with the final digest. Neat, eh? + + +class TestIntegrityCheckedFileWithSyncFile: + def test_write_and_verify_with_syncfile(self, tmpdir): + """IntegrityCheckedFile works correctly with SyncFile as override_fd.""" + path = str(tmpdir.join("testfile")) + with SyncFile(path, binary=True) as sf: + with IntegrityCheckedFile(path=path, write=True, override_fd=sf) as fd: + fd.write(b"test data for integrity check") + integrity_data = fd.integrity_data + + assert integrity_data is not None + + # verify the written data can be read back with integrity check + with IntegrityCheckedFile(path=path, write=False, integrity_data=integrity_data) as fd: + assert fd.read() == b"test data for integrity check" diff --git a/src/borg/testsuite/platform/all_test.py b/src/borg/testsuite/platform/all_test.py index b97f62b647..0c45a06b72 100644 --- a/src/borg/testsuite/platform/all_test.py +++ b/src/borg/testsuite/platform/all_test.py @@ -1,4 +1,6 @@ -from ...platform import swidth +import io + +from ...platform import swidth, SyncFile def test_swidth_ascii(): @@ -11,3 +13,27 @@ def test_swidth_cjk(): def test_swidth_mixed(): assert swidth("borgバックアップ") == 4 + 6 * 2 + + +def test_syncfile_seek_tell(tmp_path): + """SyncFile exposes seek() and tell() from the underlying file object.""" + path = tmp_path / "testfile" + with SyncFile(path, binary=True) as sf: + sf.write(b"hello world") + assert sf.tell() == 11 + sf.seek(0, io.SEEK_SET) + assert sf.tell() == 0 + sf.seek(0, io.SEEK_END) + assert sf.tell() == 11 + sf.seek(5, io.SEEK_SET) + assert sf.tell() == 5 + assert path.read_bytes() == b"hello world" + + +def test_syncfile_close_idempotent(tmp_path): + """Calling SyncFile.close() twice does not raise.""" + path = tmp_path / "testfile" + sf = SyncFile(path, binary=True) + sf.write(b"data") + sf.close() + sf.close() # must not raise