Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 29 additions & 26 deletions src/borg/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,36 +582,39 @@ def _write_files_cache(self, files):
discard_after = min(newest_cmtime, start_backup_time)
ttl = int(os.environ.get("BORG_FILES_CACHE_TTL", 2))
files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
# TODO: use something like SaveFile here, but that didn't work due to SyncFile missing .seek().
with IntegrityCheckedFile(path=str(self.path / self.files_cache_name()), write=True) as fd:
entries = 0
age_discarded = 0
race_discarded = 0
for path_hash, entry in files.items():
entry = self.decompress_entry(entry)
if entry.age == 0: # current entries
if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after:
# Only keep files seen in this backup that are old enough not to suffer race conditions relating
# to filesystem snapshots and ctime/mtime granularity or being modified while we read them.
keep = True
else:
keep = False
race_discarded += 1
else: # old entries
if entry.age < ttl:
# Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
keep = True
else:
keep = False
age_discarded += 1
if keep:
msgpack.pack((path_hash, entry), fd)
entries += 1
cache_path = str(self.path / self.files_cache_name())
with SaveFile(cache_path, binary=True) as sync_file:
with IntegrityCheckedFile(path=cache_path, write=True, override_fd=sync_file) as fd:
entries = 0
age_discarded = 0
race_discarded = 0
for path_hash, entry in files.items():
entry = self.decompress_entry(entry)
if entry.age == 0: # current entries
if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after:
# Only keep files seen in this backup that are old enough not to suffer race conditions
# relating to filesystem snapshots and ctime/mtime granularity or being modified
# while we read them.
keep = True
else:
keep = False
race_discarded += 1
else: # old entries
if entry.age < ttl:
# Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
keep = True
else:
keep = False
age_discarded += 1
if keep:
msgpack.pack((path_hash, entry), fd)
entries += 1
integrity_data = fd.integrity_data
files_cache_logger.debug(f"FILES-CACHE-KILL: removed {age_discarded} entries with age >= TTL [{ttl}]")
t_str = datetime.fromtimestamp(discard_after / 1e9, timezone.utc).isoformat()
files_cache_logger.debug(f"FILES-CACHE-KILL: removed {race_discarded} entries with ctime/mtime >= {t_str}")
files_cache_logger.debug(f"FILES-CACHE-SAVE: finished, {entries} remaining entries saved.")
return fd.integrity_data
return integrity_data

def file_known_and_unchanged(self, hashed_path, path_hash, st):
"""
Expand Down
9 changes: 9 additions & 0 deletions src/borg/platform/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import errno
import io
import os
import socket
import unicodedata
Expand Down Expand Up @@ -180,6 +181,12 @@ def __exit__(self, exc_type, exc_val, exc_tb):
def write(self, data):
    """Write *data* to the wrapped file object (durability comes from sync(), not here)."""
    self.f.write(data)

def seek(self, offset, whence=io.SEEK_SET):
    """Reposition the wrapped file object and return the new absolute offset."""
    return self.f.seek(offset, whence)

def tell(self):
    """Return the wrapped file object's current stream position."""
    return self.f.tell()

def sync(self):
"""
Synchronize file contents. Everything written prior to sync() must become durable before anything written
Expand All @@ -195,6 +202,8 @@ def sync(self):

def close(self):
"""sync() and close."""
if self.f.closed:
return
from .. import platform

dirname = None
Expand Down
19 changes: 18 additions & 1 deletion src/borg/testsuite/crypto/file_integrity_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest

from ...crypto.file_integrity import DetachedIntegrityCheckedFile, FileIntegrityError
from ...crypto.file_integrity import DetachedIntegrityCheckedFile, FileIntegrityError, IntegrityCheckedFile
from ...platform import SyncFile


class TestReadIntegrityFile:
Expand Down Expand Up @@ -130,3 +131,19 @@ def test_part_independence(self, integrity_protected_file, partial_read):
if not partial_read:
fd.read()
# But overall it explodes with the final digest. Neat, eh?


class TestIntegrityCheckedFileWithSyncFile:
    def test_write_and_verify_with_syncfile(self, tmpdir):
        """IntegrityCheckedFile works correctly with SyncFile as override_fd."""
        path = str(tmpdir.join("testfile"))
        payload = b"test data for integrity check"

        # write through the SyncFile while the integrity wrapper hashes the stream
        with SyncFile(path, binary=True) as sf:
            with IntegrityCheckedFile(path=path, write=True, override_fd=sf) as fd:
                fd.write(payload)
        # integrity_data is computed when the IntegrityCheckedFile context exits
        integrity_data = fd.integrity_data
        assert integrity_data is not None

        # reading back through the integrity wrapper must yield the same bytes
        with IntegrityCheckedFile(path=path, write=False, integrity_data=integrity_data) as fd:
            assert fd.read() == payload
49 changes: 48 additions & 1 deletion src/borg/testsuite/platform/all_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
from ...platform import swidth
import io
import os
import tempfile

from ...platform import swidth, SyncFile, SaveFile
from ...crypto.file_integrity import IntegrityCheckedFile


def test_swidth_ascii():
Expand All @@ -11,3 +16,45 @@ def test_swidth_cjk():

def test_swidth_mixed():
    """Mixed ASCII + CJK string: 4 single-width chars plus 6 double-width chars."""
    assert swidth("borgバックアップ") == 16


def test_syncfile_seek_tell():
    """SyncFile exposes seek() and tell() from the underlying file object.

    Also verifies (per review feedback) that seeking around did not corrupt the
    written data, by reading the file back after the SyncFile is closed.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "testfile")
        with SyncFile(path, binary=True) as sf:
            sf.write(b"hello world")
            assert sf.tell() == 11
            sf.seek(0, io.SEEK_SET)
            assert sf.tell() == 0
            sf.seek(0, io.SEEK_END)
            assert sf.tell() == 11
            sf.seek(5, io.SEEK_SET)
            assert sf.tell() == 5
        # read back after close: seeking must not have altered the contents
        with open(path, "rb") as f:
            assert f.read() == b"hello world"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for completeness, do a read after that and assert.



def test_syncfile_close_idempotent(tmp_path):
    """Calling SyncFile.close() twice does not raise.

    Uses the pytest ``tmp_path`` fixture (per review feedback) instead of
    manually managing a temporary directory.
    """
    path = str(tmp_path / "testfile")
    sf = SyncFile(path, binary=True)
    sf.write(b"data")
    sf.close()
    sf.close()  # second close must be a no-op, not an error


def test_savefile_with_integrity_checked_file(tmp_path):
    """SaveFile + IntegrityCheckedFile provides atomic writes with integrity verification.

    Uses the pytest ``tmp_path`` fixture (per review feedback) instead of
    manually managing a temporary directory.
    """
    path = str(tmp_path / "testfile")
    payload = b"atomic integrity data"

    with SaveFile(path, binary=True) as sync_file:
        with IntegrityCheckedFile(path=path, write=True, override_fd=sync_file) as fd:
            fd.write(payload)
    # integrity_data is computed when the IntegrityCheckedFile context exits;
    # SaveFile atomically renames the temp file into place on exit.
    integrity_data = fd.integrity_data

    assert os.path.exists(path)
    assert integrity_data is not None

    # verify the written data can be read back with integrity check
    with IntegrityCheckedFile(path=path, write=False, integrity_data=integrity_data) as fd:
        assert fd.read() == payload