Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 80 additions & 19 deletions contentcuration/contentcuration/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,25 +349,57 @@ def check_feature_flag(self, flag_name):

def check_channel_space(self, channel):
tree_cte = With(self.get_user_active_trees().distinct(), name="trees")
files_cte = With(
tree_cte.join(
self.files.get_queryset(), contentnode__tree_id=tree_cte.col.tree_id
)
.values("checksum")

user_files_cte = With(
self.files.get_queryset()
.values("checksum", "contentnode_id", "file_format_id")
.distinct(),
name="files",
name="user_files",
)

staging_tree_files = (
self.files.filter(contentnode__tree_id=channel.staging_tree.tree_id)
editable_files_qs = (
user_files_cte.queryset()
.with_cte(tree_cte)
.with_cte(files_cte)
.exclude(Exists(files_cte.queryset().filter(checksum=OuterRef("checksum"))))
.values("checksum")
.distinct()
.with_cte(user_files_cte)
.filter(
Exists(
tree_cte.join(
ContentNode.objects.all(), tree_id=tree_cte.col.tree_id
)
.with_cte(tree_cte)
.filter(id=OuterRef("contentnode_id"))
)
)
)

editable_files_qs = self._filter_storage_billable_files(editable_files_qs)

existing_checksums_cte = With(
editable_files_qs.values("checksum").distinct(), name="existing_checksums"
)

staging_files_qs = self._filter_storage_billable_files(
self.files.filter(contentnode__tree_id=channel.staging_tree.tree_id)
)
Comment on lines +381 to +383
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still has the same issue as the original queries: it queries on too many things at once. The `user_files_cte` can be reused for both the editable and the staged trees, so you can essentially duplicate `editable_files_qs`, but instead of joining on `tree_cte`, just check existence where `tree_id=channel.staging_tree.tree_id`.

Then, in the core SELECT query, where it diffs between existing and new checksums, you can also filter off `file_format_id`.


staging_files_qs = (
staging_files_qs.with_cte(tree_cte)
.with_cte(user_files_cte)
.with_cte(existing_checksums_cte)
.exclude(
Exists(
existing_checksums_cte.queryset().filter(
checksum=OuterRef("checksum")
)
)
)
)

staged_size = float(
staging_tree_files.aggregate(used=Sum("file_size"))["used"] or 0
staging_files_qs.values("checksum")
.distinct()
.aggregate(used=Sum("file_size"))["used"]
or 0
)

if self.get_available_space() < staged_size:
Expand Down Expand Up @@ -410,13 +442,42 @@ def get_user_active_trees(self):
)

def get_user_active_files(self):
    """
    Return the distinct checksums of this user's files that are attached to
    nodes in the user's active trees, after storage-billable filtering.

    The query is built from two CTEs:

    - ``trees``: the distinct rows of ``get_user_active_trees()``
    - ``user_files``: the distinct ``checksum`` / ``contentnode_id`` /
      ``file_format_id`` triples of ``self.files``

    A ``user_files`` row is kept only when a ``ContentNode`` joined to
    ``trees`` on ``tree_id`` has that row's ``contentnode_id``.

    :return: a values queryset of distinct ``checksum`` entries, with
        ``_filter_storage_billable_files`` applied
    """
    tree_cte = With(self.get_user_active_trees().distinct(), name="trees")

    user_files_cte = With(
        self.files.get_queryset()
        .values("checksum", "contentnode_id", "file_format_id")
        .distinct(),
        name="user_files",
    )

    # Correlated subquery: content nodes belonging to one of the active
    # trees, matched against the outer file row's contentnode_id.
    node_in_active_tree = (
        tree_cte.join(ContentNode.objects.all(), tree_id=tree_cte.col.tree_id)
        .with_cte(tree_cte)
        .filter(id=OuterRef("contentnode_id"))
    )

    files_qs = (
        user_files_cte.queryset()
        .with_cte(tree_cte)
        .with_cte(user_files_cte)
        .filter(Exists(node_in_active_tree))
    )
    files_qs = self._filter_storage_billable_files(files_qs)

    return files_qs.values("checksum").distinct()

def _filter_storage_billable_files(self, queryset):
"""
Perseus exports would not be included in storage calculations.
"""
if queryset is None:
return queryset
return queryset.exclude(file_format_id__isnull=True).exclude(
file_format_id=file_formats.PERSEUS
Copy link
Member

@rtibbles rtibbles Dec 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not an immediate concern, but just a heads that when QTI assessments are more broadly available, and we are generating QTI ZIP files, then we may need to filter these too (and it would need to be on the format preset, rather than the file format id, because the format id would be 'zip'!)

)

def get_space_used(self, active_files=None):
Expand Down
55 changes: 55 additions & 0 deletions contentcuration/contentcuration/tests/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from django.db.models import Exists
from django.db.models import OuterRef
from le_utils.constants import content_kinds
from le_utils.constants import file_formats
from mock import patch

from .base import BaseAPITestCase
Expand Down Expand Up @@ -232,3 +233,57 @@ def test_check_staged_space__exists(self):
) as get_available_staged_space:
get_available_staged_space.return_value = 0
self.assertTrue(self.user.check_staged_space(100, f.checksum))

def test_check_channel_space_ignores_perseus_exports(self):
# Checks that check_channel_space() does not raise PermissionDenied when the
# only oversized file is a Perseus-format one.
# NOTE(review): indentation was lost in this capture, so it is not visible
# whether the try/except below runs inside or outside the mock.patch context
# -- confirm against the real file before reformatting.
with mock.patch("contentcuration.utils.user.calculate_user_storage"):
# Rewrite the fixture file as a Perseus file one byte larger than the user's
# disk_space, with a fresh random checksum, uploaded by the test user.
# Presumably self.node_file lives in self.staged_channel's staging tree;
# the fixture setup is not visible here -- verify.
self.node_file.file_format_id = file_formats.PERSEUS
self.node_file.file_size = self.user.disk_space + 1
self.node_file.checksum = uuid4().hex
self.node_file.uploaded_by = self.user
self.node_file.save(set_by_file_on_disk=False)

# A PermissionDenied here is converted into an explicit test failure with a
# descriptive message instead of surfacing as an error.
try:
self.user.check_channel_space(self.staged_channel)
except PermissionDenied:
self.fail("Perseus exports should not count against staging space")


# Tests for how User.get_space_used() treats files of different formats.
# NOTE(review): indentation was lost in this capture; the extent of each
# `with mock.patch(...)` body below cannot be determined from here -- confirm
# against the real file before reformatting.
class UserStorageUsageTestCase(StudioTestCase):
def setUp(self):
# Calls the parent's setUpBase() (not setUp()).
super().setUpBase()
# First node of the channel's main tree (root included) that has at least
# one file attached.
self.contentnode = (
self.channel.main_tree.get_descendants(include_self=True)
.filter(files__isnull=False)
.first()
)
self.assertIsNotNone(self.contentnode)
# An existing file on that node; its file_format_id is reused later as a
# non-Perseus format.
self.base_file = self.contentnode.files.first()
self.assertIsNotNone(self.base_file)

# Helper: create and save a File on self.contentnode with a random checksum,
# the given format id and size, uploaded by the test user. Both arguments are
# keyword-only. Returns the saved File.
def _create_file(self, *, file_format, size):
file_record = File(
contentnode=self.contentnode,
checksum=uuid4().hex,
file_format_id=file_format,
file_size=size,
uploaded_by=self.user,
)
# set_by_file_on_disk=False presumably stops save() from deriving fields
# from a file on disk -- semantics not visible here, verify.
file_record.save(set_by_file_on_disk=False)
return file_record

def test_get_space_used_excludes_perseus_exports(self):
# Usage before any extra files are added.
baseline_usage = self.user.get_space_used()

# Adding a Perseus-format file must leave the reported usage unchanged.
perseus_size = 125
with mock.patch("contentcuration.utils.user.calculate_user_storage"):
self._create_file(file_format=file_formats.PERSEUS, size=perseus_size)
self.assertEqual(self.user.get_space_used(), baseline_usage)

# Adding a file with the base file's format must increase usage by exactly
# that file's size.
non_perseus_size = 275
with mock.patch("contentcuration.utils.user.calculate_user_storage"):
self._create_file(
file_format=self.base_file.file_format_id, size=non_perseus_size
)

expected_usage = baseline_usage + non_perseus_size
self.assertEqual(self.user.get_space_used(), expected_usage)
Loading