diff --git a/component_catalog/models.py b/component_catalog/models.py index a8a4ced2..0412e681 100644 --- a/component_catalog/models.py +++ b/component_catalog/models.py @@ -1719,6 +1719,10 @@ def has_package_url(self): """Return objects with Package URL defined.""" return self.filter(~models.Q(type="") & ~models.Q(name="")) + def has_download_url(self): + """Return objects with download URL defined.""" + return self.filter(~models.Q(download_url="")) + def annotate_sortable_identifier(self): """ Annotate the QuerySet with a `sortable_identifier` value that combines @@ -2036,9 +2040,14 @@ def package_url_filename(self): @property def inferred_repo_url(self): - """Return the URL deduced from the information available in a Package URL (purl).""" + """Return the repo URL deduced from the Package URL (purl).""" return purl2url.get_repo_url(self.package_url) + def infer_download_url(self): + """Infer the download URL deduced from the Package URL (purl).""" + if self.package_url: + return download.infer_download_url(self.package_url) + def get_url(self, name, params=None, include_identifier=False): if not params: params = [self.dataspace.name, quote_plus(str(self.uuid))] diff --git a/component_catalog/tests/test_models.py b/component_catalog/tests/test_models.py index 8f9c1491..788143cf 100644 --- a/component_catalog/tests/test_models.py +++ b/component_catalog/tests/test_models.py @@ -2576,6 +2576,15 @@ def test_package_model_inferred_repo_url_property(self): expected = "https://github.com/package-url/packageurl-python/tree/v0.10.4" self.assertEqual(expected, package1.inferred_repo_url) + def test_package_model_infer_download_url(self): + package1 = make_package(self.dataspace, filename="package") + self.assertIsNone(package1.infer_download_url()) + + package1.set_package_url("pkg:nuget/Azure.Core@1.45.0") + package1.save() + expected_download_url = "https://www.nuget.org/api/v2/package/Azure.Core/1.45.0" + self.assertEqual(expected_download_url, 
package1.infer_download_url()) + @mock.patch("dejacode_toolkit.purldb.PurlDB.find_packages") def test_package_model_get_purldb_entries(self, mock_find_packages): purl1 = "pkg:pypi/django@3.0" @@ -2758,6 +2767,12 @@ def test_package_queryset_has_package_url(self): qs = Package.objects.has_package_url() self.assertQuerySetEqual(qs, [package1]) + def test_package_queryset_has_download_url(self): + package1 = make_package(self.dataspace, download_url="https://download.url") + make_package(self.dataspace) + qs = Package.objects.has_download_url() + self.assertQuerySetEqual(qs, [package1]) + def test_package_queryset_annotate_sortable_identifier(self): package1 = make_package(self.dataspace, package_url="pkg:pypi/django@5.0") package2 = make_package(self.dataspace) diff --git a/product_portfolio/api.py b/product_portfolio/api.py index 9f6fa0af..d24c5076 100644 --- a/product_portfolio/api.py +++ b/product_portfolio/api.py @@ -227,6 +227,12 @@ class LoadSBOMsFormSerializer(serializers.Serializer): required=True, help_text=LoadSBOMsForm.base_fields["input_file"].label, ) + infer_download_urls = serializers.BooleanField( + required=False, + initial=True, + default=True, + help_text=LoadSBOMsForm.base_fields["infer_download_urls"].help_text, + ) update_existing_packages = serializers.BooleanField( required=False, default=False, @@ -246,6 +252,12 @@ class ImportManifestsFormSerializer(serializers.Serializer): required=True, help_text=ImportManifestsForm.base_fields["input_file"].label, ) + infer_download_urls = serializers.BooleanField( + required=False, + initial=True, + default=True, + help_text=ImportManifestsForm.base_fields["infer_download_urls"].help_text, + ) update_existing_packages = serializers.BooleanField( required=False, default=False, diff --git a/product_portfolio/forms.py b/product_portfolio/forms.py index 216bfa6d..f6ce25d4 100644 --- a/product_portfolio/forms.py +++ b/product_portfolio/forms.py @@ -641,6 +641,15 @@ class BaseProductImportFormView(forms.Form): 
"for all of the packages assigned to your product." ), ) + infer_download_urls = forms.BooleanField( + label=_("Infer missing download URLs"), + required=False, + initial=True, + help_text=_( + "When a download URL is missing from the input data, attempt to infer it " + "from the Package URL (purl). A download URL is required for package scanning." + ), + ) @property def helper(self): @@ -652,6 +661,7 @@ def helper(self): Fieldset( None, "input_file", + "infer_download_urls", "update_existing_packages", "scan_all_packages", StrictSubmit("submit", _("Import"), css_class="btn-success col-2"), @@ -667,6 +677,7 @@ def submit(self, product, user): input_file=self.cleaned_data.get("input_file"), update_existing_packages=self.cleaned_data.get("update_existing_packages"), scan_all_packages=self.cleaned_data.get("scan_all_packages"), + infer_download_urls=self.cleaned_data.get("infer_download_urls"), created_by=user, ) @@ -716,7 +727,7 @@ class LoadSBOMsForm(BaseProductImportFormView): class ImportManifestsForm(BaseProductImportFormView): project_type = ScanCodeProject.ProjectType.IMPORT_FROM_MANIFEST - pipeline_name = "resolve_dependencies" + pipeline_name = "resolve_dependencies:StaticResolver,DynamicResolver" input_file = SmartFileField( label=_("Manifest file or zip archive"), @@ -1005,3 +1016,25 @@ def submit(self, product, user): scancodeproject_uuid=scancode_project.uuid, ) ) + + +class ScanAllPackagesForm(forms.Form): + infer_download_urls = forms.BooleanField( + label=_("Infer missing download URLs"), + required=False, + initial=True, + help_text=_( + "When a download URL is missing for packages, attempt to infer it " + "from the Package URL (purl). " + "A download URL is required for package scanning." 
+ ), + ) + + @property + def helper(self): + helper = FormHelper() + helper.form_method = "post" + helper.form_id = "scan-all-packages-form" + helper.attrs = {"autocomplete": "off"} + helper.layout = Layout("infer_download_urls") + return helper diff --git a/product_portfolio/importers.py b/product_portfolio/importers.py index 699c746d..7b931bcf 100644 --- a/product_portfolio/importers.py +++ b/product_portfolio/importers.py @@ -26,6 +26,7 @@ from component_catalog.models import PACKAGE_URL_FIELDS from component_catalog.models import Component from component_catalog.models import Package +from dejacode_toolkit import download from dejacode_toolkit.scancodeio import ScanCodeIO from dje.copier import copy_object from dje.importers import BaseImporter @@ -649,7 +650,15 @@ class ImportPackageFromScanCodeIO: "filename", ] - def __init__(self, user, project_uuid, product, update_existing=False, scan_all_packages=False): + def __init__( + self, + user, + project_uuid, + product, + update_existing=False, + scan_all_packages=False, + infer_download_urls=False, + ): self.licensing = Licensing() self.created = defaultdict(list) self.existing = defaultdict(list) @@ -662,12 +671,13 @@ def __init__(self, user, project_uuid, product, update_existing=False, scan_all_ self.product = product self.update_existing = update_existing self.scan_all_packages = scan_all_packages + self.infer_download_urls = infer_download_urls scancodeio = ScanCodeIO(user.dataspace) self.packages = scancodeio.fetch_project_packages(self.project_uuid) - if not self.packages: - raise Exception("Packages could not be fetched from ScanCode.io") self.dependencies = scancodeio.fetch_project_dependencies(self.project_uuid) + if not self.packages and not self.dependencies: + raise Exception("Packages could not be fetched from ScanCode.io") def save(self): self.import_packages() @@ -696,6 +706,15 @@ def import_package(self, package_data): # Check if the package already exists to prevent duplication. 
package = self.look_for_existing_package(package_data) + # Infer a download URL from the Package URL + if ( + self.infer_download_urls + and not package_data.get("download_url") + and (purl := package_data.get("purl")) + and (download_url := download.infer_download_url(purl)) + ): + package_data["download_url"] = download_url + if license_expression := package_data.get("declared_license_expression"): license_expression = str(self.licensing.dedup(license_expression)) package_data["license_expression"] = license_expression diff --git a/product_portfolio/migrations/0014_scancodeproject_infer_download_urls.py b/product_portfolio/migrations/0014_scancodeproject_infer_download_urls.py new file mode 100644 index 00000000..4bcd43b4 --- /dev/null +++ b/product_portfolio/migrations/0014_scancodeproject_infer_download_urls.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.8 on 2025-12-16 04:14 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("product_portfolio", "0013_productstatus_is_locked_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="scancodeproject", + name="infer_download_urls", + field=models.BooleanField(default=False), + ), + ] diff --git a/product_portfolio/models.py b/product_portfolio/models.py index 0fa97dae..c82db0b4 100644 --- a/product_portfolio/models.py +++ b/product_portfolio/models.py @@ -557,25 +557,40 @@ def assign_objects(self, related_objects, user, replace_version=False): return created_count, updated_count, unchanged_count - def scan_all_packages_task(self, user): + def scan_all_packages_task(self, user, infer_download_urls=False): """ Submit a Scan request to ScanCode.io for each package assigned to this Product. Only packages with a proper download URL are sent. 
""" - package_urls = [ + if infer_download_urls: + self.improve_packages_from_purl() + + package_download_urls = [ package.download_url - for package in self.all_packages + for package in self.all_packages.has_download_url() if package.download_url.startswith(("http", "https")) ] tasks.scancodeio_submit_scan.delay( - uris=package_urls, + uris=package_download_urls, user_uuid=user.uuid, dataspace_uuid=user.dataspace.uuid, ) + def improve_packages_from_purl(self): + """Infer missing package download URLs using the Package URL when possible.""" + updated_packages = [] + + packages = self.all_packages.has_package_url().filter(models.Q(download_url="")) + for package in packages: + if download_url := package.infer_download_url(): + package.update(download_url=download_url) + updated_packages.append(package) + + return updated_packages + def improve_packages_from_purldb(self, user): - """Update all Packages assigned to the Product using PurlDB data.""" + """Update all packages assigned to the Product using PurlDB data.""" updated_packages = [] for package in self.packages.all(): updated_fields = package.update_from_purldb(user) @@ -1555,6 +1570,9 @@ class Status(models.TextChoices): scan_all_packages = models.BooleanField( default=False, ) + infer_download_urls = models.BooleanField( + default=False, + ) status = models.CharField( max_length=10, choices=Status.choices, @@ -1615,6 +1633,7 @@ def import_data_from_scancodeio(self): product=self.product, update_existing=self.update_existing_packages, scan_all_packages=self.scan_all_packages, + infer_download_urls=self.infer_download_urls, ) created, existing, errors = importer.save() diff --git a/product_portfolio/templates/product_portfolio/modals/scan_all_packages_modal.html b/product_portfolio/templates/product_portfolio/modals/scan_all_packages_modal.html index 4f06c9f5..6262fe46 100644 --- a/product_portfolio/templates/product_portfolio/modals/scan_all_packages_modal.html +++ 
b/product_portfolio/templates/product_portfolio/modals/scan_all_packages_modal.html @@ -1,24 +1,35 @@ +{% load crispy_forms_tags %}