Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion component_catalog/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1719,6 +1719,10 @@ def has_package_url(self):
"""Return objects with Package URL defined."""
return self.filter(~models.Q(type="") & ~models.Q(name=""))

def has_download_url(self):
    """Return only the objects whose ``download_url`` is not an empty string."""
    # An empty string is the "no value" marker tested here.
    non_empty_download_url = ~models.Q(download_url="")
    return self.filter(non_empty_download_url)

def annotate_sortable_identifier(self):
"""
Annotate the QuerySet with a `sortable_identifier` value that combines
Expand Down Expand Up @@ -2036,9 +2040,14 @@ def package_url_filename(self):

@property
def inferred_repo_url(self):
    """
    Return the repo URL deduced from the Package URL (purl).

    The value is whatever ``purl2url.get_repo_url`` returns for
    ``self.package_url``; no check for an empty purl is made here.
    """
    return purl2url.get_repo_url(self.package_url)

def infer_download_url(self):
    """
    Infer the download URL from the Package URL (purl).

    Return ``None`` when this object has no Package URL; otherwise return the
    result of ``download.infer_download_url`` for that purl.
    """
    if not self.package_url:
        return None
    return download.infer_download_url(self.package_url)

def get_url(self, name, params=None, include_identifier=False):
if not params:
params = [self.dataspace.name, quote_plus(str(self.uuid))]
Expand Down
15 changes: 15 additions & 0 deletions component_catalog/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2576,6 +2576,15 @@ def test_package_model_inferred_repo_url_property(self):
expected = "https://github.com/package-url/packageurl-python/tree/v0.10.4"
self.assertEqual(expected, package1.inferred_repo_url)

def test_package_model_infer_download_url(self):
    """Check ``infer_download_url`` both before and after a purl is set."""
    package1 = make_package(self.dataspace, filename="package")
    # Before a Package URL is set, nothing is expected to be inferred.
    self.assertIsNone(package1.infer_download_url())

    # The purl name and version must match the nuget.org URL expected below.
    package1.set_package_url("pkg:nuget/Azure.Core@1.45.0")
    package1.save()
    expected_download_url = "https://www.nuget.org/api/v2/package/Azure.Core/1.45.0"
    self.assertEqual(expected_download_url, package1.infer_download_url())

@mock.patch("dejacode_toolkit.purldb.PurlDB.find_packages")
def test_package_model_get_purldb_entries(self, mock_find_packages):
purl1 = "pkg:pypi/[email protected]"
Expand Down Expand Up @@ -2758,6 +2767,12 @@ def test_package_queryset_has_package_url(self):
qs = Package.objects.has_package_url()
self.assertQuerySetEqual(qs, [package1])

def test_package_queryset_has_download_url(self):
    """Only the package created with a download URL is returned by the queryset."""
    with_download_url = make_package(self.dataspace, download_url="https://download.url")
    # Second package is created without a download URL and must be filtered out.
    make_package(self.dataspace)

    queryset = Package.objects.has_download_url()
    self.assertQuerySetEqual(queryset, [with_download_url])

def test_package_queryset_annotate_sortable_identifier(self):
package1 = make_package(self.dataspace, package_url="pkg:pypi/[email protected]")
package2 = make_package(self.dataspace)
Expand Down
12 changes: 12 additions & 0 deletions product_portfolio/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,12 @@ class LoadSBOMsFormSerializer(serializers.Serializer):
required=True,
help_text=LoadSBOMsForm.base_fields["input_file"].label,
)
infer_download_urls = serializers.BooleanField(
required=False,
initial=True,
default=True,
help_text=LoadSBOMsForm.base_fields["infer_download_urls"].help_text,
)
update_existing_packages = serializers.BooleanField(
required=False,
default=False,
Expand All @@ -246,6 +252,12 @@ class ImportManifestsFormSerializer(serializers.Serializer):
required=True,
help_text=ImportManifestsForm.base_fields["input_file"].label,
)
infer_download_urls = serializers.BooleanField(
required=False,
initial=True,
default=True,
help_text=ImportManifestsForm.base_fields["infer_download_urls"].help_text,
)
update_existing_packages = serializers.BooleanField(
required=False,
default=False,
Expand Down
35 changes: 34 additions & 1 deletion product_portfolio/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,15 @@ class BaseProductImportFormView(forms.Form):
"for all of the packages assigned to your product."
),
)
infer_download_urls = forms.BooleanField(
label=_("Infer missing download URLs"),
required=False,
initial=True,
help_text=_(
"When a download URL is missing from the input data, attempt to infer it "
"from the Package URL (purl). A download URL is required for package scanning."
),
)

@property
def helper(self):
Expand All @@ -652,6 +661,7 @@ def helper(self):
Fieldset(
None,
"input_file",
"infer_download_urls",
"update_existing_packages",
"scan_all_packages",
StrictSubmit("submit", _("Import"), css_class="btn-success col-2"),
Expand All @@ -667,6 +677,7 @@ def submit(self, product, user):
input_file=self.cleaned_data.get("input_file"),
update_existing_packages=self.cleaned_data.get("update_existing_packages"),
scan_all_packages=self.cleaned_data.get("scan_all_packages"),
infer_download_urls=self.cleaned_data.get("infer_download_urls"),
created_by=user,
)

Expand Down Expand Up @@ -716,7 +727,7 @@ class LoadSBOMsForm(BaseProductImportFormView):

class ImportManifestsForm(BaseProductImportFormView):
project_type = ScanCodeProject.ProjectType.IMPORT_FROM_MANIFEST
pipeline_name = "resolve_dependencies"
pipeline_name = "resolve_dependencies:StaticResolver,DynamicResolver"

input_file = SmartFileField(
label=_("Manifest file or zip archive"),
Expand Down Expand Up @@ -1005,3 +1016,25 @@ def submit(self, product, user):
scancodeproject_uuid=scancode_project.uuid,
)
)


class ScanAllPackagesForm(forms.Form):
    """Options form displayed in the "Scan all Packages" modal."""

    infer_download_urls = forms.BooleanField(
        label=_("Infer missing download URLs"),
        required=False,
        initial=True,
        help_text=_(
            "When a download URL is missing for packages, attempt to infer it "
            "from the Package URL (purl). "
            "A download URL is required for package scanning."
        ),
    )

    @property
    def helper(self):
        """Return the crispy ``FormHelper`` used to render this form."""
        helper = FormHelper()
        helper.form_method = "post"
        helper.form_id = "scan-all-packages-form"
        helper.attrs = {"autocomplete": "off"}
        # The modal template opens its own <form> element using the method and
        # id above, then calls {% crispy %} inside it. Letting crispy emit a
        # second <form> would produce nested forms sharing the same id.
        helper.form_tag = False
        helper.layout = Layout("infer_download_urls")
        return helper
25 changes: 22 additions & 3 deletions product_portfolio/importers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from component_catalog.models import PACKAGE_URL_FIELDS
from component_catalog.models import Component
from component_catalog.models import Package
from dejacode_toolkit import download
from dejacode_toolkit.scancodeio import ScanCodeIO
from dje.copier import copy_object
from dje.importers import BaseImporter
Expand Down Expand Up @@ -649,7 +650,15 @@ class ImportPackageFromScanCodeIO:
"filename",
]

def __init__(self, user, project_uuid, product, update_existing=False, scan_all_packages=False):
def __init__(
self,
user,
project_uuid,
product,
update_existing=False,
scan_all_packages=False,
infer_download_urls=False,
):
self.licensing = Licensing()
self.created = defaultdict(list)
self.existing = defaultdict(list)
Expand All @@ -662,12 +671,13 @@ def __init__(self, user, project_uuid, product, update_existing=False, scan_all_
self.product = product
self.update_existing = update_existing
self.scan_all_packages = scan_all_packages
self.infer_download_urls = infer_download_urls

scancodeio = ScanCodeIO(user.dataspace)
self.packages = scancodeio.fetch_project_packages(self.project_uuid)
if not self.packages:
raise Exception("Packages could not be fetched from ScanCode.io")
self.dependencies = scancodeio.fetch_project_dependencies(self.project_uuid)
if not self.packages and not self.dependencies:
raise Exception("Packages could not be fetched from ScanCode.io")

def save(self):
self.import_packages()
Expand Down Expand Up @@ -696,6 +706,15 @@ def import_package(self, package_data):
# Check if the package already exists to prevent duplication.
package = self.look_for_existing_package(package_data)

# Infer a download URL from the Package URL
if (
self.infer_download_urls
and not package_data.get("download_url")
and (purl := package_data.get("purl"))
and (download_url := download.infer_download_url(purl))
):
package_data["download_url"] = download_url

if license_expression := package_data.get("declared_license_expression"):
license_expression = str(self.licensing.dedup(license_expression))
package_data["license_expression"] = license_expression
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.2.8 on 2025-12-16 04:14

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``infer_download_urls`` boolean field to ``scancodeproject``."""

    dependencies = [("product_portfolio", "0013_productstatus_is_locked_and_more")]

    operations = [
        # AddField(model_name, name, field)
        migrations.AddField(
            "scancodeproject",
            "infer_download_urls",
            models.BooleanField(default=False),
        ),
    ]
29 changes: 24 additions & 5 deletions product_portfolio/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,25 +557,40 @@ def assign_objects(self, related_objects, user, replace_version=False):

return created_count, updated_count, unchanged_count

def scan_all_packages_task(self, user, infer_download_urls=False):
    """
    Submit a Scan request to ScanCode.io for each package assigned to this Product.
    Only packages with a proper download URL are sent.

    When ``infer_download_urls`` is true, ``improve_packages_from_purl`` is
    called first so that packages missing a download URL get a chance to
    receive one before the list of URLs is built.

    The scan submission is delegated to ``tasks.scancodeio_submit_scan.delay``
    using the ``user`` and its dataspace UUIDs.
    """
    if infer_download_urls:
        self.improve_packages_from_purl()

    # Match complete schemes: a bare "http" prefix would make the "https" entry
    # redundant and would also accept values that are not HTTP(S) URLs.
    http_schemes = ("http://", "https://")
    package_download_urls = [
        package.download_url
        for package in self.all_packages.has_download_url()
        if package.download_url.startswith(http_schemes)
    ]

    tasks.scancodeio_submit_scan.delay(
        uris=package_download_urls,
        user_uuid=user.uuid,
        dataspace_uuid=user.dataspace.uuid,
    )

def improve_packages_from_purl(self):
    """
    Fill in missing package download URLs inferred from their Package URL.

    Only packages that have a Package URL and an empty ``download_url`` are
    considered. Return the list of packages that were updated.
    """
    candidates = self.all_packages.has_package_url().filter(download_url="")

    updated = []
    for candidate in candidates:
        inferred_url = candidate.infer_download_url()
        if not inferred_url:
            # Nothing could be inferred for this purl; leave the package as is.
            continue
        candidate.update(download_url=inferred_url)
        updated.append(candidate)

    return updated

def improve_packages_from_purldb(self, user):
"""Update all Packages assigned to the Product using PurlDB data."""
"""Update all packages assigned to the Product using PurlDB data."""
updated_packages = []
for package in self.packages.all():
updated_fields = package.update_from_purldb(user)
Expand Down Expand Up @@ -1555,6 +1570,9 @@ class Status(models.TextChoices):
scan_all_packages = models.BooleanField(
default=False,
)
infer_download_urls = models.BooleanField(
default=False,
)
status = models.CharField(
max_length=10,
choices=Status.choices,
Expand Down Expand Up @@ -1615,6 +1633,7 @@ def import_data_from_scancodeio(self):
product=self.product,
update_existing=self.update_existing_packages,
scan_all_packages=self.scan_all_packages,
infer_download_urls=self.infer_download_urls,
)
created, existing, errors = importer.save()

Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,35 @@
{% load crispy_forms_tags %}
<div class="modal" tabindex="-1" role="dialog" id="scan-all-packages-modal">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Scan all Packages</h5>
<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
</div>
<div class="modal-body bg-body-tertiary">
You are about to initiate multiple scans on the ScanCode.io server for all of the
Packages assigned to your Product, either by direct assignment or by assignment to a
Component assigned to your Product.<br><br>
<strong>Note that this may take some time to complete.</strong><br><br>
You can view the status of all the scans by selecting the
<a target="_blank" href="{% url 'component_catalog:scan_list' %}">Scans</a> option
from the DejaCode Tools dropdown menu, where you can also select each Package in the list
to view scan results details in the "Scan" tab of that Package.
</div>
<div class="modal-footer">
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Close</button>
<a id="scan_all_packages_submit" href="{{ object.get_scan_all_packages_url }}" class="btn btn-success">Submit Scan Request</a>
<form autocomplete="off" method="{{ scan_all_packages_form.helper.form_method }}" action="{{ object.get_scan_all_packages_url }}" id="{{ scan_all_packages_form.helper.form_id }}">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Scan all Packages</h5>
<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
</div>
<div class="modal-body bg-body-tertiary">
<p>
You are about to initiate multiple scans on the ScanCode.io server for all the
Packages assigned to your Product, either by direct assignment or by assignment to a
Component assigned to your Product.
</p>
<p>
<strong>Note that this may take some time to complete.</strong>
</p>
<p>
You can view the status of all the scans by selecting the
<a target="_blank" href="{% url 'component_catalog:scan_list' %}">Scans</a> option
from the DejaCode Tools dropdown menu, where you can also select each Package in the list
to view scan results details in the "Scan" tab of that Package.
</p>
<hr>
{% crispy scan_all_packages_form %}
</div>
<div class="modal-footer">
<input type="button" name="close" value="Close" class="btn btn-secondary" data-bs-dismiss="modal">
<input type="submit" id="scan_all_packages_submit" value="Submit Scan Request" class="btn btn-primary btn-success">
</div>
</div>
</div>
</div>
</form>
</div>
2 changes: 2 additions & 0 deletions product_portfolio/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,7 @@ def test_api_product_endpoint_load_sboms_action(self):

data = {
"input_file": ContentFile("{}", name="sbom.json"),
"infer_download_urls": True,
"update_existing_packages": False,
"scan_all_packages": False,
}
Expand Down Expand Up @@ -436,6 +437,7 @@ def test_api_product_endpoint_import_manifests_action(self):

data = {
"input_file": ContentFile("Content", name="requirements.txt"),
"infer_download_urls": True,
"update_existing_packages": False,
"scan_all_packages": False,
}
Expand Down
Loading