From 2da4907773c078b1c66d93952d4c68d923d7367c Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 19 Feb 2026 20:39:29 +0530 Subject: [PATCH 01/64] Execution backend - revamp --- backend/backend/worker_celery.py | 112 ++ .../prompt_studio_helper.py | 180 ++-- docker/docker-compose.yaml | 36 + docs/local-dev-setup-executor-migration.md | 586 +++++++++++ .../unstract/sdk1/adapters/ocr/register.py | 4 +- .../sdk1/adapters/vectordb/exceptions.py | 18 +- .../sdk1/adapters/vectordb/register.py | 4 +- .../unstract/sdk1/adapters/x2text/register.py | 4 +- .../src/unstract/sdk1/execution/__init__.py | 15 + .../src/unstract/sdk1/execution/context.py | 109 ++ .../src/unstract/sdk1/execution/dispatcher.py | 164 +++ .../src/unstract/sdk1/execution/executor.py | 46 + .../unstract/sdk1/execution/orchestrator.py | 81 ++ .../src/unstract/sdk1/execution/registry.py | 114 ++ .../src/unstract/sdk1/execution/result.py | 74 ++ unstract/sdk1/tests/test_execution.py | 960 +++++++++++++++++ workers/.env.test | 4 + workers/executor/__init__.py | 12 + workers/executor/executor_tool_shim.py | 130 +++ workers/executor/executors/__init__.py | 9 + workers/executor/executors/answer_prompt.py | 335 ++++++ workers/executor/executors/constants.py | 203 ++++ workers/executor/executors/dto.py | 39 + workers/executor/executors/exceptions.py | 79 ++ workers/executor/executors/file_utils.py | 39 + workers/executor/executors/index.py | 221 ++++ .../executor/executors/json_repair_helper.py | 63 ++ workers/executor/executors/legacy_executor.py | 925 ++++++++++++++++ workers/executor/executors/postprocessor.py | 119 +++ workers/executor/executors/retrieval.py | 113 ++ .../executor/executors/retrievers/__init__.py | 0 .../executors/retrievers/automerging.py | 85 ++ .../executors/retrievers/base_retriever.py | 35 + .../executor/executors/retrievers/fusion.py | 94 ++ .../executors/retrievers/keyword_table.py | 79 ++ .../executors/retrievers/recursive.py | 77 ++ .../executor/executors/retrievers/router.py | 157 +++ .../executor/executors/retrievers/simple.py | 53 + .../executors/retrievers/subquestion.py | 59 ++ workers/executor/executors/usage.py | 81 ++ .../executors/variable_replacement.py | 264 +++++ workers/executor/tasks.py | 76 ++ workers/executor/worker.py | 77 ++ workers/file_processing/__init__.py | 2 + .../file_processing/structure_tool_task.py | 851 +++++++++++++++ workers/run-worker-docker.sh | 18 + workers/run-worker.sh | 15 +- workers/shared/enums/task_enums.py | 6 + workers/shared/enums/worker_enums_base.py | 5 + .../shared/infrastructure/config/registry.py | 13 + workers/shared/workflow/execution/service.py | 85 +- workers/tests/__init__.py | 0 workers/tests/conftest.py | 14 + workers/tests/test_answer_prompt.py | 843 +++++++++++++++ workers/tests/test_executor_sanity.py | 288 +++++ workers/tests/test_legacy_executor_extract.py | 595 +++++++++++ workers/tests/test_legacy_executor_index.py | 453 ++++++++ .../tests/test_legacy_executor_scaffold.py | 282 +++++ workers/tests/test_phase2f.py | 331 ++++++ workers/tests/test_phase2h.py | 484 +++++++++ workers/tests/test_retrieval.py | 275 +++++ workers/tests/test_sanity_phase2.py | 788 ++++++++++++++ workers/tests/test_sanity_phase3.py | 996 ++++++++++++++++++ workers/tests/test_sanity_phase4.py | 873 +++++++++++++++ workers/tests/test_usage.py | 309 ++++++ 65 files changed, 13374 insertions(+), 87 deletions(-) create mode 100644 backend/backend/worker_celery.py create mode 100644 docs/local-dev-setup-executor-migration.md create mode 100644 unstract/sdk1/src/unstract/sdk1/execution/__init__.py create mode 100644 unstract/sdk1/src/unstract/sdk1/execution/context.py create mode 100644 unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py create mode 100644 unstract/sdk1/src/unstract/sdk1/execution/executor.py create mode 100644 unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py create mode 100644 unstract/sdk1/src/unstract/sdk1/execution/registry.py create mode 100644 unstract/sdk1/src/unstract/sdk1/execution/result.py create mode 100644 unstract/sdk1/tests/test_execution.py create mode 100644 workers/.env.test create mode 100644 workers/executor/__init__.py create mode 100644 workers/executor/executor_tool_shim.py create mode 100644 workers/executor/executors/__init__.py create mode 100644 workers/executor/executors/answer_prompt.py create mode 100644 workers/executor/executors/constants.py create mode 100644 workers/executor/executors/dto.py create mode 100644 workers/executor/executors/exceptions.py create mode 100644 workers/executor/executors/file_utils.py create mode 100644 workers/executor/executors/index.py create mode 100644 workers/executor/executors/json_repair_helper.py create mode 100644 workers/executor/executors/legacy_executor.py create mode 100644 workers/executor/executors/postprocessor.py create mode 100644 workers/executor/executors/retrieval.py create mode 100644 workers/executor/executors/retrievers/__init__.py create mode 100644 workers/executor/executors/retrievers/automerging.py create mode 100644 workers/executor/executors/retrievers/base_retriever.py create mode 100644 workers/executor/executors/retrievers/fusion.py create mode 100644 workers/executor/executors/retrievers/keyword_table.py create mode 100644 workers/executor/executors/retrievers/recursive.py create mode 100644 workers/executor/executors/retrievers/router.py create mode 100644 workers/executor/executors/retrievers/simple.py create mode 100644 workers/executor/executors/retrievers/subquestion.py create mode 100644 workers/executor/executors/usage.py create mode 100644 workers/executor/executors/variable_replacement.py create mode 100644 workers/executor/tasks.py create mode 100644 workers/executor/worker.py create mode 100644 workers/file_processing/structure_tool_task.py create mode 100644 workers/tests/__init__.py create mode 100644 workers/tests/conftest.py create mode 100644 workers/tests/test_answer_prompt.py create mode 100644 workers/tests/test_executor_sanity.py create mode 100644 workers/tests/test_legacy_executor_extract.py create mode 100644 workers/tests/test_legacy_executor_index.py create mode 100644 workers/tests/test_legacy_executor_scaffold.py create mode 100644 workers/tests/test_phase2f.py create mode 100644 workers/tests/test_phase2h.py create mode 100644 workers/tests/test_retrieval.py create mode 100644 workers/tests/test_sanity_phase2.py create mode 100644 workers/tests/test_sanity_phase3.py create mode 100644 workers/tests/test_sanity_phase4.py create mode 100644 workers/tests/test_usage.py diff --git a/backend/backend/worker_celery.py b/backend/backend/worker_celery.py new file mode 100644 index 0000000000..6b980f6f26 --- /dev/null +++ b/backend/backend/worker_celery.py @@ -0,0 +1,112 @@ +"""Lightweight Celery app for dispatching tasks to worker-v2 workers. + +The Django backend uses Redis as its Celery broker for internal tasks +(beat, periodic tasks, etc.). The worker-v2 workers (executor, +file_processing, etc.) use RabbitMQ as their broker. + +This module provides a Celery app connected to RabbitMQ specifically +for dispatching tasks (via ExecutionDispatcher) to worker-v2 workers. + +Problem: Celery reads the ``CELERY_BROKER_URL`` environment variable +with highest priority — overriding constructor args, ``conf.update()``, +and ``config_from_object()``. Since Django sets that env var to Redis, +every Celery app created in this process inherits Redis as broker. + +Solution: Subclass Celery and override ``connection_for_write`` / +``connection_for_read`` so they always use our explicit RabbitMQ URL, +bypassing the config resolution chain entirely. +""" + +import logging +import os +from urllib.parse import quote_plus + +from celery import Celery +from django.conf import settings +from kombu import Queue + +logger = logging.getLogger(__name__) + +_worker_app: Celery | None = None + + +class _WorkerDispatchCelery(Celery): + """Celery subclass that forces an explicit broker URL. + + Works around Celery's env-var-takes-priority behaviour where + ``CELERY_BROKER_URL`` always overrides per-app configuration. + The connection methods are the actual points where Celery opens + AMQP/Redis connections, so overriding them is both sufficient + and safe. + """ + + _explicit_broker: str | None = None + + def connection_for_write(self, url=None, *args, **kwargs): + return super().connection_for_write( + url=url or self._explicit_broker, *args, **kwargs + ) + + def connection_for_read(self, url=None, *args, **kwargs): + return super().connection_for_read( + url=url or self._explicit_broker, *args, **kwargs + ) + + +def get_worker_celery_app() -> Celery: + """Get or create a Celery app for dispatching to worker-v2 workers. + + The app uses: + - RabbitMQ as broker (WORKER_CELERY_BROKER_URL env var) + - Same PostgreSQL result backend as the Django Celery app + + Returns: + Celery app configured for worker-v2 dispatch. + + Raises: + ValueError: If WORKER_CELERY_BROKER_URL is not set. + """ + global _worker_app + if _worker_app is not None: + return _worker_app + + broker_url = os.environ.get("WORKER_CELERY_BROKER_URL") + if not broker_url: + raise ValueError( + "WORKER_CELERY_BROKER_URL is not set. " + "This should point to the RabbitMQ broker used by worker-v2 " + "workers (e.g., amqp://admin:password@rabbitmq:5672//)." + ) + + # Reuse the same PostgreSQL result backend as Django's Celery app + result_backend = ( + f"db+postgresql://{settings.DB_USER}:" + f"{quote_plus(settings.DB_PASSWORD)}" + f"@{settings.DB_HOST}:{settings.DB_PORT}/" + f"{settings.CELERY_BACKEND_DB_NAME}" + ) + + app = _WorkerDispatchCelery( + "worker-dispatch", + set_as_current=False, + fixups=[], + ) + # Store the explicit broker URL for use in connection overrides + app._explicit_broker = broker_url + + app.conf.update( + result_backend=result_backend, + task_queues=[Queue("executor")], + task_serializer="json", + accept_content=["json"], + result_serializer="json", + result_extended=True, + ) + + _worker_app = app + # Log broker host only (mask credentials) + safe_broker = broker_url.split("@")[-1] if "@" in broker_url else broker_url + logger.info( + "Created worker dispatch Celery app (broker=%s)", safe_broker + ) + return _worker_app diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py index 991adbcfcc..2bd2dd3b05 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py +++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py @@ -28,6 +28,7 @@ ExecutionSource, IndexingStatus, LogLevels, + ToolStudioKeys, ToolStudioPromptKeys, ) from prompt_studio.prompt_studio_core_v2.constants import IndexingConstants as IKeys @@ -67,9 +68,10 @@ from unstract.core.pubsub_helper import LogPublisher from unstract.sdk1.constants import LogLevel from unstract.sdk1.exceptions import IndexingError, SdkError +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher from unstract.sdk1.file_storage.constants import StorageType from unstract.sdk1.file_storage.env_helper import EnvHelper -from unstract.sdk1.prompt import PromptTool from unstract.sdk1.utils.indexing import IndexingUtils from unstract.sdk1.utils.tool import ToolUtils @@ -181,6 +183,9 @@ def validate_profile_manager_owner_access( the action. """ profile_manager_owner = profile_manager.created_by + if profile_manager_owner is None: + # No owner on this profile manager — skip ownership validation + return is_llm_owned = ( profile_manager.llm.shared_to_org @@ -266,6 +271,27 @@ def _publish_log( LogPublisher.log_prompt(component, level, state, message), ) + @staticmethod + def _get_dispatcher() -> ExecutionDispatcher: + """Get an ExecutionDispatcher backed by the worker Celery app. + + Uses the RabbitMQ-backed Celery app (not the Django Redis one) + so tasks reach the worker-v2 executor worker. + """ + from backend.worker_celery import get_worker_celery_app + + return ExecutionDispatcher(celery_app=get_worker_celery_app()) + + @staticmethod + def _get_platform_api_key(org_id: str) -> str: + """Get the platform API key for the given organization.""" + from platform_settings_v2.platform_auth_service import ( + PlatformAuthenticationService, + ) + + platform_key = PlatformAuthenticationService.get_active_platform_key(org_id) + return str(platform_key.key) + @staticmethod def get_select_fields() -> dict[str, Any]: """Method to fetch dropdown field values for frontend. @@ -994,24 +1020,28 @@ def _fetch_response( TSPKeys.CUSTOM_DATA: tool.custom_data, } - try: - responder = PromptTool( - tool=util, - prompt_host=settings.PROMPT_HOST, - prompt_port=settings.PROMPT_PORT, - request_id=StateStore.get(Common.REQUEST_ID), - ) - params = {TSPKeys.INCLUDE_METADATA: True} - return responder.answer_prompt(payload=payload, params=params) - except SdkError as e: - msg = str(e) - if e.actual_err and hasattr(e.actual_err, "response"): - msg = e.actual_err.response.json().get("error", str(e)) + # Add platform API key and metadata flag for executor + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload[ToolStudioKeys.PLATFORM_SERVICE_API_KEY] = platform_api_key + payload[TSPKeys.INCLUDE_METADATA] = True + + dispatcher = PromptStudioHelper._get_dispatcher() + context = ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id=run_id, + execution_source="ide", + organization_id=org_id, + executor_params=payload, + request_id=StateStore.get(Common.REQUEST_ID), + ) + result = dispatcher.dispatch(context) + if not result.success: raise AnswerFetchError( "Error while fetching response for " - f"'{prompt.prompt_key}' with '{doc_name}'. {msg}", - status_code=int(e.status_code or 500), + f"'{prompt.prompt_key}' with '{doc_name}'. {result.error}", ) + return result.data @staticmethod def fetch_table_settings_if_enabled( @@ -1140,24 +1170,26 @@ def dynamic_indexer( TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value, } - util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) - - try: - responder = PromptTool( - tool=util, - prompt_host=settings.PROMPT_HOST, - prompt_port=settings.PROMPT_PORT, - request_id=StateStore.get(Common.REQUEST_ID), - ) - doc_id = responder.index(payload=payload) - except SdkError as e: - msg = str(e) - if e.actual_err and hasattr(e.actual_err, "response"): - msg = e.actual_err.response.json().get("error", str(e)) + # Add platform API key for executor + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload["platform_api_key"] = platform_api_key + + dispatcher = PromptStudioHelper._get_dispatcher() + index_context = ExecutionContext( + executor_name="legacy", + operation="index", + run_id=run_id or str(uuid.uuid4()), + execution_source="ide", + organization_id=org_id, + executor_params=payload, + request_id=StateStore.get(Common.REQUEST_ID), + ) + result = dispatcher.dispatch(index_context) + if not result.success: raise IndexingAPIError( - f"Failed to index '{filename}'. {msg}", - status_code=int(e.status_code or 500), + f"Failed to index '{filename}'. {result.error}", ) + doc_id = result.data.get("doc_id") PromptStudioIndexHelper.handle_index_manager( document_id=document_id, @@ -1221,7 +1253,6 @@ def _fetch_single_pass_response( storage_type=StorageType.PERMANENT, env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, ) - util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) directory, filename = os.path.split(input_file_path) file_path = os.path.join( directory, "extract", os.path.splitext(filename)[0] + ".txt" @@ -1288,14 +1319,27 @@ def _fetch_single_pass_response( TSPKeys.CUSTOM_DATA: tool.custom_data, } - responder = PromptTool( - tool=util, - prompt_host=settings.PROMPT_HOST, - prompt_port=settings.PROMPT_PORT, + # Add platform API key and metadata flag for executor + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload[ToolStudioKeys.PLATFORM_SERVICE_API_KEY] = platform_api_key + payload[TSPKeys.INCLUDE_METADATA] = True + + dispatcher = PromptStudioHelper._get_dispatcher() + context = ExecutionContext( + executor_name="legacy", + operation="single_pass_extraction", + run_id=run_id or str(uuid.uuid4()), + execution_source="ide", + organization_id=org_id, + executor_params=payload, request_id=StateStore.get(Common.REQUEST_ID), ) - params = {TSPKeys.INCLUDE_METADATA: True} - return responder.single_pass_extraction(payload=payload, params=params) + result = dispatcher.dispatch(context) + if not result.success: + raise AnswerFetchError( + f"Error fetching single pass response. {result.error}", + ) + return result.data @staticmethod def get_tool_from_tool_id(tool_id: str) -> CustomTool | None: @@ -1361,32 +1405,23 @@ def dynamic_extractor( IKeys.OUTPUT_FILE_PATH: extract_file_path, } - util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) - - try: - responder = PromptTool( - tool=util, - prompt_host=settings.PROMPT_HOST, - prompt_port=settings.PROMPT_PORT, - request_id=StateStore.get(Common.REQUEST_ID), - ) - extracted_text = responder.extract(payload=payload) - success = PromptStudioIndexHelper.mark_extraction_status( - document_id=document_id, - profile_manager=profile_manager, - x2text_config_hash=x2text_config_hash, - enable_highlight=enable_highlight, - ) - if not success: - logger.warning( - f"Failed to mark extraction success for document {document_id}. " - f"Extraction completed but status not saved." - ) - except SdkError as e: - msg = str(e) - if e.actual_err and hasattr(e.actual_err, "response"): - msg = e.actual_err.response.json().get("error", str(e)) - + # Add platform API key for executor + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload["platform_api_key"] = platform_api_key + + dispatcher = PromptStudioHelper._get_dispatcher() + extract_context = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id=run_id or str(uuid.uuid4()), + execution_source="ide", + organization_id=org_id, + executor_params=payload, + request_id=StateStore.get(Common.REQUEST_ID), + ) + result = dispatcher.dispatch(extract_context) + if not result.success: + msg = result.error or "Unknown extraction error" success = PromptStudioIndexHelper.mark_extraction_status( document_id=document_id, profile_manager=profile_manager, @@ -1400,10 +1435,21 @@ def dynamic_extractor( f"Failed to mark extraction failure for document {document_id}. " f"Extraction failed but status not saved." ) - raise ExtractionAPIError( f"Failed to extract '{filename}'. {msg}", - status_code=int(e.status_code or 500), + ) + + extracted_text = result.data.get("extracted_text", "") + success = PromptStudioIndexHelper.mark_extraction_status( + document_id=document_id, + profile_manager=profile_manager, + x2text_config_hash=x2text_config_hash, + enable_highlight=enable_highlight, + ) + if not success: + logger.warning( + f"Failed to mark extraction success for document {document_id}. " + f"Extraction completed but status not saved." ) return extracted_text diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 24b00f0c6e..075b3f2895 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -562,6 +562,42 @@ services: profiles: - workers-v2 + worker-executor-v2: + image: unstract/worker-unified:${VERSION} + container_name: unstract-worker-executor-v2 + restart: unless-stopped + command: ["executor"] + ports: + - "8092:8088" + env_file: + - ../workers/.env + - ./essentials.env + depends_on: + - db + - redis + - rabbitmq + - platform-service + environment: + - ENVIRONMENT=development + - APPLICATION_NAME=unstract-worker-executor-v2 + - WORKER_TYPE=executor + - WORKER_NAME=executor-worker-v2 + - EXECUTOR_METRICS_PORT=8088 + - HEALTH_PORT=8088 + # Configurable Celery options + - CELERY_QUEUES_EXECUTOR=${CELERY_QUEUES_EXECUTOR:-executor} + - CELERY_POOL=${WORKER_EXECUTOR_POOL:-prefork} + - CELERY_PREFETCH_MULTIPLIER=${WORKER_EXECUTOR_PREFETCH_MULTIPLIER:-1} + - CELERY_CONCURRENCY=${WORKER_EXECUTOR_CONCURRENCY:-2} + - CELERY_EXTRA_ARGS=${WORKER_EXECUTOR_EXTRA_ARGS:-} + labels: + - traefik.enable=false + volumes: + - ./workflow_data:/data + - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + profiles: + - workers-v2 + volumes: prompt_studio_data: unstract_data: diff --git a/docs/local-dev-setup-executor-migration.md b/docs/local-dev-setup-executor-migration.md new file mode 100644 index 0000000000..8bb6921fee --- /dev/null +++ b/docs/local-dev-setup-executor-migration.md @@ -0,0 +1,586 @@ +# Local Dev Setup: Executor Migration (Pluggable Executor System v2) + +> **Branch:** `feat/execution-backend` +> **Date:** 2026-02-19 + +This guide covers everything needed to run and test the executor migration locally. + +--- + +## Table of Contents + +1. [Architecture Overview (Post-Migration)](#1-architecture-overview-post-migration) +2. [Prerequisites](#2-prerequisites) +3. [Service Dependency Map](#3-service-dependency-map) +4. [Step-by-Step Setup](#4-step-by-step-setup) +5. [Environment Configuration](#5-environment-configuration) +6. [Running the Executor Worker](#6-running-the-executor-worker) +7. [Port Reference](#7-port-reference) +8. [Health Check Endpoints](#8-health-check-endpoints) +9. [Debugging & Troubleshooting](#9-debugging--troubleshooting) +10. [Test Verification Checklist](#10-test-verification-checklist) + +--- + +## 1. Architecture Overview (Post-Migration) + +``` +┌──────────────────────────────────────────────────────────────┐ +│ CALLERS │ +│ │ +│ Workflow Path: │ +│ process_file_batch → structure_tool_task │ +│ → ExecutionDispatcher.dispatch() [Celery] │ +│ → AsyncResult.get() │ +│ │ +│ Prompt Studio IDE: │ +│ Django View → PromptStudioHelper │ +│ → ExecutionDispatcher.dispatch() [Celery] │ +│ → AsyncResult.get() │ +└───────────────────────┬──────────────────────────────────────┘ + │ Celery task: execute_extraction + ▼ +┌──────────────────────────────────────────────────────────────┐ +│ EXECUTOR WORKER (dedicated, queue: "executor") │ +│ │ +│ execute_extraction task │ +│ → ExecutionOrchestrator → ExecutorRegistry → LegacyExecutor │ +│ → Returns ExecutionResult via Celery result backend │ +└──────────────────────────────────────────────────────────────┘ +``` + +**What changed:** +- `prompt-service` Flask app is **replaced** by the executor worker (Celery) +- Structure tool Docker container is **replaced** by `structure_tool_task` (Celery task in file_processing worker) +- `PromptTool` SDK HTTP client is **replaced** by `ExecutionDispatcher` (Celery dispatch) +- **No DB schema changes** — no Django migrations needed + +**What stays the same:** +- `platform-service` (port 3001) — still serves tool metadata +- `runner` (port 5002) — still needed for Classifier, Text Extractor, Translate tools +- `x2text-service` (port 3004) — still needed for text extraction +- All adapter SDKs (LLM, Embedding, VectorDB, X2Text) — used by LegacyExecutor via ExecutorToolShim +- Frontend — no changes (same REST API responses) + +--- + +## 2. Prerequisites + +### 2.1 System Requirements + +| Requirement | Minimum | Notes | +|---|---|---| +| Docker + Docker Compose | v2.20+ | `docker compose version` | +| Python | 3.11+ | System or pyenv | +| uv | Latest | `pip install uv` or use the repo-local binary at `backend/venv/bin/uv` | +| Git | 2.30+ | On `feat/execution-backend` branch | +| Free RAM | 8 GB+ | Many services run concurrently | +| Free Disk | 10 GB+ | Docker images + volumes | + +### 2.2 Verify Branch + +```bash +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract +git branch --show-current +# Expected: feat/execution-backend +``` + +### 2.3 Required Docker Images + +The system needs these images built: + +```bash +# Build all images (from docker/ directory) +cd docker +docker compose -f docker-compose.build.yaml build + +# Or build just the critical ones: +docker compose -f docker-compose.build.yaml build backend +docker compose -f docker-compose.build.yaml build platform-service +docker compose -f docker-compose.build.yaml build worker-unified # V2 workers including executor +docker compose -f docker-compose.build.yaml build runner +docker compose -f docker-compose.build.yaml build frontend +``` + +> **Tip:** For faster dev builds, set `MINIMAL_BUILD=1` in docker-compose.build.yaml args. + +--- + +## 3. Service Dependency Map + +### Essential Infrastructure (must be running for ANYTHING to work) + +| Service | Container | Port | Purpose | +|---|---|---|---| +| PostgreSQL (pgvector) | `unstract-db` | 5432 | Primary database | +| Redis | `unstract-redis` | 6379 | Cache + queues | +| RabbitMQ | `unstract-rabbitmq` | 5672 (AMQP), 15672 (UI) | Celery message broker | +| MinIO | `unstract-minio` | 9000 (S3), 9001 (Console) | Object storage | +| Traefik | `unstract-proxy` | 80, 8080 (Dashboard) | Reverse proxy | + +### Application Services + +| Service | Container | Port | Required For | +|---|---|---|---| +| Backend (Django) | `unstract-backend` | 8000 | API, auth, DB migrations | +| Platform Service | `unstract-platform-service` | 3001 | Tool metadata, adapter configs | +| X2Text Service | `unstract-x2text-service` | 3004 | Text extraction (used by executor) | +| Runner | `unstract-runner` | 5002 | Non-structure tools (Classifier, etc.) | +| Frontend | `unstract-frontend` | 3000 | Web UI | +| Flipt | `unstract-flipt` | 8082 (REST), 9005 (gRPC) | Feature flags | + +### Workers (V2 Unified — `--profile workers-v2`) + +| Worker | Container | Health Port | Queue(s) | +|---|---|---|---| +| **Executor** | `unstract-worker-executor-v2` | 8088 | `executor` | +| File Processing | `unstract-worker-file-processing-v2` | 8082 | `file_processing`, `api_file_processing` | +| API Deployment | `unstract-worker-api-deployment-v2` | 8090 | `celery_api_deployments` | +| Callback | `unstract-worker-callback-v2` | 8083 | `file_processing_callback`, `api_file_processing_callback` | +| General | `unstract-worker-general-v2` | 8082 | `celery` | +| Notification | `unstract-worker-notification-v2` | 8085 | `notifications`, `notifications_*` | +| Log Consumer | `unstract-worker-log-consumer-v2` | 8084 | `celery_log_task_queue` | +| Scheduler | `unstract-worker-scheduler-v2` | 8087 | `scheduler` | + +### Post-Migration: REMOVED Services + +| Service | Port | Replaced By | +|---|---|---| +| ~~Prompt Service~~ | ~~3003~~ | Executor Worker (LegacyExecutor inline) | +| ~~Structure Tool (Docker)~~ | N/A | `structure_tool_task` (Celery) | + +--- + +## 4. Step-by-Step Setup + +### 4.1 Start Essential Infrastructure + +```bash +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/docker + +# Start infrastructure services only +docker compose -f docker-compose-dev-essentials.yaml up -d +``` + +Wait for all services to be healthy: +```bash +docker compose -f docker-compose-dev-essentials.yaml ps +``` + +### 4.2 Start Application Services + +**Option A: All via Docker Compose (recommended for first-time setup)** + +```bash +cd docker + +# Start everything including V2 workers (with executor) +docker compose --profile workers-v2 up -d +``` + +**Option B: Hybrid mode (services in Docker, workers local)** + +This is useful when you want to iterate on worker code without rebuilding images. + +```bash +# Start only infrastructure + app services (no V2 workers) +docker compose up -d + +# Then run executor worker locally (see Section 6) +``` + +### 4.3 Verify DB Migrations + +The backend container runs migrations on startup (`--migrate` flag). Verify: + +```bash +docker logs unstract-backend 2>&1 | grep -i "migration" +``` + +### 4.4 Create Workers .env for Local Development + +If running workers outside Docker, you need a local `.env`: + +```bash +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/workers + +# Copy sample and adjust for local dev +cp sample.env .env +``` + +Then edit `workers/.env` — change all Docker hostnames to `localhost`: + +```ini +# === CRITICAL CHANGES FOR LOCAL DEV === +DJANGO_APP_BACKEND_URL=http://localhost:8000 +INTERNAL_API_BASE_URL=http://localhost:8000/internal +CELERY_BROKER_BASE_URL=amqp://localhost:5672// +DB_HOST=localhost +REDIS_HOST=localhost +CACHE_REDIS_HOST=localhost +PLATFORM_SERVICE_HOST=http://localhost +PLATFORM_SERVICE_PORT=3001 +PROMPT_HOST=http://localhost +PROMPT_PORT=3003 +X2TEXT_HOST=http://localhost +X2TEXT_PORT=3004 +UNSTRACT_RUNNER_HOST=http://localhost +UNSTRACT_RUNNER_PORT=5002 +WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://localhost:9000", "key": "minio", "secret": "minio123"}}' +API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://localhost:9000", "key": "minio", "secret": "minio123"}}' +``` + +> **Important:** The `INTERNAL_SERVICE_API_KEY` must match what the backend expects. Default dev value: `dev-internal-key-123`. + +--- + +## 5. Environment Configuration + +### 5.1 Key Environment Variables for Executor Worker + +| Variable | Default (Docker) | Local Override | Purpose | +|---|---|---|---| +| `CELERY_BROKER_BASE_URL` | `amqp://unstract-rabbitmq:5672//` | `amqp://localhost:5672//` | RabbitMQ connection | +| `CELERY_BROKER_USER` | `admin` | same | RabbitMQ user | +| `CELERY_BROKER_PASS` | `password` | same | RabbitMQ password | +| `DB_HOST` | `unstract-db` | `localhost` | PostgreSQL for result backend | +| `DB_USER` | `unstract_dev` | same | DB user | +| `DB_PASSWORD` | `unstract_pass` | same | DB password | +| `DB_NAME` | `unstract_db` | same | DB name | +| `DB_PORT` | `5432` | same | DB port | +| `REDIS_HOST` | `unstract-redis` | `localhost` | Redis for caching | +| `PLATFORM_SERVICE_HOST` | `http://unstract-platform-service` | `http://localhost` | Platform service URL | +| `PLATFORM_SERVICE_PORT` | `3001` | same | Platform service port | +| `X2TEXT_HOST` | `http://unstract-x2text-service` | `http://localhost` | X2Text service URL | +| `X2TEXT_PORT` | `3004` | same | X2Text service port | +| `INTERNAL_SERVICE_API_KEY` | `dev-internal-key-123` | same | Worker→Backend auth | +| `INTERNAL_API_BASE_URL` | `http://unstract-backend:8000/internal` | `http://localhost:8000/internal` | Backend internal API | +| `WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS` | (MinIO JSON, Docker host) | (MinIO JSON, localhost) | Shared file storage | + +### 5.2 Credentials Reference (Default Dev) + +| Service | Username | Password | +|---|---|---| +| PostgreSQL | `unstract_dev` | `unstract_pass` | +| RabbitMQ | `admin` | `password` | +| MinIO | `minio` | `minio123` | +| Redis | (none) | (none) | + +### 5.3 Hierarchical Celery Config + +Worker settings use a 3-tier hierarchy (most specific wins): + +1. **Worker-specific:** `EXECUTOR_TASK_TIME_LIMIT=7200` +2. **Global Celery:** `CELERY_TASK_TIME_LIMIT=3600` +3. **Code default:** (hardcoded fallback) + +--- + +## 6. Running the Executor Worker + +### 6.1 Via Docker Compose (easiest) + +```bash +cd docker + +# Start just the executor worker (assumes infra is up) +docker compose --profile workers-v2 up -d worker-executor-v2 + +# Check logs +docker logs -f unstract-worker-executor-v2 +``` + +### 6.2 Locally with run-worker.sh + +```bash +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/workers + +# Ensure .env has local overrides (Section 4.4) +./run-worker.sh executor +``` + +Options: +```bash +./run-worker.sh -l DEBUG executor # Debug logging +./run-worker.sh -c 4 executor # 4 concurrent tasks +./run-worker.sh -P threads executor # Thread pool instead of prefork +./run-worker.sh -d executor # Run in background (detached) +./run-worker.sh -s # Show status of all workers +./run-worker.sh -k # Kill all workers +``` + +### 6.3 Locally with uv (manual) + +```bash +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/workers + +# Load env +set -a && source .env && set +a + +# Run executor worker +uv run celery -A worker worker \ + --queues=executor \ + --loglevel=INFO \ + --pool=prefork \ + --concurrency=2 \ + --hostname=executor-worker@%h +``` + +### 6.4 Verify Executor Worker is Running + +```bash +# Check health endpoint +curl -s http://localhost:8088/health | python3 -m json.tool + +# Check Celery registered tasks +uv run celery -A worker inspect registered \ + --destination=executor-worker@$(hostname) + +# Expected task: execute_extraction +``` + +### 6.5 Running All V2 Workers + +```bash +# Via Docker +cd docker && docker compose --profile workers-v2 up -d + +# Via script (local) +cd workers && ./run-worker.sh all +``` + +--- + +## 7. Port Reference + +### Infrastructure + +| Service | Port | URL | +|---|---|---| +| PostgreSQL | 5432 | `psql -h localhost -U unstract_dev -d unstract_db` | +| Redis | 6379 | `redis-cli -h localhost` | +| RabbitMQ AMQP | 5672 | `amqp://admin:password@localhost:5672//` | +| RabbitMQ Management | 15672 | http://localhost:15672 (admin/password) | +| MinIO S3 API | 9000 | http://localhost:9000 | +| MinIO Console | 9001 | http://localhost:9001 (minio/minio123) | +| Qdrant | 6333 | http://localhost:6333 | +| Traefik Dashboard | 8080 | http://localhost:8080 | + +### Application + +| Service | Port | URL | +|---|---|---| +| Backend API | 8000 | http://localhost:8000/api/v1/ | +| Frontend | 3000 | http://frontend.unstract.localhost | +| Platform Service | 3001 | http://localhost:3001 | +| X2Text Service | 3004 | http://localhost:3004 | +| Runner | 5002 | http://localhost:5002 | +| Celery Flower (optional) | 5555 | http://localhost:5555 | + +### V2 Worker Health Ports + +| Worker | Internal Port | External Port (Docker) | +|---|---|---| +| API Deployment | 8090 | 8085 | +| Callback | 8083 | 8086 | +| File Processing | 8082 | 8087 | +| General | 8082 | 8088 | +| Notification | 8085 | 8089 | +| Log Consumer | 8084 | 8090 | +| Scheduler | 8087 | 8091 | +| **Executor** | **8088** | **8092** | + +### Debug Ports (Docker dev mode via compose.override.yaml) + +| Service | Debug Port | +|---|---| +| Backend | 5678 | +| Runner | 5679 | +| Platform Service | 5680 | +| Prompt Service | 5681 | +| File Processing Worker | 5682 | +| Callback Worker | 5683 | +| API Deployment Worker | 5684 | +| General Worker | 5685 | + +--- + +## 8. Health Check Endpoints + +Every V2 worker exposes `GET /health` on its health port: + +```bash +# Executor worker +curl -s http://localhost:8088/health + +# Expected response: +# {"status": "healthy", "worker_type": "executor", ...} +``` + +All endpoints: +``` +http://localhost:8080/health — API Deployment worker +http://localhost:8081/health — General worker +http://localhost:8082/health — File Processing worker +http://localhost:8083/health — Callback worker +http://localhost:8084/health — Log Consumer worker +http://localhost:8085/health — Notification worker +http://localhost:8087/health — Scheduler worker +http://localhost:8088/health — Executor worker +``` + +--- + +## 9. Debugging & Troubleshooting + +### 9.1 Common Issues + +**"Connection refused" to RabbitMQ/Redis/DB** +- Check Docker containers are running: `docker ps` +- Check if using Docker hostnames vs localhost (see Section 5.1) +- Ensure ports are exposed: `docker port unstract-rabbitmq` + +**Executor worker starts but tasks don't execute** +- Check queue binding: Worker must listen on `executor` queue +- Check RabbitMQ UI (http://localhost:15672) → Queues tab → look for `executor` queue +- Check task is registered: `celery -A worker inspect registered` +- Check task routing in `workers/shared/infrastructure/config/registry.py` + +**"Module not found" errors in executor worker** +- Ensure `PYTHONPATH` includes the workers directory +- If running locally, `cd workers` before starting +- If using `run-worker.sh`, it sets PYTHONPATH automatically + +**MinIO file access errors** +- Check `WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS` has correct endpoint (localhost vs Docker hostname) +- Verify MinIO bucket exists: `mc ls minio/unstract/` +- MinIO bootstrap container creates the bucket on first start + +**Platform service connection errors** +- Executor needs `PLATFORM_SERVICE_HOST` and `PLATFORM_SERVICE_PORT` +- Verify platform-service is running: `curl http://localhost:3001/health` + +### 9.2 Useful Debug Commands + +```bash +# Check all Docker containers +docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" + +# Check RabbitMQ queues +docker exec unstract-rabbitmq rabbitmqctl list_queues name messages consumers + +# Check Celery worker status (from workers/ dir) +cd workers && uv run celery -A worker inspect active + +# Check registered tasks +cd workers && uv run celery -A worker inspect registered + +# Send a test task to executor +cd workers && uv run python -c " +from worker import app +from shared.enums.task_enums import TaskName +result = app.send_task( + TaskName.EXECUTE_EXTRACTION, + args=[{ + 'executor_name': 'legacy', + 'operation': 'extract', + 'run_id': 'test-123', + 'execution_source': 'tool', + 'executor_params': {} + }], + queue='executor' +) +print(f'Task ID: {result.id}') +print(f'Result: {result.get(timeout=30)}') +" + +# Monitor Celery events in real-time +cd workers && uv run celery -A worker events + +# Check Postgres (Celery result backend) +docker exec -it unstract-db psql -U unstract_dev -d unstract_db -c "SELECT task_id, status FROM public.celery_taskmeta ORDER BY date_done DESC LIMIT 10;" +``` + +### 9.3 Log Locations + +| Context | Location | +|---|---| +| Docker container | `docker logs ` | +| Local worker (foreground) | stdout/stderr | +| Local worker (detached) | `workers//.log` | +| Backend | `docker logs unstract-backend` | + +--- + +## 10. Test Verification Checklist + +### Phase 1 Sanity (Executor Framework) + +- [ ] Executor worker starts and connects to Celery broker +- [ ] Health check responds: `curl http://localhost:8088/health` +- [ ] `execute_extraction` task is registered in Celery +- [ ] No-op task dispatch round-trips successfully +- [ ] Task routing: task goes to `executor` queue, processed by executor worker + +### Phase 2 Sanity (LegacyExecutor) + +- [ ] `extract` operation returns `{"extracted_text": "..."}` +- [ ] `index` operation returns `{"doc_id": "..."}` +- [ ] `answer_prompt` returns `{"output": {...}, "metadata": {...}, "metrics": {...}}` +- [ ] `single_pass_extraction` returns same shape as answer_prompt +- [ ] `summarize` returns `{"data": "..."}` +- [ ] Error cases return `ExecutionResult(success=False, error="...")` not unhandled exceptions + +### Phase 3 Sanity (Structure Tool as Celery Task) + +- [ ] Run workflow with structure tool via new Celery path +- [ ] Compare output with Docker-based structure tool output +- [ ] Non-structure tools still work via Docker/Runner (regression check) + +### Phase 4 Sanity (IDE Path) + +- [ ] Open Prompt Studio IDE, create/load a project +- [ ] Run extraction on a document — result displays correctly +- [ ] Run prompt answering — output persists in DB +- [ ] Error cases display properly in IDE + +### Phase 5 Sanity (Decommission) + +- [ ] `docker compose up` boots cleanly — no errors from missing services +- [ ] No dangling references to prompt-service, PromptTool, PROMPT_HOST, PROMPT_PORT +- [ ] All health checks pass + +### Running Unit Tests + +```bash +# SDK1 tests (execution framework) +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/unstract/sdk1 +/home/harini/Documents/Workspace/unstract-poc/clean/unstract/backend/venv/bin/uv run pytest -v + +# Workers tests (executor, LegacyExecutor, retrievers, etc.) +cd /home/harini/Documents/Workspace/unstract-poc/clean/unstract/workers +/home/harini/Documents/Workspace/unstract-poc/clean/unstract/backend/venv/bin/uv run pytest -v +``` + +--- + +## Quick Reference: One-Liner Setup + +```bash +# From repo root: +cd docker + +# 1. Build images +docker compose -f docker-compose.build.yaml build + +# 2. Start everything with V2 workers +docker compose --profile workers-v2 up -d + +# 3. Verify +docker ps --format "table {{.Names}}\t{{.Status}}" + +# 4. Check executor health +curl -s http://localhost:8092/health # 8092 = external Docker port for executor +``` + +For the automated version, use the setup check script: `scripts/check-local-setup.sh` diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/ocr/register.py b/unstract/sdk1/src/unstract/sdk1/adapters/ocr/register.py index fde5558c16..cbc1a6ea67 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/ocr/register.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/ocr/register.py @@ -45,5 +45,5 @@ def _build_adapter_list(adapter: str, package: str, adapters: dict[str, Any]) -> Common.MODULE: module, Common.METADATA: metadata, } - except ModuleNotFoundError as exception: - logger.warning(f"Unable to import ocr adapters : {exception}") + except Exception as exception: + logger.warning(f"Unable to import OCR adapter '{adapter}': {exception}") diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py b/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py index edef6bd043..82aee35d18 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py @@ -1,5 +1,3 @@ -from qdrant_client.http.exceptions import ApiException as QdrantAPIException -from unstract.sdk1.adapters.vectordb.qdrant.src import Qdrant from unstract.sdk1.adapters.vectordb.vectordb_adapter import VectorDBAdapter from unstract.sdk1.exceptions import VectorDBError @@ -20,9 +18,19 @@ def parse_vector_db_err(e: Exception, vector_db: VectorDBAdapter) -> VectorDBErr if isinstance(e, VectorDBError): return e - if isinstance(e, QdrantAPIException): - err = Qdrant.parse_vector_db_err(e) - else: + # Lazy import to avoid hard dependency on qdrant_client at module level. + # qdrant_client's protobuf files can fail to load depending on the + # protobuf runtime version (KeyError: '_POINTID'). + try: + from qdrant_client.http.exceptions import ApiException as QdrantAPIException + + from unstract.sdk1.adapters.vectordb.qdrant.src import Qdrant + + if isinstance(e, QdrantAPIException): + err = Qdrant.parse_vector_db_err(e) + else: + err = VectorDBError(str(e), actual_err=e) + except Exception: err = VectorDBError(str(e), actual_err=e) msg = f"Error from vector DB '{vector_db.get_name()}'." diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/register.py b/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/register.py index 1c551dafe1..05c01d822e 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/register.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/register.py @@ -45,5 +45,5 @@ def _build_adapter_list(adapter: str, package: str, adapters: dict[str, Any]) -> Common.MODULE: module, Common.METADATA: metadata, } - except ModuleNotFoundError as exception: - logger.warning(f"Unable to import vectorDB adapters : {exception}") + except Exception as exception: + logger.warning(f"Unable to import vectorDB adapter '{adapter}': {exception}") diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/register.py b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/register.py index 48d6a606af..3318887f95 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/register.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/register.py @@ -45,5 +45,5 @@ def _build_adapter_list(adapter: str, package: str, adapters: dict[str, Any]) -> Common.MODULE: module, Common.METADATA: metadata, } - except ModuleNotFoundError as exception: - logger.warning(f"Unable to import X2Text adapters : {exception}") + except Exception as exception: + logger.warning(f"Unable to import X2Text adapter '{adapter}': {exception}") diff --git a/unstract/sdk1/src/unstract/sdk1/execution/__init__.py b/unstract/sdk1/src/unstract/sdk1/execution/__init__.py new file mode 100644 index 0000000000..fa70c88821 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/__init__.py @@ -0,0 +1,15 @@ +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + +__all__ = [ + "BaseExecutor", + "ExecutionContext", + "ExecutionDispatcher", + "ExecutionOrchestrator", + "ExecutionResult", + "ExecutorRegistry", +] diff --git a/unstract/sdk1/src/unstract/sdk1/execution/context.py b/unstract/sdk1/src/unstract/sdk1/execution/context.py new file mode 100644 index 0000000000..ce5810066d --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/context.py @@ -0,0 +1,109 @@ +"""Execution context model for the executor framework. + +Defines the serializable context that is dispatched to executor +workers via Celery. Used by both the workflow path (structure tool +task) and the IDE path (PromptStudioHelper). +""" + +import uuid +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + + +class ExecutionSource(str, Enum): + """Origin of the execution request.""" + + IDE = "ide" + TOOL = "tool" + + +class Operation(str, Enum): + """Supported extraction operations. + + Maps 1-to-1 with current PromptTool HTTP endpoints. + """ + + EXTRACT = "extract" + INDEX = "index" + ANSWER_PROMPT = "answer_prompt" + SINGLE_PASS_EXTRACTION = "single_pass_extraction" + SUMMARIZE = "summarize" + AGENTIC_EXTRACTION = "agentic_extraction" + + +@dataclass +class ExecutionContext: + """Serializable execution context dispatched to executor worker. + + This is the single payload sent as a Celery task argument to + ``execute_extraction``. It must remain JSON-serializable (no + ORM objects, no file handles, no callables). + + Attributes: + executor_name: Registered executor to handle this request + (e.g. ``"legacy"``, ``"agentic_table"``). + operation: The extraction operation to perform. + run_id: Unique identifier for this execution run. + execution_source: Where the request originated + (``"ide"`` or ``"tool"``). + organization_id: Tenant/org scope. ``None`` for public + calls. + executor_params: Opaque, operation-specific payload passed + through to the executor. Must be JSON-serializable. + request_id: Correlation ID for tracing across services. + """ + + executor_name: str + operation: str + run_id: str + execution_source: str + organization_id: str | None = None + executor_params: dict[str, Any] = field(default_factory=dict) + request_id: str | None = None + + def __post_init__(self) -> None: + """Validate required fields after initialization.""" + if not self.executor_name: + raise ValueError("executor_name is required") + if not self.operation: + raise ValueError("operation is required") + if not self.run_id: + raise ValueError("run_id is required") + if not self.execution_source: + raise ValueError("execution_source is required") + + # Normalize enum values to plain strings for serialization + if isinstance(self.operation, Operation): + self.operation = self.operation.value + if isinstance(self.execution_source, ExecutionSource): + self.execution_source = self.execution_source.value + + # Auto-generate request_id if not provided + if self.request_id is None: + self.request_id = str(uuid.uuid4()) + + def to_dict(self) -> dict[str, Any]: + """Serialize to a JSON-compatible dict for Celery dispatch.""" + return { + "executor_name": self.executor_name, + "operation": self.operation, + "run_id": self.run_id, + "execution_source": self.execution_source, + "organization_id": self.organization_id, + "executor_params": self.executor_params, + "request_id": self.request_id, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "ExecutionContext": + """Deserialize from a dict (e.g. Celery task argument).""" + return cls( + executor_name=data["executor_name"], + operation=data["operation"], + run_id=data["run_id"], + execution_source=data["execution_source"], + organization_id=data.get("organization_id"), + executor_params=data.get("executor_params", {}), + request_id=data.get("request_id"), + ) diff --git a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py new file mode 100644 index 0000000000..8d55a5d58c --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py @@ -0,0 +1,164 @@ +"""Execution dispatcher for sending Celery tasks to executor workers. + +The dispatcher is the caller-side component used by both: +- Structure tool Celery task (workflow path) +- PromptStudioHelper (IDE path) + +It sends ``execute_extraction`` tasks to the ``executor`` queue +and waits for results via ``AsyncResult.get()``. +""" + +import logging +import os +from typing import Any + +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.result import ExecutionResult + +logger = logging.getLogger(__name__) + +# Constants matching workers/shared/enums values. +# Defined here to avoid an SDK1 → workers package dependency. +_TASK_NAME = "execute_extraction" +_QUEUE_NAME = "executor" + +# Caller-side timeout (seconds) for AsyncResult.get(). +# This controls how long the *caller* waits for the executor to +# finish — distinct from the executor worker's +# ``EXECUTOR_TASK_TIME_LIMIT`` which controls how long the +# *worker* allows a task to run. +# +# Resolution order (matches workers convention): +# 1. Explicit ``timeout`` parameter on dispatch() +# 2. ``EXECUTOR_RESULT_TIMEOUT`` env var +# 3. Hardcoded default (3600s) +# +# The default (3600s) is intentionally <= the executor worker's +# ``task_time_limit`` default (also 3600s) so the caller never +# waits longer than the worker allows the task to run. +_DEFAULT_TIMEOUT_ENV = "EXECUTOR_RESULT_TIMEOUT" +_DEFAULT_TIMEOUT = 3600 # 1 hour — matches executor worker default + + +class ExecutionDispatcher: + """Dispatches execution to executor worker via Celery task. + + Usage:: + + dispatcher = ExecutionDispatcher(celery_app=app) + result = dispatcher.dispatch(context, timeout=120) + + Or fire-and-forget:: + + task_id = dispatcher.dispatch_async(context) + """ + + def __init__(self, celery_app: Any = None) -> None: + """Initialize the dispatcher. + + Args: + celery_app: A Celery application instance. Required + for dispatching tasks. Can be ``None`` only if + set later via ``celery_app`` attribute. + """ + self._app = celery_app + + def dispatch( + self, + context: ExecutionContext, + timeout: int | None = None, + ) -> ExecutionResult: + """Dispatch context as a Celery task and wait for result. + + Args: + context: ExecutionContext to dispatch. + timeout: Max seconds to wait. ``None`` reads from + the ``EXECUTOR_RESULT_TIMEOUT`` env var, + falling back to 3600s. + + Returns: + ExecutionResult from the executor. + + Raises: + ValueError: If no Celery app is configured. + """ + if self._app is None: + raise ValueError( + "No Celery app configured on ExecutionDispatcher" + ) + + if timeout is None: + timeout = int( + os.environ.get( + _DEFAULT_TIMEOUT_ENV, _DEFAULT_TIMEOUT + ) + ) + + logger.info( + "Dispatching execution: executor=%s operation=%s " + "run_id=%s request_id=%s timeout=%ss", + context.executor_name, + context.operation, + context.run_id, + context.request_id, + timeout, + ) + + async_result = self._app.send_task( + _TASK_NAME, + args=[context.to_dict()], + queue=_QUEUE_NAME, + ) + + try: + result_dict = async_result.get(timeout=timeout) + except Exception as exc: + logger.error( + "Dispatch failed: executor=%s operation=%s " + "run_id=%s error=%s", + context.executor_name, + context.operation, + context.run_id, + exc, + ) + return ExecutionResult.failure( + error=f"{type(exc).__name__}: {exc}", + ) + + return ExecutionResult.from_dict(result_dict) + + def dispatch_async( + self, + context: ExecutionContext, + ) -> str: + """Dispatch without waiting. Returns task_id for polling. + + Args: + context: ExecutionContext to dispatch. + + Returns: + The Celery task ID (use with ``AsyncResult`` to poll). + + Raises: + ValueError: If no Celery app is configured. + """ + if self._app is None: + raise ValueError( + "No Celery app configured on ExecutionDispatcher" + ) + + logger.info( + "Dispatching async execution: executor=%s " + "operation=%s run_id=%s request_id=%s", + context.executor_name, + context.operation, + context.run_id, + context.request_id, + ) + + async_result = self._app.send_task( + _TASK_NAME, + args=[context.to_dict()], + queue=_QUEUE_NAME, + ) + return async_result.id diff --git a/unstract/sdk1/src/unstract/sdk1/execution/executor.py b/unstract/sdk1/src/unstract/sdk1/execution/executor.py new file mode 100644 index 0000000000..8d3bda8407 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/executor.py @@ -0,0 +1,46 @@ +"""Base executor interface for the pluggable executor framework. + +All executors must subclass ``BaseExecutor`` and implement ``name`` +and ``execute``. Registration is handled by +``ExecutorRegistry.register``. +""" + +from abc import ABC, abstractmethod + +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.result import ExecutionResult + + +class BaseExecutor(ABC): + """Abstract base class for execution strategy implementations. + + Each executor encapsulates a particular extraction strategy + (e.g. the legacy promptservice pipeline, an agentic table + extractor, etc.). Executors are stateless — all request- + specific data arrives via ``ExecutionContext``. + """ + + @property + @abstractmethod + def name(self) -> str: + """Unique identifier used to look up this executor. + + Must match the ``executor_name`` value in + ``ExecutionContext``. Convention: lowercase, snake_case + (e.g. ``"legacy"``, ``"agentic_table"``). + """ + + @abstractmethod + def execute( + self, context: ExecutionContext + ) -> ExecutionResult: + """Run the extraction strategy described by *context*. + + Args: + context: Fully-populated execution context with + operation type and executor params. + + Returns: + An ``ExecutionResult`` whose ``data`` dict conforms to + the response contract for the given operation. + """ diff --git a/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py b/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py new file mode 100644 index 0000000000..fe066c7265 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py @@ -0,0 +1,81 @@ +"""Execution orchestrator for the executor worker. + +The orchestrator is the entry point called by the +``execute_extraction`` Celery task. It resolves the correct +executor from the registry and delegates execution, ensuring +that unhandled exceptions are always wrapped in a failed +``ExecutionResult``. +""" + +import logging +import time + +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + +logger = logging.getLogger(__name__) + + +class ExecutionOrchestrator: + """Looks up and invokes the executor for a given context. + + Usage (inside the Celery task):: + + orchestrator = ExecutionOrchestrator() + result = orchestrator.execute(context) + """ + + def execute( + self, context: ExecutionContext + ) -> ExecutionResult: + """Resolve the executor and run it. + + Args: + context: Fully-populated execution context. + + Returns: + ``ExecutionResult`` — always, even on unhandled + exceptions (wrapped as a failure result). + """ + logger.info( + "Orchestrating execution: executor=%s operation=%s " + "run_id=%s request_id=%s", + context.executor_name, + context.operation, + context.run_id, + context.request_id, + ) + + start = time.monotonic() + try: + executor = ExecutorRegistry.get(context.executor_name) + except KeyError as exc: + logger.error("Executor lookup failed: %s", exc) + return ExecutionResult.failure(error=str(exc)) + + try: + result = executor.execute(context) + except Exception as exc: + elapsed = time.monotonic() - start + logger.exception( + "Executor %r raised an unhandled exception " + "after %.2fs", + context.executor_name, + elapsed, + ) + return ExecutionResult.failure( + error=f"{type(exc).__name__}: {exc}", + metadata={"elapsed_seconds": round(elapsed, 3)}, + ) + + elapsed = time.monotonic() - start + logger.info( + "Execution completed: executor=%s operation=%s " + "success=%s elapsed=%.2fs", + context.executor_name, + context.operation, + result.success, + elapsed, + ) + return result diff --git a/unstract/sdk1/src/unstract/sdk1/execution/registry.py b/unstract/sdk1/src/unstract/sdk1/execution/registry.py new file mode 100644 index 0000000000..10cfeecaf6 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/registry.py @@ -0,0 +1,114 @@ +"""Executor registry for the pluggable executor framework. + +Provides a simple in-process registry where executor classes +self-register at import time via the ``@ExecutorRegistry.register`` +decorator. The executor worker imports all executor modules so +that registration happens before any task is processed. +""" + +import logging +from typing import TypeVar + +from unstract.sdk1.execution.executor import BaseExecutor + +logger = logging.getLogger(__name__) + +T = TypeVar("T", bound=type[BaseExecutor]) + + +class ExecutorRegistry: + """In-process registry mapping executor names to classes. + + Usage:: + + @ExecutorRegistry.register + class LegacyExecutor(BaseExecutor): + @property + def name(self) -> str: + return "legacy" + ... + + executor = ExecutorRegistry.get("legacy") + """ + + _registry: dict[str, type[BaseExecutor]] = {} + + @classmethod + def register(cls, executor_cls: T) -> T: + """Class decorator that registers an executor. + + Instantiates the class once to read its ``name`` property, + then stores the *class* (not the instance) so a fresh + instance is created per ``get()`` call. + + Args: + executor_cls: A concrete ``BaseExecutor`` subclass. + + Returns: + The same class, unmodified (passthrough decorator). + + Raises: + TypeError: If *executor_cls* is not a BaseExecutor + subclass. + ValueError: If an executor with the same name is + already registered. + """ + if not ( + isinstance(executor_cls, type) + and issubclass(executor_cls, BaseExecutor) + ): + raise TypeError( + f"{executor_cls!r} is not a BaseExecutor subclass" + ) + + # Instantiate temporarily to read the name property + instance = executor_cls() + name = instance.name + + if name in cls._registry: + existing = cls._registry[name] + raise ValueError( + f"Executor name {name!r} is already registered " + f"by {existing.__name__}; cannot register " + f"{executor_cls.__name__}" + ) + + cls._registry[name] = executor_cls + logger.info( + "Registered executor %r (%s)", + name, + executor_cls.__name__, + ) + return executor_cls + + @classmethod + def get(cls, name: str) -> BaseExecutor: + """Look up and instantiate an executor by name. + + Args: + name: The executor name (e.g. ``"legacy"``). + + Returns: + A fresh ``BaseExecutor`` instance. + + Raises: + KeyError: If no executor is registered under *name*. + """ + executor_cls = cls._registry.get(name) + if executor_cls is None: + available = ", ".join(sorted(cls._registry)) or "(none)" + raise KeyError( + f"No executor registered with name {name!r}. " + f"Available: {available}" + ) + return executor_cls() + + @classmethod + def list_executors(cls) -> list[str]: + """Return sorted list of registered executor names.""" + return sorted(cls._registry) + + @classmethod + def clear(cls) -> None: + """Remove all registered executors (for testing).""" + cls._registry.clear() diff --git a/unstract/sdk1/src/unstract/sdk1/execution/result.py b/unstract/sdk1/src/unstract/sdk1/execution/result.py new file mode 100644 index 0000000000..2660dcb7d2 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/execution/result.py @@ -0,0 +1,74 @@ +"""Execution result model for the executor framework. + +Defines the standardized result returned by executors via the +Celery result backend. All executors must return an +``ExecutionResult`` so that callers (structure tool task, +PromptStudioHelper) have a uniform interface. +""" + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class ExecutionResult: + """Standardized result from an executor. + + Returned via the Celery result backend as a JSON dict. + + Attributes: + success: Whether the execution completed without error. + data: Operation-specific output payload. The shape depends + on the operation (see response contract in the + migration plan). + metadata: Auxiliary information such as token usage, + timings, or adapter metrics. + error: Human-readable error message when ``success`` is + ``False``. ``None`` on success. + """ + + success: bool + data: dict[str, Any] = field(default_factory=dict) + metadata: dict[str, Any] = field(default_factory=dict) + error: str | None = None + + def __post_init__(self) -> None: + """Validate result consistency after initialization.""" + if not self.success and not self.error: + raise ValueError( + "error message is required when success is False" + ) + + def to_dict(self) -> dict[str, Any]: + """Serialize to a JSON-compatible dict for Celery.""" + result: dict[str, Any] = { + "success": self.success, + "data": self.data, + "metadata": self.metadata, + } + if self.error is not None: + result["error"] = self.error + return result + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "ExecutionResult": + """Deserialize from a dict (e.g. Celery result backend).""" + return cls( + success=data["success"], + data=data.get("data", {}), + metadata=data.get("metadata", {}), + error=data.get("error"), + ) + + @classmethod + def failure( + cls, + error: str, + metadata: dict[str, Any] | None = None, + ) -> "ExecutionResult": + """Convenience factory for a failed result.""" + return cls( + success=False, + error=error, + metadata=metadata or {}, + ) diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py new file mode 100644 index 0000000000..8200ec1530 --- /dev/null +++ b/unstract/sdk1/tests/test_execution.py @@ -0,0 +1,960 @@ +"""Unit tests for execution framework (Phase 1A–1G).""" + +import json +import logging +from typing import Any, Self +from unittest.mock import MagicMock, patch + +import pytest +from unstract.sdk1.constants import LogLevel, ToolEnv +from unstract.sdk1.exceptions import SdkError +from unstract.sdk1.execution.context import ( + ExecutionContext, + ExecutionSource, + Operation, +) +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +class TestExecutionContext: + """Tests for ExecutionContext serialization and validation.""" + + def _make_context(self, **overrides: Any) -> ExecutionContext: + """Create a default ExecutionContext with optional overrides.""" + defaults: dict[str, Any] = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-001", + "execution_source": "tool", + "organization_id": "org-123", + "executor_params": {"file_path": "/tmp/test.pdf"}, + "request_id": "req-abc", + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + def test_round_trip_serialization(self: Self) -> None: + """to_dict -> from_dict produces identical context.""" + original = self._make_context() + restored = ExecutionContext.from_dict(original.to_dict()) + + assert restored.executor_name == original.executor_name + assert restored.operation == original.operation + assert restored.run_id == original.run_id + assert restored.execution_source == original.execution_source + assert restored.organization_id == original.organization_id + assert restored.executor_params == original.executor_params + assert restored.request_id == original.request_id + + def test_json_serializable(self: Self) -> None: + """to_dict output is JSON-serializable (Celery requirement).""" + ctx = self._make_context() + serialized = json.dumps(ctx.to_dict()) + deserialized = json.loads(serialized) + restored = ExecutionContext.from_dict(deserialized) + assert restored.executor_name == ctx.executor_name + + def test_enum_values_normalized(self: Self) -> None: + """Enum instances are normalized to plain strings.""" + ctx = self._make_context( + operation=Operation.ANSWER_PROMPT, + execution_source=ExecutionSource.IDE, + ) + assert ctx.operation == "answer_prompt" + assert ctx.execution_source == "ide" + # Also check dict output + d = ctx.to_dict() + assert d["operation"] == "answer_prompt" + assert d["execution_source"] == "ide" + + def test_string_values_accepted(self: Self) -> None: + """Plain string values work without enum coercion.""" + ctx = self._make_context( + operation="custom_op", + execution_source="tool", + ) + assert ctx.operation == "custom_op" + assert ctx.execution_source == "tool" + + def test_auto_generates_request_id(self: Self) -> None: + """request_id is generated when not provided.""" + ctx = self._make_context(request_id=None) + assert ctx.request_id is not None + assert len(ctx.request_id) > 0 + + def test_explicit_request_id_preserved(self: Self) -> None: + """Explicit request_id is not overwritten.""" + ctx = self._make_context(request_id="my-req-id") + assert ctx.request_id == "my-req-id" + + def test_optional_organization_id(self: Self) -> None: + """organization_id can be None (public calls).""" + ctx = self._make_context(organization_id=None) + assert ctx.organization_id is None + d = ctx.to_dict() + assert d["organization_id"] is None + restored = ExecutionContext.from_dict(d) + assert restored.organization_id is None + + def test_empty_executor_params_default(self: Self) -> None: + """executor_params defaults to empty dict.""" + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="run-001", + execution_source="tool", + ) + assert ctx.executor_params == {} + + def test_complex_executor_params(self: Self) -> None: + """Nested executor_params round-trip correctly.""" + params = { + "file_path": "/data/doc.pdf", + "outputs": [ + {"prompt_key": "p1", "llm": "adapter-1"}, + {"prompt_key": "p2", "llm": "adapter-2"}, + ], + "options": {"reindex": True, "chunk_size": 512}, + } + ctx = self._make_context(executor_params=params) + restored = ExecutionContext.from_dict(ctx.to_dict()) + assert restored.executor_params == params + + @pytest.mark.parametrize( + "field,value", + [ + ("executor_name", ""), + ("operation", ""), + ("run_id", ""), + ("execution_source", ""), + ], + ) + def test_validation_rejects_empty_required_fields( + self: Self, field: str, value: str + ) -> None: + """Empty required fields raise ValueError.""" + with pytest.raises(ValueError, match=f"{field} is required"): + self._make_context(**{field: value}) + + def test_all_operations_accepted(self: Self) -> None: + """All Operation enum values create valid contexts.""" + for op in Operation: + ctx = self._make_context(operation=op) + assert ctx.operation == op.value + + def test_from_dict_missing_optional_fields(self: Self) -> None: + """from_dict handles missing optional fields gracefully.""" + minimal = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-001", + "execution_source": "tool", + } + ctx = ExecutionContext.from_dict(minimal) + assert ctx.organization_id is None + assert ctx.executor_params == {} + # request_id is None from dict (no auto-gen in from_dict) + # but __post_init__ auto-generates it + assert ctx.request_id is not None + + +class TestExecutionResult: + """Tests for ExecutionResult serialization and validation.""" + + def test_success_round_trip(self: Self) -> None: + """Successful result round-trips through dict.""" + original = ExecutionResult( + success=True, + data={"output": {"key": "value"}, "metadata": {}}, + metadata={"tokens": 150, "latency_ms": 320}, + ) + restored = ExecutionResult.from_dict(original.to_dict()) + assert restored.success is True + assert restored.data == original.data + assert restored.metadata == original.metadata + assert restored.error is None + + def test_failure_round_trip(self: Self) -> None: + """Failed result round-trips through dict.""" + original = ExecutionResult( + success=False, + error="LLM adapter timeout", + metadata={"retry_count": 2}, + ) + restored = ExecutionResult.from_dict(original.to_dict()) + assert restored.success is False + assert restored.error == "LLM adapter timeout" + assert restored.data == {} + assert restored.metadata == {"retry_count": 2} + + def test_json_serializable(self: Self) -> None: + """to_dict output is JSON-serializable.""" + result = ExecutionResult( + success=True, + data={"extracted_text": "Hello world"}, + ) + serialized = json.dumps(result.to_dict()) + deserialized = json.loads(serialized) + restored = ExecutionResult.from_dict(deserialized) + assert restored.data == result.data + + def test_failure_requires_error_message(self: Self) -> None: + """success=False without error raises ValueError.""" + with pytest.raises( + ValueError, + match="error message is required", + ): + ExecutionResult(success=False) + + def test_success_allows_no_error(self: Self) -> None: + """success=True with no error is valid.""" + result = ExecutionResult(success=True) + assert result.error is None + + def test_failure_factory(self: Self) -> None: + """ExecutionResult.failure() convenience constructor.""" + result = ExecutionResult.failure( + error="Something broke", + metadata={"debug": True}, + ) + assert result.success is False + assert result.error == "Something broke" + assert result.data == {} + assert result.metadata == {"debug": True} + + def test_failure_factory_no_metadata(self: Self) -> None: + """failure() works without metadata.""" + result = ExecutionResult.failure(error="Oops") + assert result.metadata == {} + + def test_error_not_in_success_dict(self: Self) -> None: + """Successful result dict omits error key.""" + result = ExecutionResult(success=True, data={"k": "v"}) + d = result.to_dict() + assert "error" not in d + + def test_error_in_failure_dict(self: Self) -> None: + """Failed result dict includes error key.""" + result = ExecutionResult.failure(error="fail") + d = result.to_dict() + assert d["error"] == "fail" + + def test_default_empty_dicts(self: Self) -> None: + """data and metadata default to empty dicts.""" + result = ExecutionResult(success=True) + assert result.data == {} + assert result.metadata == {} + + def test_from_dict_missing_optional_fields(self: Self) -> None: + """from_dict handles missing optional fields.""" + minimal = {"success": True} + result = ExecutionResult.from_dict(minimal) + assert result.data == {} + assert result.metadata == {} + assert result.error is None + + def test_response_contract_extract(self: Self) -> None: + """Verify extract operation response shape.""" + result = ExecutionResult( + success=True, + data={"extracted_text": "The quick brown fox"}, + ) + assert "extracted_text" in result.data + + def test_response_contract_index(self: Self) -> None: + """Verify index operation response shape.""" + result = ExecutionResult( + success=True, + data={"doc_id": "doc-abc-123"}, + ) + assert "doc_id" in result.data + + def test_response_contract_answer_prompt(self: Self) -> None: + """Verify answer_prompt operation response shape.""" + result = ExecutionResult( + success=True, + data={ + "output": {"field1": "value1"}, + "metadata": {"confidence": 0.95}, + "metrics": {"tokens": 200}, + }, + ) + assert "output" in result.data + assert "metadata" in result.data + assert "metrics" in result.data + + +# ---- Phase 1B: BaseExecutor & ExecutorRegistry ---- + + +def _make_executor_class( + executor_name: str, +) -> type[BaseExecutor]: + """Helper: build a concrete BaseExecutor subclass dynamically.""" + + class _Executor(BaseExecutor): + @property + def name(self) -> str: + return executor_name + + def execute( + self, context: ExecutionContext + ) -> ExecutionResult: + return ExecutionResult( + success=True, + data={"echo": context.operation}, + ) + + # Give it a readable __name__ for error messages + _Executor.__name__ = f"{executor_name.title()}Executor" + _Executor.__qualname__ = _Executor.__name__ + return _Executor + + +class TestBaseExecutor: + """Tests for BaseExecutor ABC contract.""" + + def test_cannot_instantiate_abstract(self: Self) -> None: + """BaseExecutor itself cannot be instantiated.""" + with pytest.raises(TypeError): + BaseExecutor() # type: ignore[abstract] + + def test_concrete_subclass_works(self: Self) -> None: + """A properly implemented subclass can be instantiated.""" + cls = _make_executor_class("test_abc") + instance = cls() + assert instance.name == "test_abc" + + def test_execute_returns_result(self: Self) -> None: + """execute() returns an ExecutionResult.""" + cls = _make_executor_class("test_exec") + instance = cls() + ctx = ExecutionContext( + executor_name="test_exec", + operation="extract", + run_id="run-1", + execution_source="tool", + ) + result = instance.execute(ctx) + assert isinstance(result, ExecutionResult) + assert result.success is True + assert result.data == {"echo": "extract"} + + +class TestExecutorRegistry: + """Tests for ExecutorRegistry.""" + + @pytest.fixture(autouse=True) + def _clean_registry(self: Self) -> None: + """Ensure a clean registry for every test.""" + ExecutorRegistry.clear() + + def test_register_and_get(self: Self) -> None: + """Register an executor and retrieve by name.""" + cls = _make_executor_class("alpha") + ExecutorRegistry.register(cls) + + executor = ExecutorRegistry.get("alpha") + assert isinstance(executor, BaseExecutor) + assert executor.name == "alpha" + + def test_get_returns_fresh_instance(self: Self) -> None: + """Each get() call returns a new instance.""" + cls = _make_executor_class("fresh") + ExecutorRegistry.register(cls) + + a = ExecutorRegistry.get("fresh") + b = ExecutorRegistry.get("fresh") + assert a is not b + + def test_register_as_decorator(self: Self) -> None: + """@ExecutorRegistry.register works as a class decorator.""" + + @ExecutorRegistry.register + class MyExecutor(BaseExecutor): + @property + def name(self) -> str: + return "decorated" + + def execute( + self, context: ExecutionContext + ) -> ExecutionResult: + return ExecutionResult(success=True) + + executor = ExecutorRegistry.get("decorated") + assert executor.name == "decorated" + # Decorator returns the class unchanged + assert MyExecutor is not None + + def test_list_executors(self: Self) -> None: + """list_executors() returns sorted names.""" + ExecutorRegistry.register(_make_executor_class("charlie")) + ExecutorRegistry.register(_make_executor_class("alpha")) + ExecutorRegistry.register(_make_executor_class("bravo")) + + assert ExecutorRegistry.list_executors() == [ + "alpha", + "bravo", + "charlie", + ] + + def test_list_executors_empty(self: Self) -> None: + """list_executors() returns empty list when nothing registered.""" + assert ExecutorRegistry.list_executors() == [] + + def test_get_unknown_raises_key_error(self: Self) -> None: + """get() with unknown name raises KeyError.""" + with pytest.raises(KeyError, match="no_such_executor"): + ExecutorRegistry.get("no_such_executor") + + def test_get_unknown_lists_available(self: Self) -> None: + """KeyError message includes available executor names.""" + ExecutorRegistry.register(_make_executor_class("one")) + ExecutorRegistry.register(_make_executor_class("two")) + + with pytest.raises(KeyError, match="one") as exc_info: + ExecutorRegistry.get("missing") + assert "two" in str(exc_info.value) + + def test_duplicate_name_raises_value_error(self: Self) -> None: + """Registering two executors with the same name fails.""" + ExecutorRegistry.register(_make_executor_class("dup")) + with pytest.raises(ValueError, match="already registered"): + ExecutorRegistry.register(_make_executor_class("dup")) + + def test_register_non_subclass_raises_type_error(self: Self) -> None: + """Registering a non-BaseExecutor class raises TypeError.""" + with pytest.raises(TypeError, match="not a BaseExecutor"): + ExecutorRegistry.register(dict) # type: ignore[arg-type] + + def test_register_non_class_raises_type_error(self: Self) -> None: + """Registering a non-class object raises TypeError.""" + with pytest.raises(TypeError, match="not a BaseExecutor"): + ExecutorRegistry.register("not_a_class") # type: ignore[arg-type] + + def test_clear(self: Self) -> None: + """clear() removes all registrations.""" + ExecutorRegistry.register(_make_executor_class("temp")) + assert ExecutorRegistry.list_executors() == ["temp"] + ExecutorRegistry.clear() + assert ExecutorRegistry.list_executors() == [] + + def test_execute_through_registry(self: Self) -> None: + """End-to-end: register, get, execute.""" + ExecutorRegistry.register(_make_executor_class("e2e")) + + ctx = ExecutionContext( + executor_name="e2e", + operation="index", + run_id="run-42", + execution_source="ide", + ) + executor = ExecutorRegistry.get("e2e") + result = executor.execute(ctx) + + assert result.success is True + assert result.data == {"echo": "index"} + + +# ---- Phase 1C: ExecutionOrchestrator ---- + + +def _make_failing_executor_class( + executor_name: str, + exc: Exception, +) -> type[BaseExecutor]: + """Build an executor that always raises *exc*.""" + + class _FailExecutor(BaseExecutor): + @property + def name(self) -> str: + return executor_name + + def execute( + self, context: ExecutionContext + ) -> ExecutionResult: + raise exc + + _FailExecutor.__name__ = f"{executor_name.title()}FailExecutor" + _FailExecutor.__qualname__ = _FailExecutor.__name__ + return _FailExecutor + + +class TestExecutionOrchestrator: + """Tests for ExecutionOrchestrator.""" + + @pytest.fixture(autouse=True) + def _clean_registry(self: Self) -> None: + """Ensure a clean registry for every test.""" + ExecutorRegistry.clear() + + def _make_context(self, **overrides: Any) -> ExecutionContext: + defaults: dict[str, Any] = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-1", + "execution_source": "tool", + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + def test_dispatches_to_correct_executor(self: Self) -> None: + """Orchestrator routes to the right executor by name.""" + ExecutorRegistry.register(_make_executor_class("alpha")) + ExecutorRegistry.register(_make_executor_class("bravo")) + + orchestrator = ExecutionOrchestrator() + + result_a = orchestrator.execute( + self._make_context(executor_name="alpha", operation="extract") + ) + assert result_a.success is True + assert result_a.data == {"echo": "extract"} + + result_b = orchestrator.execute( + self._make_context(executor_name="bravo", operation="index") + ) + assert result_b.success is True + assert result_b.data == {"echo": "index"} + + def test_unknown_executor_returns_failure(self: Self) -> None: + """Unknown executor_name yields a failure result (not exception).""" + orchestrator = ExecutionOrchestrator() + result = orchestrator.execute( + self._make_context(executor_name="nonexistent") + ) + assert result.success is False + assert "nonexistent" in result.error + + def test_executor_exception_returns_failure(self: Self) -> None: + """Unhandled executor exception is wrapped in failure result.""" + ExecutorRegistry.register( + _make_failing_executor_class( + "boom", RuntimeError("kaboom") + ) + ) + orchestrator = ExecutionOrchestrator() + result = orchestrator.execute( + self._make_context(executor_name="boom") + ) + assert result.success is False + assert "RuntimeError" in result.error + assert "kaboom" in result.error + + def test_exception_result_has_elapsed_metadata(self: Self) -> None: + """Failure from exception includes elapsed_seconds metadata.""" + ExecutorRegistry.register( + _make_failing_executor_class( + "slow_fail", ValueError("bad input") + ) + ) + orchestrator = ExecutionOrchestrator() + result = orchestrator.execute( + self._make_context(executor_name="slow_fail") + ) + assert result.success is False + assert "elapsed_seconds" in result.metadata + assert isinstance(result.metadata["elapsed_seconds"], float) + + def test_successful_result_passed_through(self: Self) -> None: + """Orchestrator returns the executor's result as-is on success.""" + ExecutorRegistry.register(_make_executor_class("passthru")) + orchestrator = ExecutionOrchestrator() + + ctx = self._make_context( + executor_name="passthru", operation="answer_prompt" + ) + result = orchestrator.execute(ctx) + + assert result.success is True + assert result.data == {"echo": "answer_prompt"} + + def test_executor_returning_failure_is_not_wrapped( + self: Self, + ) -> None: + """An executor that returns failure result is passed through.""" + + class FailingExecutor(BaseExecutor): + @property + def name(self) -> str: + return "graceful_fail" + + def execute( + self, context: ExecutionContext + ) -> ExecutionResult: + return ExecutionResult.failure( + error="LLM rate limited" + ) + + ExecutorRegistry.register(FailingExecutor) + orchestrator = ExecutionOrchestrator() + + result = orchestrator.execute( + self._make_context(executor_name="graceful_fail") + ) + assert result.success is False + assert result.error == "LLM rate limited" + + +# ---- Phase 1F: ExecutionDispatcher ---- + + +class TestExecutionDispatcher: + """Tests for ExecutionDispatcher (mocked Celery).""" + + def _make_context(self, **overrides: Any) -> ExecutionContext: + defaults: dict[str, Any] = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-1", + "execution_source": "tool", + "request_id": "req-1", + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + def _make_mock_app( + self, + result_dict: dict[str, Any] | None = None, + side_effect: Exception | None = None, + task_id: str = "celery-task-123", + ) -> MagicMock: + """Create a mock Celery app with send_task configured.""" + mock_app = MagicMock() + mock_async_result = MagicMock() + mock_async_result.id = task_id + + if side_effect is not None: + mock_async_result.get.side_effect = side_effect + else: + mock_async_result.get.return_value = ( + result_dict + if result_dict is not None + else {"success": True, "data": {}, "metadata": {}} + ) + + mock_app.send_task.return_value = mock_async_result + return mock_app + + def test_dispatch_sends_task_and_returns_result( + self: Self, + ) -> None: + """dispatch() sends task to executor queue and returns result.""" + result_dict = { + "success": True, + "data": {"extracted_text": "hello"}, + "metadata": {}, + } + mock_app = self._make_mock_app(result_dict=result_dict) + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch(ctx, timeout=60) + + assert result.success is True + assert result.data == {"extracted_text": "hello"} + + # Verify send_task was called correctly + mock_app.send_task.assert_called_once_with( + "execute_extraction", + args=[ctx.to_dict()], + queue="executor", + ) + mock_app.send_task.return_value.get.assert_called_once_with( + timeout=60 + ) + + def test_dispatch_uses_default_timeout(self: Self) -> None: + """dispatch() without timeout uses default (3600s).""" + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + dispatcher.dispatch(ctx) + + mock_app.send_task.return_value.get.assert_called_once_with( + timeout=3600 + ) + + def test_dispatch_timeout_from_env( + self: Self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """dispatch() reads timeout from EXECUTOR_RESULT_TIMEOUT env.""" + monkeypatch.setenv("EXECUTOR_RESULT_TIMEOUT", "120") + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + dispatcher.dispatch(ctx) + + mock_app.send_task.return_value.get.assert_called_once_with( + timeout=120 + ) + + def test_dispatch_explicit_timeout_overrides_env( + self: Self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Explicit timeout parameter overrides env var.""" + monkeypatch.setenv("EXECUTOR_RESULT_TIMEOUT", "120") + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + dispatcher.dispatch(ctx, timeout=30) + + mock_app.send_task.return_value.get.assert_called_once_with( + timeout=30 + ) + + def test_dispatch_timeout_returns_failure( + self: Self, + ) -> None: + """TimeoutError from AsyncResult.get() is wrapped in failure.""" + mock_app = self._make_mock_app( + side_effect=TimeoutError("Task timed out") + ) + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch(ctx, timeout=1) + + assert result.success is False + assert "TimeoutError" in result.error + + def test_dispatch_generic_exception_returns_failure( + self: Self, + ) -> None: + """Any exception from AsyncResult.get() becomes a failure.""" + mock_app = self._make_mock_app( + side_effect=RuntimeError("broker down") + ) + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch(ctx, timeout=10) + + assert result.success is False + assert "RuntimeError" in result.error + assert "broker down" in result.error + + def test_dispatch_async_returns_task_id(self: Self) -> None: + """dispatch_async() returns the Celery task ID.""" + mock_app = self._make_mock_app(task_id="task-xyz-789") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + task_id = dispatcher.dispatch_async(ctx) + + assert task_id == "task-xyz-789" + mock_app.send_task.assert_called_once_with( + "execute_extraction", + args=[ctx.to_dict()], + queue="executor", + ) + + def test_dispatch_no_app_raises_value_error( + self: Self, + ) -> None: + """dispatch() without celery_app raises ValueError.""" + dispatcher = ExecutionDispatcher(celery_app=None) + ctx = self._make_context() + + with pytest.raises(ValueError, match="No Celery app"): + dispatcher.dispatch(ctx) + + def test_dispatch_async_no_app_raises_value_error( + self: Self, + ) -> None: + """dispatch_async() without celery_app raises ValueError.""" + dispatcher = ExecutionDispatcher(celery_app=None) + ctx = self._make_context() + + with pytest.raises(ValueError, match="No Celery app"): + dispatcher.dispatch_async(ctx) + + def test_dispatch_failure_result_from_executor( + self: Self, + ) -> None: + """Executor failure is deserialized correctly.""" + result_dict = { + "success": False, + "data": {}, + "metadata": {}, + "error": "LLM adapter timeout", + } + mock_app = self._make_mock_app(result_dict=result_dict) + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch(ctx, timeout=60) + + assert result.success is False + assert result.error == "LLM adapter timeout" + + def test_dispatch_context_serialized_correctly( + self: Self, + ) -> None: + """The full ExecutionContext is serialized in the task args.""" + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context( + executor_name="agentic_table", + operation="agentic_extraction", + organization_id="org-42", + executor_params={"schema": {"name": "str"}}, + ) + + dispatcher.dispatch(ctx, timeout=60) + + sent_args = mock_app.send_task.call_args + context_dict = sent_args[1]["args"][0] + + assert context_dict["executor_name"] == "agentic_table" + assert context_dict["operation"] == "agentic_extraction" + assert context_dict["organization_id"] == "org-42" + assert context_dict["executor_params"] == { + "schema": {"name": "str"} + } + + +# ---- Phase 1G: ExecutorToolShim ---- +# Note: ExecutorToolShim lives in workers/executor/ but the tests +# import it directly via sys.path manipulation since the workers +# package requires Celery (not installed in SDK1 test venv). +# We test the shim's logic here by importing its direct dependencies +# from SDK1 (StreamMixin, SdkError, LogLevel, ToolEnv). + + +class _MockExecutorToolShim: + """In-test replica of ExecutorToolShim for SDK1 test isolation. + + The real ExecutorToolShim lives in workers/executor/ and cannot + be imported here (Celery not in SDK1 venv). This replica + mirrors the same logic so we can verify the behavior contract + without importing the workers package. + """ + + def __init__(self, platform_api_key: str = "") -> None: + self.platform_api_key = platform_api_key + + def get_env_or_die(self, env_key: str) -> str: + import os + + if env_key == ToolEnv.PLATFORM_API_KEY: + if not self.platform_api_key: + raise SdkError( + f"Env variable '{env_key}' is required" + ) + return self.platform_api_key + + env_value = os.environ.get(env_key) + if env_value is None or env_value == "": + raise SdkError( + f"Env variable '{env_key}' is required" + ) + return env_value + + def stream_log( + self, + log: str, + level: LogLevel = LogLevel.INFO, + stage: str = "TOOL_RUN", + **kwargs: Any, + ) -> None: + _level_map = { + LogLevel.DEBUG: logging.DEBUG, + LogLevel.INFO: logging.INFO, + LogLevel.WARN: logging.WARNING, + LogLevel.ERROR: logging.ERROR, + LogLevel.FATAL: logging.CRITICAL, + } + py_level = _level_map.get(level, logging.INFO) + logging.getLogger("executor_tool_shim").log(py_level, log) + + def stream_error_and_exit( + self, message: str, err: Exception | None = None + ) -> None: + raise SdkError(message, actual_err=err) + + +class TestExecutorToolShim: + """Tests for ExecutorToolShim behavior contract.""" + + def test_platform_api_key_returned(self: Self) -> None: + """get_env_or_die('PLATFORM_SERVICE_API_KEY') returns configured key.""" + shim = _MockExecutorToolShim(platform_api_key="sk-test-123") + result = shim.get_env_or_die(ToolEnv.PLATFORM_API_KEY) + assert result == "sk-test-123" + + def test_platform_api_key_missing_raises(self: Self) -> None: + """get_env_or_die('PLATFORM_SERVICE_API_KEY') raises when not configured.""" + shim = _MockExecutorToolShim(platform_api_key="") + with pytest.raises(SdkError, match="PLATFORM_SERVICE_API_KEY"): + shim.get_env_or_die(ToolEnv.PLATFORM_API_KEY) + + def test_other_env_var_from_environ( + self: Self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """get_env_or_die() reads non-platform vars from os.environ.""" + monkeypatch.setenv("MY_CUSTOM_VAR", "custom_value") + shim = _MockExecutorToolShim(platform_api_key="sk-test") + result = shim.get_env_or_die("MY_CUSTOM_VAR") + assert result == "custom_value" + + def test_missing_env_var_raises(self: Self) -> None: + """get_env_or_die() raises SdkError for missing env var.""" + shim = _MockExecutorToolShim(platform_api_key="sk-test") + with pytest.raises(SdkError, match="NONEXISTENT_VAR"): + shim.get_env_or_die("NONEXISTENT_VAR") + + def test_empty_env_var_raises( + self: Self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """get_env_or_die() raises SdkError for empty env var.""" + monkeypatch.setenv("EMPTY_VAR", "") + shim = _MockExecutorToolShim(platform_api_key="sk-test") + with pytest.raises(SdkError, match="EMPTY_VAR"): + shim.get_env_or_die("EMPTY_VAR") + + def test_stream_log_routes_to_logging( + self: Self, caplog: pytest.LogCaptureFixture + ) -> None: + """stream_log() routes to Python logging, not stdout.""" + shim = _MockExecutorToolShim() + with caplog.at_level(logging.INFO, logger="executor_tool_shim"): + shim.stream_log("test message", level=LogLevel.INFO) + assert "test message" in caplog.text + + def test_stream_log_respects_level( + self: Self, caplog: pytest.LogCaptureFixture + ) -> None: + """stream_log() maps SDK LogLevel to Python logging level.""" + shim = _MockExecutorToolShim() + with caplog.at_level(logging.WARNING, logger="executor_tool_shim"): + shim.stream_log("debug msg", level=LogLevel.DEBUG) + shim.stream_log("warn msg", level=LogLevel.WARN) + # DEBUG should be filtered out at WARNING level + assert "debug msg" not in caplog.text + assert "warn msg" in caplog.text + + def test_stream_error_and_exit_raises_sdk_error( + self: Self, + ) -> None: + """stream_error_and_exit() raises SdkError (no sys.exit).""" + shim = _MockExecutorToolShim() + with pytest.raises(SdkError, match="something failed"): + shim.stream_error_and_exit("something failed") + + def test_stream_error_and_exit_wraps_original( + self: Self, + ) -> None: + """stream_error_and_exit() passes original exception.""" + shim = _MockExecutorToolShim() + original = ValueError("root cause") + with pytest.raises(SdkError) as exc_info: + shim.stream_error_and_exit("wrapper msg", err=original) + assert exc_info.value.actual_err is original diff --git a/workers/.env.test b/workers/.env.test new file mode 100644 index 0000000000..8cda6b9dc0 --- /dev/null +++ b/workers/.env.test @@ -0,0 +1,4 @@ +# Test environment variables for workers tests. +# Loaded by tests/conftest.py before any shared package imports. +INTERNAL_API_BASE_URL=http://localhost:8000 +INTERNAL_SERVICE_API_KEY=test-key diff --git a/workers/executor/__init__.py b/workers/executor/__init__.py new file mode 100644 index 0000000000..7982e4d411 --- /dev/null +++ b/workers/executor/__init__.py @@ -0,0 +1,12 @@ +"""Executor Worker + +Celery worker for running extraction executors. +Dispatches ExecutionContext to registered executors and returns +ExecutionResult via the Celery result backend. +""" + +from .worker import app as celery_app + +__all__ = [ + "celery_app", +] diff --git a/workers/executor/executor_tool_shim.py b/workers/executor/executor_tool_shim.py new file mode 100644 index 0000000000..8baee47194 --- /dev/null +++ b/workers/executor/executor_tool_shim.py @@ -0,0 +1,130 @@ +"""ExecutorToolShim — Lightweight BaseTool substitute for executor workers. + +Adapters (PlatformHelper, LLM, Embedding, VectorDB, X2Text) all require +a ``tool: BaseTool`` parameter that provides ``get_env_or_die()`` and +``stream_log()``. The executor worker has no ``BaseTool`` instance, so +this shim provides just those two methods. + +Precedent: ``prompt-service/.../helpers/prompt_ide_base_tool.py`` +(``PromptServiceBaseTool``). +""" + +import logging +import os +from typing import Any + +from unstract.sdk1.constants import LogLevel, ToolEnv +from unstract.sdk1.exceptions import SdkError +from unstract.sdk1.tool.stream import StreamMixin + +logger = logging.getLogger(__name__) + +# Mapping from SDK LogLevel enum to Python logging levels. +_LEVEL_MAP = { + LogLevel.DEBUG: logging.DEBUG, + LogLevel.INFO: logging.INFO, + LogLevel.WARN: logging.WARNING, + LogLevel.ERROR: logging.ERROR, + LogLevel.FATAL: logging.CRITICAL, +} + + +class ExecutorToolShim(StreamMixin): + """Minimal BaseTool substitute for use inside executor workers. + + Provides the two methods that adapters actually call: + + - ``get_env_or_die(env_key)`` — reads env vars, with special + handling for ``PLATFORM_SERVICE_API_KEY`` (multitenancy) + - ``stream_log(log, level)`` — routes to Python logging instead + of the Unstract stdout JSON protocol used by tools + + Usage:: + + shim = ExecutorToolShim(platform_api_key="sk-...") + adapter = SomeAdapter(tool=shim) # adapter calls shim.get_env_or_die() + """ + + def __init__(self, platform_api_key: str = "") -> None: + """Initialize the shim. + + Args: + platform_api_key: The platform service API key for this + execution. Returned by ``get_env_or_die()`` when the + caller asks for ``PLATFORM_SERVICE_API_KEY``. + """ + self.platform_api_key = platform_api_key + # Initialize StreamMixin. EXECUTION_BY_TOOL is not set in + # the worker environment, so _exec_by_tool will be False. + super().__init__(log_level=LogLevel.INFO) + + def get_env_or_die(self, env_key: str) -> str: + """Return environment variable value. + + Special-cases ``PLATFORM_SERVICE_API_KEY`` to return the key + passed at construction time (supports multitenancy — each + execution may use a different org's API key). + + Args: + env_key: Environment variable name. + + Returns: + The value of the environment variable. + + Raises: + SdkError: If the variable is missing or empty. + """ + if env_key == ToolEnv.PLATFORM_API_KEY: + if not self.platform_api_key: + raise SdkError( + f"Env variable '{env_key}' is required" + ) + return self.platform_api_key + + env_value = os.environ.get(env_key) + if env_value is None or env_value == "": + raise SdkError( + f"Env variable '{env_key}' is required" + ) + return env_value + + def stream_log( + self, + log: str, + level: LogLevel = LogLevel.INFO, + stage: str = "TOOL_RUN", + **kwargs: dict[str, Any], + ) -> None: + """Route log messages to Python logging. + + In the executor worker context, logs go through the standard + Python logging framework (captured by Celery) rather than the + Unstract stdout JSON protocol used by tools. + + Args: + log: The log message. + level: SDK log level. + stage: Ignored (only meaningful for stdout protocol). + **kwargs: Ignored (only meaningful for stdout protocol). + """ + py_level = _LEVEL_MAP.get(level, logging.INFO) + logger.log(py_level, log) + + def stream_error_and_exit( + self, message: str, err: Exception | None = None + ) -> None: + """Log error and raise SdkError. + + Unlike the base StreamMixin which may call ``sys.exit(1)`` + when running as a tool, the executor worker always raises + an exception so the Celery task can handle it gracefully. + + Args: + message: Error description. + err: Original exception, if any. + + Raises: + SdkError: Always. + """ + logger.error(message) + raise SdkError(message, actual_err=err) diff --git a/workers/executor/executors/__init__.py b/workers/executor/executors/__init__.py new file mode 100644 index 0000000000..c42cc95175 --- /dev/null +++ b/workers/executor/executors/__init__.py @@ -0,0 +1,9 @@ +"""Executor implementations package. + +Importing this module triggers ``@ExecutorRegistry.register`` for all +bundled executors. +""" + +from executor.executors.legacy_executor import LegacyExecutor + +__all__ = ["LegacyExecutor"] diff --git a/workers/executor/executors/answer_prompt.py b/workers/executor/executors/answer_prompt.py new file mode 100644 index 0000000000..25d18edae9 --- /dev/null +++ b/workers/executor/executors/answer_prompt.py @@ -0,0 +1,335 @@ +"""Answer prompt service — prompt construction and LLM execution. + +Ported from prompt-service/.../services/answer_prompt.py. +Flask dependencies (app.logger, PluginManager, APIError) replaced with +standard logging and executor exceptions. + +Plugin-dependent features (highlight, challenge, table-extraction, +line-item-extraction) are omitted — they require a plugin loading +mechanism that will be added in a later phase. +""" + +import ipaddress +import logging +import os +import socket +from typing import Any +from urllib.parse import urlparse + +from executor.executors.constants import PromptServiceConstants as PSKeys +from executor.executors.exceptions import LegacyExecutorError, RateLimitError + +logger = logging.getLogger(__name__) + + +def _is_safe_public_url(url: str) -> bool: + """Validate webhook URL for SSRF protection. + + Only allows HTTPS and blocks private/loopback/internal addresses. + """ + try: + p = urlparse(url) + if p.scheme not in ("https",): + return False + host = p.hostname or "" + if host in ("localhost",): + return False + + addrs: set[str] = set() + try: + ipaddress.ip_address(host) + addrs.add(host) + except ValueError: + try: + for _family, _type, _proto, _canonname, sockaddr in socket.getaddrinfo( + host, None, type=socket.SOCK_STREAM + ): + addrs.add(sockaddr[0]) + except Exception: + return False + + if not addrs: + return False + + for addr in addrs: + try: + ip = ipaddress.ip_address(addr) + except ValueError: + return False + if ( + ip.is_private + or ip.is_loopback + or ip.is_link_local + or ip.is_reserved + or ip.is_multicast + ): + return False + return True + except Exception: + return False + + +class AnswerPromptService: + @staticmethod + def extract_variable( + structured_output: dict[str, Any], + variable_names: list[Any], + output: dict[str, Any], + promptx: str, + ) -> str: + """Replace %variable_name% references in the prompt text.""" + for variable_name in variable_names: + if promptx.find(f"%{variable_name}%") >= 0: + if variable_name in structured_output: + promptx = promptx.replace( + f"%{variable_name}%", + str(structured_output[variable_name]), + ) + else: + raise ValueError( + f"Variable {variable_name} not found in structured output" + ) + + if promptx != output[PSKeys.PROMPT]: + logger.info("Prompt after variable replacement: %s", promptx) + return promptx + + @staticmethod + def construct_and_run_prompt( + tool_settings: dict[str, Any], + output: dict[str, Any], + llm: Any, + context: str, + prompt: str, + metadata: dict[str, Any], + file_path: str = "", + execution_source: str | None = "ide", + ) -> str: + """Construct the full prompt and run LLM completion. + + Args: + tool_settings: Global tool settings (preamble, postamble, etc.) + output: The prompt definition dict. + llm: LLM adapter instance. + context: Retrieved context string. + prompt: Key into ``output`` for the prompt text (usually "promptx"). + metadata: Metadata dict (updated in place with highlight info). + file_path: Path to the extracted text file. + execution_source: "ide" or "tool". + + Returns: + The LLM answer string. + """ + platform_postamble = tool_settings.get(PSKeys.PLATFORM_POSTAMBLE, "") + word_confidence_postamble = tool_settings.get( + PSKeys.WORD_CONFIDENCE_POSTAMBLE, "" + ) + summarize_as_source = tool_settings.get(PSKeys.SUMMARIZE_AS_SOURCE) + enable_highlight = tool_settings.get(PSKeys.ENABLE_HIGHLIGHT, False) + enable_word_confidence = tool_settings.get(PSKeys.ENABLE_WORD_CONFIDENCE, False) + if not enable_highlight: + enable_word_confidence = False + prompt_type = output.get(PSKeys.TYPE, PSKeys.TEXT) + if not enable_highlight or summarize_as_source: + platform_postamble = "" + if not enable_word_confidence or summarize_as_source: + word_confidence_postamble = "" + + prompt = AnswerPromptService.construct_prompt( + preamble=tool_settings.get(PSKeys.PREAMBLE, ""), + prompt=output[prompt], + postamble=tool_settings.get(PSKeys.POSTAMBLE, ""), + grammar_list=tool_settings.get(PSKeys.GRAMMAR, []), + context=context, + platform_postamble=platform_postamble, + word_confidence_postamble=word_confidence_postamble, + prompt_type=prompt_type, + ) + output[PSKeys.COMBINED_PROMPT] = prompt + return AnswerPromptService.run_completion( + llm=llm, + prompt=prompt, + metadata=metadata, + prompt_key=output[PSKeys.NAME], + prompt_type=prompt_type, + enable_highlight=enable_highlight, + enable_word_confidence=enable_word_confidence, + file_path=file_path, + execution_source=execution_source, + ) + + @staticmethod + def construct_prompt( + preamble: str, + prompt: str, + postamble: str, + grammar_list: list[dict[str, Any]], + context: str, + platform_postamble: str, + word_confidence_postamble: str, + prompt_type: str = "text", + ) -> str: + """Build the full prompt string with preamble, grammar, postamble, context.""" + prompt = f"{preamble}\n\nQuestion or Instruction: {prompt}" + if grammar_list is not None and len(grammar_list) > 0: + prompt += "\n" + for grammar in grammar_list: + word = "" + synonyms = [] + if PSKeys.WORD in grammar: + word = grammar[PSKeys.WORD] + if PSKeys.SYNONYMS in grammar: + synonyms = grammar[PSKeys.SYNONYMS] + if len(synonyms) > 0 and word != "": + prompt += ( + f"\nNote: You can consider that the word '{word}' " + f"is the same as {', '.join(synonyms)} " + f"in both the question and the context." + ) + if prompt_type == PSKeys.JSON: + json_postamble = os.environ.get( + PSKeys.JSON_POSTAMBLE, PSKeys.DEFAULT_JSON_POSTAMBLE + ) + postamble += f"\n{json_postamble}" + if platform_postamble: + platform_postamble += "\n\n" + if word_confidence_postamble: + platform_postamble += f"{word_confidence_postamble}\n\n" + prompt += ( + f"\n\n{postamble}\n\nContext:\n---------------\n{context}\n" + f"-----------------\n\n{platform_postamble}Answer:" + ) + return prompt + + @staticmethod + def run_completion( + llm: Any, + prompt: str, + metadata: dict[str, str] | None = None, + prompt_key: str | None = None, + prompt_type: str | None = "text", + enable_highlight: bool = False, + enable_word_confidence: bool = False, + file_path: str = "", + execution_source: str | None = None, + ) -> str: + """Run LLM completion and extract the answer. + + Highlight/word-confidence plugin support is not available in the + executor worker yet — those features are skipped here. + """ + try: + from unstract.sdk1.exceptions import RateLimitError as SdkRateLimitError + from unstract.sdk1.exceptions import SdkError + except ImportError: + SdkRateLimitError = Exception + SdkError = Exception + + try: + completion = llm.complete( + prompt=prompt, + process_text=None, + extract_json=prompt_type.lower() != PSKeys.TEXT, + ) + answer: str = completion[PSKeys.RESPONSE].text + highlight_data = completion.get(PSKeys.HIGHLIGHT_DATA, []) + confidence_data = completion.get(PSKeys.CONFIDENCE_DATA) + word_confidence_data = completion.get(PSKeys.WORD_CONFIDENCE_DATA) + line_numbers = completion.get(PSKeys.LINE_NUMBERS, []) + whisper_hash = completion.get(PSKeys.WHISPER_HASH, "") + if metadata is not None and prompt_key: + metadata.setdefault(PSKeys.HIGHLIGHT_DATA, {})[prompt_key] = ( + highlight_data + ) + metadata.setdefault(PSKeys.LINE_NUMBERS, {})[prompt_key] = line_numbers + metadata[PSKeys.WHISPER_HASH] = whisper_hash + if confidence_data: + metadata.setdefault(PSKeys.CONFIDENCE_DATA, {})[prompt_key] = ( + confidence_data + ) + if enable_word_confidence and word_confidence_data: + metadata.setdefault(PSKeys.WORD_CONFIDENCE_DATA, {})[prompt_key] = ( + word_confidence_data + ) + return answer + except SdkRateLimitError as e: + raise RateLimitError(f"Rate limit error. {str(e)}") from e + except SdkError as e: + logger.error("Error fetching response for prompt: %s", e) + status_code = getattr(e, "status_code", None) or 500 + raise LegacyExecutorError(message=str(e), code=status_code) from e + + @staticmethod + def handle_json( + answer: str, + structured_output: dict[str, Any], + output: dict[str, Any], + llm: Any, + enable_highlight: bool = False, + enable_word_confidence: bool = False, + execution_source: str = "ide", + metadata: dict[str, Any] | None = None, + file_path: str = "", + log_events_id: str = "", + tool_id: str = "", + doc_name: str = "", + ) -> None: + """Handle JSON responses from the LLM.""" + from executor.executors.json_repair_helper import repair_json_with_best_structure + from executor.executors.postprocessor import postprocess_data + + prompt_key = output[PSKeys.NAME] + if answer.lower() == "na": + structured_output[prompt_key] = None + else: + parsed_data = repair_json_with_best_structure(answer) + + if isinstance(parsed_data, str): + logger.error("Error parsing response to JSON") + structured_output[prompt_key] = {} + else: + webhook_enabled = output.get(PSKeys.ENABLE_POSTPROCESSING_WEBHOOK, False) + webhook_url = output.get(PSKeys.POSTPROCESSING_WEBHOOK_URL) + + highlight_data = None + if enable_highlight and metadata and PSKeys.HIGHLIGHT_DATA in metadata: + highlight_data = metadata[PSKeys.HIGHLIGHT_DATA].get(prompt_key) + + processed_data = parsed_data + updated_highlight_data = None + + if webhook_enabled: + if not webhook_url: + logger.warning( + "Postprocessing webhook enabled but URL missing; skipping." + ) + elif not _is_safe_public_url(webhook_url): + logger.warning( + "Postprocessing webhook URL is not allowed; skipping." + ) + else: + try: + processed_data, updated_highlight_data = postprocess_data( + parsed_data, + webhook_enabled=True, + webhook_url=webhook_url, + highlight_data=highlight_data, + timeout=60, + ) + except Exception as e: + logger.warning( + "Postprocessing webhook failed: %s. " + "Using unprocessed data.", + e, + ) + + structured_output[prompt_key] = processed_data + + if ( + enable_highlight + and metadata + and updated_highlight_data is not None + ): + metadata.setdefault(PSKeys.HIGHLIGHT_DATA, {})[prompt_key] = ( + updated_highlight_data + ) diff --git a/workers/executor/executors/constants.py b/workers/executor/executors/constants.py new file mode 100644 index 0000000000..9eddab8423 --- /dev/null +++ b/workers/executor/executors/constants.py @@ -0,0 +1,203 @@ +from enum import Enum + + +class PromptServiceConstants: + """Constants used in the prompt service.""" + + WORD = "word" + SYNONYMS = "synonyms" + OUTPUTS = "outputs" + TOOL_ID = "tool_id" + RUN_ID = "run_id" + EXECUTION_ID = "execution_id" + FILE_NAME = "file_name" + FILE_HASH = "file_hash" + NAME = "name" + ACTIVE = "active" + PROMPT = "prompt" + CHUNK_SIZE = "chunk-size" + PROMPTX = "promptx" + VECTOR_DB = "vector-db" + EMBEDDING = "embedding" + X2TEXT_ADAPTER = "x2text_adapter" + CHUNK_OVERLAP = "chunk-overlap" + LLM = "llm" + IS_ASSERT = "is_assert" + ASSERTION_FAILURE_PROMPT = "assertion_failure_prompt" + RETRIEVAL_STRATEGY = "retrieval-strategy" + TYPE = "type" + NUMBER = "number" + EMAIL = "email" + DATE = "date" + BOOLEAN = "boolean" + JSON = "json" + PREAMBLE = "preamble" + SIMILARITY_TOP_K = "similarity-top-k" + PROMPT_TOKENS = "prompt_tokens" + COMPLETION_TOKENS = "completion_tokens" + TOTAL_TOKENS = "total_tokens" + RESPONSE = "response" + POSTAMBLE = "postamble" + GRAMMAR = "grammar" + PLATFORM_SERVICE_API_KEY = "PLATFORM_SERVICE_API_KEY" + EMBEDDING_SUFFIX = "embedding_suffix" + EVAL_SETTINGS = "eval_settings" + EVAL_SETTINGS_EVALUATE = "evaluate" + EVAL_SETTINGS_MONITOR_LLM = "monitor_llm" + EVAL_SETTINGS_EXCLUDE_FAILED = "exclude_failed" + TOOL_SETTINGS = "tool_settings" + LOG_EVENTS_ID = "log_events_id" + CHALLENGE_LLM = "challenge_llm" + CHALLENGE = "challenge" + ENABLE_CHALLENGE = "enable_challenge" + EXTRACTION = "extraction" + SUMMARIZE = "summarize" + SINGLE_PASS_EXTRACTION = "single-pass-extraction" + SIMPLE_PROMPT_STUDIO = "simple-prompt-studio" + LLM_USAGE_REASON = "llm_usage_reason" + METADATA = "metadata" + OUTPUT = "output" + CONTEXT = "context" + INCLUDE_METADATA = "include_metadata" + TABLE = "table" + TABLE_SETTINGS = "table_settings" + EPILOGUE = "epilogue" + PLATFORM_POSTAMBLE = "platform_postamble" + WORD_CONFIDENCE_POSTAMBLE = "word_confidence_postamble" + HIGHLIGHT_DATA_PLUGIN = "highlight-data" + SUMMARIZE_AS_SOURCE = "summarize_as_source" + VARIABLE_MAP = "variable_map" + RECORD = "record" + CUSTOM_DATA = "custom_data" + TEXT = "text" + ENABLE_HIGHLIGHT = "enable_highlight" + ENABLE_WORD_CONFIDENCE = "enable_word_confidence" + FILE_PATH = "file_path" + HIGHLIGHT_DATA = "highlight_data" + CONFIDENCE_DATA = "confidence_data" + WORD_CONFIDENCE_DATA = "word_confidence_data" + REQUIRED_FIELDS = "required_fields" + REQUIRED = "required" + EXECUTION_SOURCE = "execution_source" + METRICS = "metrics" + CAPTURE_METRICS = "capture_metrics" + LINE_ITEM = "line-item" + LINE_NUMBERS = "line_numbers" + WHISPER_HASH = "whisper_hash" + PAID_FEATURE_MSG = ( + "It is a cloud / enterprise feature. If you have purchased a plan and still " + "face this issue, please contact support" + ) + NO_CONTEXT_ERROR = ( + "Couldn't fetch context from vector DB. " + "This happens usually due to a delay by the Vector DB " + "provider to confirm writes to DB. " + "Please try again after some time" + ) + COMBINED_PROMPT = "combined_prompt" + TOOL = "tool" + JSON_POSTAMBLE = "JSON_POSTAMBLE" + DEFAULT_JSON_POSTAMBLE = "Wrap the final JSON result inbetween §§§ like below example:\n§§§\n\n§§§" + DOCUMENT_TYPE = "document_type" + # Webhook postprocessing settings + ENABLE_POSTPROCESSING_WEBHOOK = "enable_postprocessing_webhook" + POSTPROCESSING_WEBHOOK_URL = "postprocessing_webhook_url" + + +class RunLevel(Enum): + """Different stages of prompt execution. + + Comprises of prompt run and response evaluation stages. + """ + + RUN = "RUN" + EVAL = "EVAL" + CHALLENGE = "CHALLENGE" + TABLE_EXTRACTION = "TABLE_EXTRACTION" + + +class DBTableV2: + """Database tables.""" + + ORGANIZATION = "organization" + ADAPTER_INSTANCE = "adapter_instance" + PROMPT_STUDIO_REGISTRY = "prompt_studio_registry" + PLATFORM_KEY = "platform_key" + TOKEN_USAGE = "usage" + + +class FileStorageKeys: + """File storage keys.""" + + PERMANENT_REMOTE_STORAGE = "PERMANENT_REMOTE_STORAGE" + TEMPORARY_REMOTE_STORAGE = "TEMPORARY_REMOTE_STORAGE" + + +class FileStorageType(Enum): + """File storage type.""" + + PERMANENT = "permanent" + TEMPORARY = "temporary" + + +class ExecutionSource(Enum): + """Execution source.""" + + IDE = "ide" + TOOL = "tool" + + +class VariableType(str, Enum): + """Type of variable.""" + + STATIC = "STATIC" + DYNAMIC = "DYNAMIC" + CUSTOM_DATA = "CUSTOM_DATA" + + +class RetrievalStrategy(str, Enum): + """Available retrieval strategies for prompt service.""" + + SIMPLE = "simple" + SUBQUESTION = "subquestion" + FUSION = "fusion" + RECURSIVE = "recursive" + ROUTER = "router" + KEYWORD_TABLE = "keyword_table" + AUTOMERGING = "automerging" + + +class VariableConstants: + """Constants for variable extraction.""" + + VARIABLE_REGEX = "{{(.+?)}}" + DYNAMIC_VARIABLE_DATA_REGEX = r"\[(.*?)\]" + DYNAMIC_VARIABLE_URL_REGEX = ( + r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»" + "'']))" + ) # noqa: E501 + CUSTOM_DATA_VARIABLE_REGEX = r"custom_data\.([a-zA-Z0-9_\.]+)" + + +class IndexingConstants: + TOOL_ID = "tool_id" + EMBEDDING_INSTANCE_ID = "embedding_instance_id" + VECTOR_DB_INSTANCE_ID = "vector_db_instance_id" + X2TEXT_INSTANCE_ID = "x2text_instance_id" + FILE_PATH = "file_path" + CHUNK_SIZE = "chunk_size" + CHUNK_OVERLAP = "chunk_overlap" + REINDEX = "reindex" + FILE_HASH = "file_hash" + OUTPUT_FILE_PATH = "output_file_path" + ENABLE_HIGHLIGHT = "enable_highlight" + ENABLE_WORD_CONFIDENCE = "enable_word_confidence" + USAGE_KWARGS = "usage_kwargs" + PROCESS_TEXT = "process_text" + EXTRACTED_TEXT = "extracted_text" + TAGS = "tags" + EXECUTION_SOURCE = "execution_source" + DOC_ID = "doc_id" + TOOL_EXECUTION_METATADA = "tool_execution_metadata" + EXECUTION_DATA_DIR = "execution_data_dir" + METADATA_FILE = "METADATA.json" diff --git a/workers/executor/executors/dto.py b/workers/executor/executors/dto.py new file mode 100644 index 0000000000..8c9e4f3d3c --- /dev/null +++ b/workers/executor/executors/dto.py @@ -0,0 +1,39 @@ +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class InstanceIdentifiers: + embedding_instance_id: str + vector_db_instance_id: str + x2text_instance_id: str + llm_instance_id: str + tool_id: str + tags: list[str] | None = None + + +@dataclass +class FileInfo: + file_path: str + file_hash: str + + +@dataclass +class ChunkingConfig: + chunk_size: int + chunk_overlap: int + + def __post_init__(self) -> None: + if self.chunk_size == 0: + raise ValueError( + "Indexing cannot be done for zero chunks." + "Please provide a valid chunk_size." + ) + + +@dataclass +class ProcessingOptions: + reindex: bool = False + enable_highlight: bool = False + enable_word_confidence: bool = False + usage_kwargs: dict[Any, Any] = field(default_factory=dict) diff --git a/workers/executor/executors/exceptions.py b/workers/executor/executors/exceptions.py new file mode 100644 index 0000000000..69cd0a8a16 --- /dev/null +++ b/workers/executor/executors/exceptions.py @@ -0,0 +1,79 @@ +"""Standalone exceptions for the legacy executor. + +Adapted from prompt-service exceptions. The Flask ``APIError`` base +class is replaced with ``LegacyExecutorError`` so these exceptions +work outside of Flask (i.e. inside the Celery executor worker). +""" + + +class LegacyExecutorError(Exception): + """Base exception for legacy executor errors. + + Replaces Flask's ``APIError`` — carries ``message`` and ``code`` + attributes so callers can map to ``ExecutionResult.failure()``. + """ + + code: int = 500 + message: str = "Internal executor error" + + def __init__(self, message: str | None = None, code: int | None = None): + if message is not None: + self.message = message + if code is not None: + self.code = code + super().__init__(self.message) + + +class BadRequest(LegacyExecutorError): + code = 400 + message = "Bad Request / No payload" + + +class RateLimitError(LegacyExecutorError): + code = 429 + message = "Running into rate limit errors, please try again later" + + +class MissingFieldError(LegacyExecutorError): + """Custom error for missing fields.""" + + def __init__(self, missing_fields: list[str]): + message = f"Missing required fields: {', '.join(missing_fields)}" + super().__init__(message=message) + + +class RetrievalError(LegacyExecutorError): + """Custom exception raised for errors during retrieval from VectorDB.""" + + DEFAULT_MESSAGE = ( + "Error while retrieving data from the VectorDB. " + "Please contact the admin for further assistance." + ) + + +class ExtractionError(LegacyExecutorError): + DEFAULT_MESSAGE = "Error while extracting from a document" + + +class UnprocessableEntity(LegacyExecutorError): + code = 422 + message = "Unprocessable Entity" + + +class CustomDataError(LegacyExecutorError): + """Custom exception raised for errors with custom_data variables.""" + + code = 400 + + def __init__(self, variable: str, reason: str, is_ide: bool = True): + if is_ide: + help_text = "Please define this key in Prompt Studio Settings > Custom Data." + else: + help_text = ( + "Please include this key in the 'custom_data' field of your API request." + ) + variable_display = "{{custom_data." + variable + "}}" + message = ( + f"Custom data error for variable '{variable_display}': {reason} {help_text}" + ) + super().__init__(message=message) diff --git a/workers/executor/executors/file_utils.py b/workers/executor/executors/file_utils.py new file mode 100644 index 0000000000..3741aa26d4 --- /dev/null +++ b/workers/executor/executors/file_utils.py @@ -0,0 +1,39 @@ +"""File storage utilities for the legacy executor. + +Adapted from ``prompt-service/.../utils/file_utils.py``. +Returns the appropriate ``FileStorage`` instance based on execution source. +""" + +from executor.executors.constants import ExecutionSource, FileStorageKeys +from unstract.sdk1.file_storage import FileStorage +from unstract.sdk1.file_storage.constants import StorageType +from unstract.sdk1.file_storage.env_helper import EnvHelper + + +class FileUtils: + @staticmethod + def get_fs_instance(execution_source: str) -> FileStorage: + """Returns a FileStorage instance based on the execution source. + + Args: + execution_source: The source from which the execution is triggered. + + Returns: + FileStorage: The file storage instance — Permanent/Shared temporary. + + Raises: + ValueError: If the execution source is invalid. + """ + if execution_source == ExecutionSource.IDE.value: + return EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + + if execution_source == ExecutionSource.TOOL.value: + return EnvHelper.get_storage( + storage_type=StorageType.SHARED_TEMPORARY, + env_name=FileStorageKeys.TEMPORARY_REMOTE_STORAGE, + ) + + raise ValueError(f"Invalid execution source: {execution_source}") diff --git a/workers/executor/executors/index.py b/workers/executor/executors/index.py new file mode 100644 index 0000000000..cb4de85a11 --- /dev/null +++ b/workers/executor/executors/index.py @@ -0,0 +1,221 @@ +"""Indexing logic for the legacy executor. + +Adapted from ``prompt-service/.../core/index_v2.py``. +Performs document chunking and vector DB indexing. + +Heavy dependencies (``llama_index``, ``openai``, vectordb adapters) +are imported lazily inside methods to avoid protobuf descriptor +conflicts at test-collection time. +""" + +from __future__ import annotations + +import json +import logging +from typing import TYPE_CHECKING, Any + +from executor.executors.dto import ( + ChunkingConfig, + FileInfo, + InstanceIdentifiers, + ProcessingOptions, +) +from unstract.sdk1.constants import LogLevel +from unstract.sdk1.exceptions import SdkError, parse_litellm_err +from unstract.sdk1.file_storage.impl import FileStorage +from unstract.sdk1.file_storage.provider import FileStorageProvider +from unstract.sdk1.platform import PlatformHelper as ToolAdapter +from unstract.sdk1.tool.stream import StreamMixin +from unstract.sdk1.utils.tool import ToolUtils + +if TYPE_CHECKING: + from unstract.sdk1.embedding import Embedding + from unstract.sdk1.vector_db import VectorDB + +logger = logging.getLogger(__name__) + + +class Index: + def __init__( + self, + tool: StreamMixin, + instance_identifiers: InstanceIdentifiers, + chunking_config: ChunkingConfig, + processing_options: ProcessingOptions, + run_id: str | None = None, + capture_metrics: bool = False, + ): + self.tool = tool + self._run_id = run_id + self._capture_metrics = capture_metrics + self.instance_identifiers = instance_identifiers + self.chunking_config = chunking_config + self.processing_options = processing_options + self._metrics = {} + + def generate_index_key( + self, + file_info: FileInfo, + fs: FileStorage = FileStorage(provider=FileStorageProvider.LOCAL), + ) -> str: + """Generate a unique index key for document indexing.""" + if not file_info.file_path and not file_info.file_hash: + raise ValueError("One of `file_path` or `file_hash` need to be provided") + + file_hash = file_info.file_hash + if not file_hash: + file_hash = fs.get_hash_from_file(path=file_info.file_path) + + index_key = { + "file_hash": file_hash, + "vector_db_config": ToolAdapter.get_adapter_config( + self.tool, self.instance_identifiers.vector_db_instance_id + ), + "embedding_config": ToolAdapter.get_adapter_config( + self.tool, self.instance_identifiers.embedding_instance_id + ), + "x2text_config": ToolAdapter.get_adapter_config( + self.tool, self.instance_identifiers.x2text_instance_id + ), + "chunk_size": str(self.chunking_config.chunk_size), + "chunk_overlap": str(self.chunking_config.chunk_overlap), + } + hashed_index_key = ToolUtils.hash_str(json.dumps(index_key, sort_keys=True)) + return hashed_index_key + + def is_document_indexed( + self, + doc_id: str, + embedding: Embedding, + vector_db: VectorDB, + ) -> bool: + """Check if nodes are already present in the vector DB for a doc_id.""" + from llama_index.core.vector_stores import ( + FilterOperator, + MetadataFilter, + MetadataFilters, + VectorStoreQuery, + VectorStoreQueryResult, + ) + + doc_id_eq_filter = MetadataFilter.from_dict( + {"key": "doc_id", "operator": FilterOperator.EQ, "value": doc_id} + ) + filters = MetadataFilters(filters=[doc_id_eq_filter]) + q = VectorStoreQuery( + query_embedding=embedding.get_query_embedding(" "), + doc_ids=[doc_id], + filters=filters, + ) + + doc_id_found = False + try: + n: VectorStoreQueryResult = vector_db.query(query=q) + if len(n.nodes) > 0: + doc_id_found = True + self.tool.stream_log(f"Found {len(n.nodes)} nodes for {doc_id}") + else: + self.tool.stream_log(f"No nodes found for {doc_id}") + except Exception as e: + logger.warning( + f"Error querying {self.instance_identifiers.vector_db_instance_id}:" + f" {str(e)}, proceeding to index", + exc_info=True, + ) + + if doc_id_found and not self.processing_options.reindex: + self.tool.stream_log(f"File was indexed already under {doc_id}") + return doc_id_found + + return doc_id_found + + def perform_indexing( + self, + vector_db: VectorDB, + doc_id: str, + extracted_text: str, + doc_id_found: bool, + ) -> str: + from unstract.sdk1.adapters.vectordb.no_op.src.no_op_custom_vectordb import ( + NoOpCustomVectorDB, + ) + + if isinstance( + vector_db.get_vector_db( + adapter_instance_id=self.instance_identifiers.vector_db_instance_id, + embedding_dimension=1, + ), + (NoOpCustomVectorDB), + ): + return doc_id + + self.tool.stream_log("Indexing file...") + full_text = [ + { + "section": "full", + "text_contents": str(extracted_text), + } + ] + documents = self._prepare_documents(doc_id, full_text) + if self.processing_options.reindex and doc_id_found: + self.delete_nodes(vector_db, doc_id) + self._trigger_indexing(vector_db, documents) + return doc_id + + def _trigger_indexing(self, vector_db: Any, documents: list) -> None: + import openai + + self.tool.stream_log("Adding nodes to vector db...") + try: + vector_db.index_document( + documents, + chunk_size=self.chunking_config.chunk_size, + chunk_overlap=self.chunking_config.chunk_overlap, + show_progress=True, + ) + self.tool.stream_log("File has been indexed successfully") + except openai.OpenAIError as e: + e = parse_litellm_err(e) + raise e + except Exception as e: + self.tool.stream_log( + f"Error adding nodes to vector db: {e}", + level=LogLevel.ERROR, + ) + raise e + + def delete_nodes(self, vector_db: Any, doc_id: str) -> None: + try: + vector_db.delete(ref_doc_id=doc_id) + self.tool.stream_log(f"Deleted nodes for {doc_id}") + except Exception as e: + self.tool.stream_log( + f"Error deleting nodes for {doc_id}: {e}", + level=LogLevel.ERROR, + ) + raise SdkError(f"Error deleting nodes for {doc_id}: {e}") from e + + def _prepare_documents(self, doc_id: str, full_text: Any) -> list: + from llama_index.core import Document + + documents = [] + try: + for item in full_text: + text = item["text_contents"] + document = Document( + text=text, + doc_id=doc_id, + metadata={"section": item["section"]}, + ) + document.id_ = doc_id + documents.append(document) + self.tool.stream_log(f"Number of documents: {len(documents)}") + return documents + except Exception as e: + self.tool.stream_log( + f"Error while processing documents {doc_id}: {e}", + level=LogLevel.ERROR, + ) + raise SdkError( + f"Error while processing documents for indexing {doc_id}: {e}" + ) from e diff --git a/workers/executor/executors/json_repair_helper.py b/workers/executor/executors/json_repair_helper.py new file mode 100644 index 0000000000..f1cf17c0b0 --- /dev/null +++ b/workers/executor/executors/json_repair_helper.py @@ -0,0 +1,63 @@ +"""JSON repair utility functions. + +Copied from prompt-service/.../utils/json_repair_helper.py — already Flask-free. +""" + +import json +from typing import Any + + +def repair_json_with_best_structure(json_str: str) -> Any: + """Intelligently repair JSON string using the best parsing strategy. + + Attempts to parse as valid JSON first, then falls back to basic repair + heuristics. The full ``json_repair`` library is used when available for + more aggressive repair. + + Args: + json_str: The JSON string to repair + + Returns: + The parsed JSON object with the best structure + """ + # Fast path — try strict JSON first + try: + return json.loads(json_str) + except (json.JSONDecodeError, ValueError): + pass + + # Try to import json_repair for advanced repair + try: + from json_repair import repair_json + + parsed_as_is = repair_json( + json_str=json_str, return_objects=True, ensure_ascii=False + ) + parsed_with_wrap = repair_json( + json_str="[" + json_str, return_objects=True, ensure_ascii=False + ) + + if isinstance(parsed_as_is, str) and isinstance(parsed_with_wrap, str): + return parsed_as_is + if isinstance(parsed_as_is, str): + return parsed_with_wrap + if isinstance(parsed_with_wrap, str): + return parsed_as_is + + if ( + isinstance(parsed_with_wrap, list) + and len(parsed_with_wrap) == 1 + and parsed_with_wrap[0] == parsed_as_is + ): + return parsed_as_is + + if isinstance(parsed_as_is, (dict, list)): + if isinstance(parsed_with_wrap, list) and len(parsed_with_wrap) > 1: + return parsed_with_wrap + else: + return parsed_as_is + + return parsed_with_wrap + except ImportError: + # json_repair not installed — return the raw string + return json_str diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py new file mode 100644 index 0000000000..64f2d7c137 --- /dev/null +++ b/workers/executor/executors/legacy_executor.py @@ -0,0 +1,925 @@ +"""Legacy executor — migrates the prompt-service pipeline. + +Phase 2A scaffolds the class with operation routing. +Phase 2B implements ``_handle_extract`` (text extraction via x2text). +Phase 2C implements ``_handle_index`` (vector DB indexing). +Remaining handler methods raise ``NotImplementedError`` and are filled +in by phases 2D–2H. +""" + +import logging +from pathlib import Path +from typing import Any + +from executor.executor_tool_shim import ExecutorToolShim +from executor.executors.constants import ExecutionSource, IndexingConstants as IKeys +from executor.executors.dto import ( + ChunkingConfig, + FileInfo, + InstanceIdentifiers, + ProcessingOptions, +) +from executor.executors.exceptions import ExtractionError, LegacyExecutorError +from executor.executors.file_utils import FileUtils +from unstract.sdk1.adapters.exceptions import AdapterError +from unstract.sdk1.adapters.x2text.constants import X2TextConstants +from unstract.sdk1.adapters.x2text.llm_whisperer.src import LLMWhisperer +from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2 +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult +from unstract.sdk1.utils.tool import ToolUtils +from unstract.sdk1.x2txt import TextExtractionResult, X2Text + +logger = logging.getLogger(__name__) + + +@ExecutorRegistry.register +class LegacyExecutor(BaseExecutor): + """Executor that wraps the full prompt-service extraction pipeline. + + Routes incoming ``ExecutionContext`` requests to the appropriate + handler method based on the ``Operation`` enum. Each handler + corresponds to one of the original prompt-service HTTP endpoints. + """ + + # Maps Operation enum values to handler method names. + _OPERATION_MAP: dict[str, str] = { + Operation.EXTRACT.value: "_handle_extract", + Operation.INDEX.value: "_handle_index", + Operation.ANSWER_PROMPT.value: "_handle_answer_prompt", + Operation.SINGLE_PASS_EXTRACTION.value: "_handle_single_pass_extraction", + Operation.SUMMARIZE.value: "_handle_summarize", + Operation.AGENTIC_EXTRACTION.value: "_handle_agentic_extraction", + } + + @property + def name(self) -> str: + return "legacy" + + def execute(self, context: ExecutionContext) -> ExecutionResult: + """Route to the handler for ``context.operation``. + + Returns: + ``ExecutionResult`` on success or for unsupported operations. + ``LegacyExecutorError`` subclasses are caught and mapped to + ``ExecutionResult.failure()`` so callers always get a result. + + Raises: + NotImplementedError: From stub handlers (until 2D–2H). + """ + handler_name = self._OPERATION_MAP.get(context.operation) + if handler_name is None: + return ExecutionResult.failure( + error=( + f"LegacyExecutor does not support operation " + f"'{context.operation}'" + ) + ) + + handler = getattr(self, handler_name) + logger.info( + "LegacyExecutor routing operation=%s to %s " + "(run_id=%s request_id=%s)", + context.operation, + handler_name, + context.run_id, + context.request_id, + ) + try: + return handler(context) + except LegacyExecutorError as exc: + logger.warning( + "Handler %s raised %s: %s", + handler_name, + type(exc).__name__, + exc.message, + ) + return ExecutionResult.failure(error=exc.message) + + # ------------------------------------------------------------------ + # Phase 2B — Extract handler + # ------------------------------------------------------------------ + + def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: + """Handle ``Operation.EXTRACT`` — text extraction via x2text. + + Migrated from ``ExtractionService.perform_extraction()`` in + ``prompt-service/.../services/extraction.py``. + + Returns: + ExecutionResult with ``data`` containing ``extracted_text``. + """ + params: dict[str, Any] = context.executor_params + + # Required params + x2text_instance_id: str = params.get(IKeys.X2TEXT_INSTANCE_ID, "") + file_path: str = params.get(IKeys.FILE_PATH, "") + platform_api_key: str = params.get("platform_api_key", "") + + if not x2text_instance_id or not file_path: + missing = [] + if not x2text_instance_id: + missing.append(IKeys.X2TEXT_INSTANCE_ID) + if not file_path: + missing.append(IKeys.FILE_PATH) + return ExecutionResult.failure( + error=f"Missing required params: {', '.join(missing)}" + ) + + # Optional params + output_file_path: str | None = params.get(IKeys.OUTPUT_FILE_PATH) + enable_highlight: bool = params.get(IKeys.ENABLE_HIGHLIGHT, False) + usage_kwargs: dict[Any, Any] = params.get(IKeys.USAGE_KWARGS, {}) + tags: list[str] | None = params.get(IKeys.TAGS) + execution_source: str = context.execution_source + tool_exec_metadata: dict[str, Any] = params.get( + IKeys.TOOL_EXECUTION_METATADA, {} + ) + execution_data_dir: str | None = params.get(IKeys.EXECUTION_DATA_DIR) + + # Build adapter shim and X2Text + shim = ExecutorToolShim(platform_api_key=platform_api_key) + x2text = X2Text( + tool=shim, + adapter_instance_id=x2text_instance_id, + usage_kwargs=usage_kwargs, + ) + fs = FileUtils.get_fs_instance(execution_source=execution_source) + + try: + if enable_highlight and isinstance( + x2text.x2text_instance, (LLMWhisperer, LLMWhispererV2) + ): + process_response: TextExtractionResult = x2text.process( + input_file_path=file_path, + output_file_path=output_file_path, + enable_highlight=enable_highlight, + tags=tags, + fs=fs, + ) + self._update_exec_metadata( + fs=fs, + execution_source=execution_source, + tool_exec_metadata=tool_exec_metadata, + execution_data_dir=execution_data_dir, + process_response=process_response, + ) + else: + process_response = x2text.process( + input_file_path=file_path, + output_file_path=output_file_path, + tags=tags, + fs=fs, + ) + + return ExecutionResult( + success=True, + data={IKeys.EXTRACTED_TEXT: process_response.extracted_text}, + ) + except AdapterError as e: + name = x2text.x2text_instance.get_name() + msg = f"Error from text extractor '{name}'. {e}" + raise ExtractionError(message=msg) from e + + @staticmethod + def _update_exec_metadata( + fs: Any, + execution_source: str, + tool_exec_metadata: dict[str, Any] | None, + execution_data_dir: str | None, + process_response: TextExtractionResult, + ) -> None: + """Write whisper_hash metadata for tool-sourced executions.""" + if execution_source != ExecutionSource.TOOL.value: + return + whisper_hash = process_response.extraction_metadata.whisper_hash + metadata = {X2TextConstants.WHISPER_HASH: whisper_hash} + if tool_exec_metadata is not None: + for key, value in metadata.items(): + tool_exec_metadata[key] = value + metadata_path = str(Path(execution_data_dir) / IKeys.METADATA_FILE) + ToolUtils.dump_json( + file_to_dump=metadata_path, + json_to_dump=metadata, + fs=fs, + ) + + @staticmethod + def _get_indexing_deps(): + """Lazy-import heavy indexing dependencies. + + These imports trigger llama_index/qdrant/protobuf loading, + so they must not happen at module-collection time (tests). + Wrapped in a method so tests can mock it cleanly. + """ + from executor.executors.index import Index + from unstract.sdk1.embedding import EmbeddingCompat + from unstract.sdk1.vector_db import VectorDB + + return Index, EmbeddingCompat, VectorDB + + # ------------------------------------------------------------------ + # Phase 2C — Index handler + # ------------------------------------------------------------------ + + def _handle_index(self, context: ExecutionContext) -> ExecutionResult: + """Handle ``Operation.INDEX`` — vector DB indexing. + + Migrated from ``IndexingService.index()`` in + ``prompt-service/.../services/indexing.py``. + + Returns: + ExecutionResult with ``data`` containing ``doc_id``. + """ + params: dict[str, Any] = context.executor_params + + # Required params + embedding_instance_id: str = params.get(IKeys.EMBEDDING_INSTANCE_ID, "") + vector_db_instance_id: str = params.get(IKeys.VECTOR_DB_INSTANCE_ID, "") + x2text_instance_id: str = params.get(IKeys.X2TEXT_INSTANCE_ID, "") + file_path: str = params.get(IKeys.FILE_PATH, "") + extracted_text: str = params.get(IKeys.EXTRACTED_TEXT, "") + platform_api_key: str = params.get("platform_api_key", "") + + missing = [] + if not embedding_instance_id: + missing.append(IKeys.EMBEDDING_INSTANCE_ID) + if not vector_db_instance_id: + missing.append(IKeys.VECTOR_DB_INSTANCE_ID) + if not x2text_instance_id: + missing.append(IKeys.X2TEXT_INSTANCE_ID) + if not file_path: + missing.append(IKeys.FILE_PATH) + if missing: + return ExecutionResult.failure( + error=f"Missing required params: {', '.join(missing)}" + ) + + # Optional params + tool_id: str = params.get(IKeys.TOOL_ID, "") + file_hash: str | None = params.get(IKeys.FILE_HASH) + chunk_size: int = params.get(IKeys.CHUNK_SIZE, 512) + chunk_overlap: int = params.get(IKeys.CHUNK_OVERLAP, 128) + reindex: bool = params.get(IKeys.REINDEX, False) + enable_highlight: bool = params.get(IKeys.ENABLE_HIGHLIGHT, False) + enable_word_confidence: bool = params.get( + IKeys.ENABLE_WORD_CONFIDENCE, False + ) + usage_kwargs: dict[Any, Any] = params.get(IKeys.USAGE_KWARGS, {}) + tags: list[str] | None = params.get(IKeys.TAGS) + execution_source: str = context.execution_source + + instance_ids = InstanceIdentifiers( + embedding_instance_id=embedding_instance_id, + vector_db_instance_id=vector_db_instance_id, + x2text_instance_id=x2text_instance_id, + tool_id=tool_id, + tags=tags, + llm_instance_id=None, + ) + file_info = FileInfo(file_path=file_path, file_hash=file_hash) + processing_options = ProcessingOptions( + reindex=reindex, + enable_highlight=enable_highlight, + enable_word_confidence=enable_word_confidence, + usage_kwargs=usage_kwargs, + ) + + shim = ExecutorToolShim(platform_api_key=platform_api_key) + fs_instance = FileUtils.get_fs_instance( + execution_source=execution_source + ) + + # Skip indexing when chunk_size is 0 — no vector operations needed. + # ChunkingConfig raises ValueError for 0, so handle before DTO. + if chunk_size == 0: + from unstract.sdk1.utils.indexing import IndexingUtils + + doc_id = IndexingUtils.generate_index_key( + vector_db=vector_db_instance_id, + embedding=embedding_instance_id, + x2text=x2text_instance_id, + chunk_size=str(chunk_size), + chunk_overlap=str(chunk_overlap), + tool=shim, + file_path=file_path, + file_hash=file_hash, + fs=fs_instance, + ) + logger.info("Skipping indexing for chunk_size=0. Doc ID: %s", doc_id) + return ExecutionResult( + success=True, data={IKeys.DOC_ID: doc_id} + ) + + chunking_config = ChunkingConfig( + chunk_size=chunk_size, chunk_overlap=chunk_overlap + ) + + Index, EmbeddingCompat, VectorDB = self._get_indexing_deps() + + vector_db = None + try: + index = Index( + tool=shim, + run_id=context.run_id, + capture_metrics=True, + instance_identifiers=instance_ids, + chunking_config=chunking_config, + processing_options=processing_options, + ) + doc_id = index.generate_index_key( + file_info=file_info, fs=fs_instance + ) + + embedding = EmbeddingCompat( + adapter_instance_id=embedding_instance_id, + tool=shim, + kwargs={**usage_kwargs}, + ) + vector_db = VectorDB( + tool=shim, + adapter_instance_id=vector_db_instance_id, + embedding=embedding, + ) + + doc_id_found = index.is_document_indexed( + doc_id=doc_id, embedding=embedding, vector_db=vector_db + ) + index.perform_indexing( + vector_db=vector_db, + doc_id=doc_id, + extracted_text=extracted_text, + doc_id_found=doc_id_found, + ) + return ExecutionResult( + success=True, data={IKeys.DOC_ID: doc_id} + ) + except Exception as e: + status_code = getattr(e, "status_code", 500) + raise LegacyExecutorError( + message=f"Error while indexing: {e}", code=status_code + ) from e + finally: + if vector_db is not None: + vector_db.close() + + @staticmethod + def _get_prompt_deps(): + """Lazy-import heavy dependencies for answer_prompt processing. + + These imports trigger llama_index/protobuf loading so they must + not happen at module-collection time (tests). + """ + from executor.executors.answer_prompt import AnswerPromptService + from executor.executors.index import Index + from executor.executors.retrieval import RetrievalService + from executor.executors.variable_replacement import ( + VariableReplacementService, + ) + from unstract.sdk1.embedding import EmbeddingCompat + from unstract.sdk1.llm import LLM + from unstract.sdk1.vector_db import VectorDB + + return ( + AnswerPromptService, + RetrievalService, + VariableReplacementService, + Index, + LLM, + EmbeddingCompat, + VectorDB, + ) + + @staticmethod + def _sanitize_null_values( + structured_output: dict[str, Any], + ) -> dict[str, Any]: + """Replace 'NA' strings with None in structured output.""" + for k, v in structured_output.items(): + if isinstance(v, str) and v.lower() == "na": + structured_output[k] = None + elif isinstance(v, list): + for i in range(len(v)): + if isinstance(v[i], str) and v[i].lower() == "na": + v[i] = None + elif isinstance(v[i], dict): + for k1, v1 in v[i].items(): + if isinstance(v1, str) and v1.lower() == "na": + v[i][k1] = None + elif isinstance(v, dict): + for k1, v1 in v.items(): + if isinstance(v1, str) and v1.lower() == "na": + v[k1] = None + return structured_output + + def _handle_answer_prompt( + self, context: ExecutionContext + ) -> ExecutionResult: + """Handle ``Operation.ANSWER_PROMPT`` — multi-prompt extraction. + + Migrated from ``prompt_processor()`` in the prompt-service + ``answer_prompt`` controller. Processes all prompts in the + payload: variable replacement, context retrieval, LLM + completion, and type-specific post-processing. + + Returns: + ExecutionResult with ``data`` containing:: + + {"output": dict, "metadata": dict, "metrics": dict} + """ + from executor.executors.constants import ( + PromptServiceConstants as PSKeys, + RetrievalStrategy, + ) + + params: dict[str, Any] = context.executor_params + + # ---- Unpack payload ------------------------------------------------ + tool_settings = params.get(PSKeys.TOOL_SETTINGS, {}) + prompts = params.get(PSKeys.OUTPUTS, []) + tool_id: str = params.get(PSKeys.TOOL_ID, "") + run_id: str = context.run_id + execution_id: str = params.get(PSKeys.EXECUTION_ID, "") + file_hash = params.get(PSKeys.FILE_HASH) + file_path = params.get(PSKeys.FILE_PATH) + doc_name = str(params.get(PSKeys.FILE_NAME, "")) + log_events_id: str = params.get(PSKeys.LOG_EVENTS_ID, "") + custom_data: dict[str, Any] = params.get(PSKeys.CUSTOM_DATA, {}) + execution_source = params.get( + PSKeys.EXECUTION_SOURCE, context.execution_source + ) + platform_api_key: str = params.get( + PSKeys.PLATFORM_SERVICE_API_KEY, "" + ) + + structured_output: dict[str, Any] = {} + metadata: dict[str, Any] = { + PSKeys.RUN_ID: run_id, + PSKeys.FILE_NAME: doc_name, + PSKeys.CONTEXT: {}, + PSKeys.REQUIRED_FIELDS: {}, + } + metrics: dict[str, Any] = {} + variable_names: list[str] = [] + context_retrieval_metrics: dict[str, Any] = {} + + # Lazy imports + ( + AnswerPromptService, + RetrievalService, + VariableReplacementService, + _Index, # unused — doc_id via IndexingUtils + LLM, + EmbeddingCompat, + VectorDB, + ) = self._get_prompt_deps() + + # ---- First pass: collect variable names + required fields ---------- + for output in prompts: + variable_names.append(output[PSKeys.NAME]) + metadata[PSKeys.REQUIRED_FIELDS][output[PSKeys.NAME]] = output.get( + PSKeys.REQUIRED, None + ) + + # ---- Process each prompt ------------------------------------------- + for output in prompts: + prompt_name = output[PSKeys.NAME] + prompt_text = output[PSKeys.PROMPT] + chunk_size = output[PSKeys.CHUNK_SIZE] + + logger.info("[%s] chunk size: %s", tool_id, chunk_size) + + shim = ExecutorToolShim(platform_api_key=platform_api_key) + + # {{variable}} template replacement + if VariableReplacementService.is_variables_present( + prompt_text=prompt_text + ): + is_ide = execution_source == "ide" + prompt_text = ( + VariableReplacementService.replace_variables_in_prompt( + prompt=output, + structured_output=structured_output, + log_events_id=log_events_id, + tool_id=tool_id, + prompt_name=prompt_name, + doc_name=doc_name, + custom_data=custom_data, + is_ide=is_ide, + ) + ) + + logger.info("[%s] Executing prompt: '%s'", tool_id, prompt_name) + + # %variable% replacement + output[PSKeys.PROMPTX] = AnswerPromptService.extract_variable( + structured_output, variable_names, output, prompt_text + ) + + # Generate doc_id (standalone util — no Index DTOs needed) + from unstract.sdk1.utils.indexing import IndexingUtils + + doc_id = IndexingUtils.generate_index_key( + vector_db=output[PSKeys.VECTOR_DB], + embedding=output[PSKeys.EMBEDDING], + x2text=output[PSKeys.X2TEXT_ADAPTER], + chunk_size=str(output[PSKeys.CHUNK_SIZE]), + chunk_overlap=str(output[PSKeys.CHUNK_OVERLAP]), + tool=shim, + file_hash=file_hash, + file_path=file_path, + ) + + # Create adapters + try: + usage_kwargs = { + "run_id": run_id, + "execution_id": execution_id, + } + llm = LLM( + adapter_instance_id=output[PSKeys.LLM], + tool=shim, + usage_kwargs={ + **usage_kwargs, + PSKeys.LLM_USAGE_REASON: PSKeys.EXTRACTION, + }, + capture_metrics=True, + ) + embedding = None + vector_db = None + if chunk_size > 0: + embedding = EmbeddingCompat( + adapter_instance_id=output[PSKeys.EMBEDDING], + tool=shim, + kwargs={**usage_kwargs}, + ) + vector_db = VectorDB( + tool=shim, + adapter_instance_id=output[PSKeys.VECTOR_DB], + embedding=embedding, + ) + except Exception as e: + msg = f"Couldn't fetch adapter. {e}" + logger.error(msg) + status_code = getattr(e, "status_code", None) or 500 + raise LegacyExecutorError( + message=msg, code=status_code + ) from e + + # TABLE and LINE_ITEM types require plugins not yet available + if output[PSKeys.TYPE] == PSKeys.TABLE: + raise LegacyExecutorError( + message=( + "TABLE extraction requires plugins not yet " + "available in the executor worker." + ) + ) + if output[PSKeys.TYPE] == PSKeys.LINE_ITEM: + raise LegacyExecutorError( + message=( + "LINE_ITEM extraction requires plugins not yet " + "available in the executor worker." + ) + ) + + # ---- Retrieval + Answer ---------------------------------------- + context_list: list[str] = [] + try: + answer = "NA" + retrieval_strategy = output.get(PSKeys.RETRIEVAL_STRATEGY) + valid_strategies = {s.value for s in RetrievalStrategy} + + if retrieval_strategy in valid_strategies: + logger.info( + "[%s] Performing retrieval for: %s", + tool_id, + file_path, + ) + if chunk_size == 0: + context_list = ( + RetrievalService.retrieve_complete_context( + execution_source=execution_source, + file_path=file_path, + context_retrieval_metrics=context_retrieval_metrics, + prompt_key=prompt_name, + ) + ) + else: + context_list = RetrievalService.run_retrieval( + output=output, + doc_id=doc_id, + llm=llm, + vector_db=vector_db, + retrieval_type=retrieval_strategy, + context_retrieval_metrics=context_retrieval_metrics, + ) + metadata[PSKeys.CONTEXT][prompt_name] = context_list + + # Run prompt with retrieved context + answer = AnswerPromptService.construct_and_run_prompt( + tool_settings=tool_settings, + output=output, + llm=llm, + context="\n".join(context_list), + prompt=PSKeys.PROMPTX, + metadata=metadata, + execution_source=execution_source, + file_path=file_path, + ) + else: + logger.info( + "Invalid retrieval strategy: %s", retrieval_strategy + ) + + # ---- Type-specific post-processing ------------------------- + self._apply_type_conversion( + output=output, + answer=answer, + structured_output=structured_output, + llm=llm, + tool_settings=tool_settings, + metadata=metadata, + execution_source=execution_source, + file_path=file_path, + log_events_id=log_events_id, + tool_id=tool_id, + doc_name=doc_name, + ) + + # Strip trailing newline + val = structured_output.get(prompt_name) + if isinstance(val, str): + structured_output[prompt_name] = val.rstrip("\n") + + finally: + # Collect metrics + metrics.setdefault(prompt_name, {}).update( + { + "context_retrieval": context_retrieval_metrics.get( + prompt_name, {} + ), + f"{llm.get_usage_reason()}_llm": llm.get_metrics(), + } + ) + if vector_db: + vector_db.close() + + # ---- Sanitize null values ------------------------------------------ + structured_output = self._sanitize_null_values(structured_output) + + return ExecutionResult( + success=True, + data={ + PSKeys.OUTPUT: structured_output, + PSKeys.METADATA: metadata, + PSKeys.METRICS: metrics, + }, + ) + + @staticmethod + def _apply_type_conversion( + output: dict[str, Any], + answer: str, + structured_output: dict[str, Any], + llm: Any, + tool_settings: dict[str, Any], + metadata: dict[str, Any], + execution_source: str, + file_path: str, + log_events_id: str = "", + tool_id: str = "", + doc_name: str = "", + ) -> None: + """Apply type-specific conversion to the LLM answer. + + Handles NUMBER, EMAIL, DATE, BOOLEAN, JSON, and TEXT types. + """ + from executor.executors.answer_prompt import AnswerPromptService + from executor.executors.constants import PromptServiceConstants as PSKeys + + prompt_name = output[PSKeys.NAME] + output_type = output[PSKeys.TYPE] + + if output_type == PSKeys.NUMBER: + if answer.lower() == "na": + structured_output[prompt_name] = None + else: + prompt = ( + f"Extract the number from the following " + f"text:\n{answer}\n\nOutput just the number. " + f"If the number is expressed in millions " + f"or thousands, expand the number to its numeric value " + f"The number should be directly assignable " + f"to a numeric variable. " + f"It should not have any commas, " + f"percentages or other grouping " + f"characters. No explanation is required. " + f"If you cannot extract the number, output 0." + ) + answer = AnswerPromptService.run_completion( + llm=llm, prompt=prompt + ) + try: + structured_output[prompt_name] = float(answer) + except Exception: + structured_output[prompt_name] = None + + elif output_type == PSKeys.EMAIL: + if answer.lower() == "na": + structured_output[prompt_name] = None + else: + prompt = ( + f"Extract the email from the following text:\n{answer}" + f"\n\nOutput just the email. " + f"The email should be directly assignable to a string " + f"variable. No explanation is required. If you cannot " + f'extract the email, output "NA".' + ) + answer = AnswerPromptService.run_completion( + llm=llm, prompt=prompt + ) + structured_output[prompt_name] = answer + + elif output_type == PSKeys.DATE: + if answer.lower() == "na": + structured_output[prompt_name] = None + else: + prompt = ( + f"Extract the date from the following text:\n{answer}" + f"\n\nOutput just the date. " + f"The date should be in ISO date time format. " + f"No explanation is required. The date should be " + f"directly assignable to a date variable. " + f'If you cannot convert the string into a date, ' + f'output "NA".' + ) + answer = AnswerPromptService.run_completion( + llm=llm, prompt=prompt + ) + structured_output[prompt_name] = answer + + elif output_type == PSKeys.BOOLEAN: + if answer.lower() == "na": + structured_output[prompt_name] = None + else: + prompt = ( + f"Extract yes/no from the following text:\n{answer}\n\n" + f"Output in single word. " + f"If the context is trying to convey that the answer " + f'is true, then return "yes", else return "no".' + ) + answer = AnswerPromptService.run_completion( + llm=llm, prompt=prompt + ) + structured_output[prompt_name] = answer.lower() == "yes" + + elif output_type == PSKeys.JSON: + AnswerPromptService.handle_json( + answer=answer, + structured_output=structured_output, + output=output, + llm=llm, + enable_highlight=tool_settings.get( + PSKeys.ENABLE_HIGHLIGHT, False + ), + enable_word_confidence=tool_settings.get( + PSKeys.ENABLE_WORD_CONFIDENCE, False + ), + execution_source=execution_source, + metadata=metadata, + file_path=file_path, + log_events_id=log_events_id, + tool_id=tool_id, + doc_name=doc_name, + ) + + else: + # TEXT or any other type — store raw answer + structured_output[prompt_name] = answer + + def _handle_single_pass_extraction( + self, context: ExecutionContext + ) -> ExecutionResult: + """Handle ``Operation.SINGLE_PASS_EXTRACTION``. + + Functionally identical to ``_handle_answer_prompt``. The "single + pass" vs "multi pass" distinction is at the *caller* level (the + structure tool batches all prompts into one request vs iterating). + The prompt-service processes both with the same ``prompt_processor`` + handler. + + Returns: + ExecutionResult with ``data`` containing:: + + {"output": dict, "metadata": dict, "metrics": dict} + """ + logger.info( + "single_pass_extraction delegating to answer_prompt " + "(run_id=%s)", + context.run_id, + ) + return self._handle_answer_prompt(context) + + def _handle_summarize( + self, context: ExecutionContext + ) -> ExecutionResult: + """Handle ``Operation.SUMMARIZE`` — document summarization. + + Called by the structure tool when ``summarize_as_source`` is + enabled. Takes the full extracted document text and a + user-provided summarize prompt, runs LLM completion, and + returns the summarized text. + + Expected ``executor_params`` keys: + - ``llm_adapter_instance_id`` — LLM adapter to use + - ``summarize_prompt`` — user's summarize instruction + - ``context`` — full document text to summarize + - ``prompt_keys`` — list of field names to focus on + - ``PLATFORM_SERVICE_API_KEY`` — auth key for adapters + + Returns: + ExecutionResult with ``data`` containing:: + + {"data": str} # summarized text + """ + from executor.executors.constants import PromptServiceConstants as PSKeys + + params: dict[str, Any] = context.executor_params + + llm_adapter_id: str = params.get("llm_adapter_instance_id", "") + summarize_prompt: str = params.get("summarize_prompt", "") + doc_context: str = params.get(PSKeys.CONTEXT, "") + prompt_keys: list[str] = params.get("prompt_keys", []) + platform_api_key: str = params.get( + PSKeys.PLATFORM_SERVICE_API_KEY, "" + ) + + if not llm_adapter_id: + return ExecutionResult.failure( + error="Missing required param: llm_adapter_instance_id" + ) + if not doc_context: + return ExecutionResult.failure( + error="Missing required param: context" + ) + + # Build the summarize prompt + prompt = f"{summarize_prompt}\n\n" + if prompt_keys: + prompt += ( + f"Focus on these fields: {', '.join(prompt_keys)}\n\n" + ) + prompt += ( + f"Context:\n---------------\n{doc_context}\n" + f"-----------------\n\nSummary:" + ) + + shim = ExecutorToolShim(platform_api_key=platform_api_key) + usage_kwargs = {"run_id": context.run_id} + + _, _, _, _, LLM, _, _ = self._get_prompt_deps() + + try: + llm = LLM( + adapter_instance_id=llm_adapter_id, + tool=shim, + usage_kwargs={**usage_kwargs}, + ) + from executor.executors.answer_prompt import AnswerPromptService + + summary = AnswerPromptService.run_completion( + llm=llm, prompt=prompt + ) + return ExecutionResult( + success=True, + data={"data": summary}, + ) + except Exception as e: + status_code = getattr(e, "status_code", None) or 500 + raise LegacyExecutorError( + message=f"Error during summarization: {e}", + code=status_code, + ) from e + + def _handle_agentic_extraction( + self, context: ExecutionContext + ) -> ExecutionResult: + """Handle ``Operation.AGENTIC_EXTRACTION``. + + Agentic extraction requires the agentic extraction plugin + (AutoGen-based multi-agent system). This is not available + in the executor worker — it will be migrated when plugin + support is added. + + Returns: + ExecutionResult.failure indicating the plugin is required. + """ + raise LegacyExecutorError( + message=( + "Agentic extraction requires the agentic extraction " + "plugin which is not yet available in the executor " + "worker." + ), + ) diff --git a/workers/executor/executors/postprocessor.py b/workers/executor/executors/postprocessor.py new file mode 100644 index 0000000000..bf14a56698 --- /dev/null +++ b/workers/executor/executors/postprocessor.py @@ -0,0 +1,119 @@ +"""Webhook postprocessor for structured output. + +Copied from prompt-service/.../helpers/postprocessor.py — already Flask-free. +""" + +import json +import logging +from typing import Any + +import requests + +logger = logging.getLogger(__name__) + + +def _validate_structured_output(data: Any) -> bool: + """Validate that structured output is a dict or list.""" + return isinstance(data, (dict, list)) + + +def _validate_highlight_data(updated_data: Any, original_data: Any) -> Any: + """Validate highlight data and return appropriate value.""" + if ( + updated_data is not None + and updated_data != original_data + and not isinstance(updated_data, list) + ): + logger.warning( + "Ignoring webhook highlight_data due to invalid type (expected list)" + ) + return original_data + return updated_data + + +def _process_successful_response( + response_data: dict, parsed_data: dict, highlight_data: list | None +) -> tuple[dict[str, Any], list | None]: + """Process successful webhook response.""" + if "structured_output" not in response_data: + logger.warning("Response missing 'structured_output' key") + return parsed_data, highlight_data + + updated_parsed_data = response_data["structured_output"] + + if not _validate_structured_output(updated_parsed_data): + logger.warning("Ignoring postprocessing due to invalid structured_output type") + return parsed_data, highlight_data + + updated_highlight_data = response_data.get("highlight_data", highlight_data) + updated_highlight_data = _validate_highlight_data( + updated_highlight_data, highlight_data + ) + + return updated_parsed_data, updated_highlight_data + + +def _make_webhook_request( + webhook_url: str, payload: dict, timeout: float +) -> tuple[dict[str, Any], list | None] | None: + """Make webhook request and return processed response or None on failure.""" + try: + response = requests.post( + webhook_url, + json=payload, + timeout=timeout, + headers={"Content-Type": "application/json"}, + allow_redirects=False, # Prevent redirect-based SSRF + ) + + if response.status_code != 200: + logger.warning( + f"Postprocessing server returned status code: {response.status_code}" + ) + return None + + return response.json() + + except json.JSONDecodeError as e: + logger.warning(f"Invalid JSON response from postprocessing server: {e}") + except requests.exceptions.Timeout: + logger.warning(f"Postprocessing server request timed out after {timeout}s") + except requests.exceptions.RequestException as e: + logger.warning(f"Postprocessing server request failed: {e}") + except Exception as e: + logger.warning(f"Unexpected error during postprocessing: {e}") + + return None + + +def postprocess_data( + parsed_data: dict[str, Any], + webhook_enabled: bool = False, + webhook_url: str | None = None, + timeout: float = 2.0, + highlight_data: list | None = None, +) -> tuple[dict[str, Any], list | None]: + """Post-process parsed data by sending it to an external server. + + Args: + parsed_data: The parsed data to be post-processed + webhook_enabled: Whether webhook postprocessing is enabled + webhook_url: URL endpoint for the webhook + timeout: Request timeout in seconds (default: 2.0) + highlight_data: Highlight data from metadata to send to webhook + + Returns: + tuple: (postprocessed_data, updated_highlight_data) + """ + if not webhook_enabled or not webhook_url: + return parsed_data, highlight_data + + payload = {"structured_output": parsed_data} + if highlight_data is not None: + payload["highlight_data"] = highlight_data + + response_data = _make_webhook_request(webhook_url, payload, timeout) + if response_data is None: + return parsed_data, highlight_data + + return _process_successful_response(response_data, parsed_data, highlight_data) diff --git a/workers/executor/executors/retrieval.py b/workers/executor/executors/retrieval.py new file mode 100644 index 0000000000..3b4cd1da0a --- /dev/null +++ b/workers/executor/executors/retrieval.py @@ -0,0 +1,113 @@ +"""Retrieval service — factory for retriever strategies. + +Lazy-imports retriever classes to avoid llama_index/protobuf conflicts +at test-collection time. Same pattern as _get_indexing_deps() in Phase 2C. +""" + +import datetime +import logging +from typing import Any + +from executor.executors.constants import RetrievalStrategy + +logger = logging.getLogger(__name__) + + +class RetrievalService: + @staticmethod + def _get_retriever_map() -> dict: + """Lazy-import all retriever classes. + + Returns dict mapping strategy string to class. + Wrapped in a method so tests can mock it. + """ + from executor.executors.retrievers.automerging import AutomergingRetriever + from executor.executors.retrievers.fusion import FusionRetriever + from executor.executors.retrievers.keyword_table import KeywordTableRetriever + from executor.executors.retrievers.recursive import RecursiveRetrieval + from executor.executors.retrievers.router import RouterRetriever + from executor.executors.retrievers.simple import SimpleRetriever + from executor.executors.retrievers.subquestion import SubquestionRetriever + + return { + RetrievalStrategy.SIMPLE.value: SimpleRetriever, + RetrievalStrategy.SUBQUESTION.value: SubquestionRetriever, + RetrievalStrategy.FUSION.value: FusionRetriever, + RetrievalStrategy.RECURSIVE.value: RecursiveRetrieval, + RetrievalStrategy.ROUTER.value: RouterRetriever, + RetrievalStrategy.KEYWORD_TABLE.value: KeywordTableRetriever, + RetrievalStrategy.AUTOMERGING.value: AutomergingRetriever, + } + + @staticmethod + def run_retrieval( + output: dict[str, Any], + doc_id: str, + llm: Any, + vector_db: Any, + retrieval_type: str, + context_retrieval_metrics: dict[str, Any] | None = None, + ) -> list[str]: + """Factory: instantiate and execute the retriever for the given strategy.""" + from executor.executors.constants import PromptServiceConstants as PSKeys + + prompt = output[PSKeys.PROMPTX] + top_k = output[PSKeys.SIMILARITY_TOP_K] + prompt_key = output.get(PSKeys.NAME, "") + start = datetime.datetime.now() + + retriever_map = RetrievalService._get_retriever_map() + retriever_class = retriever_map.get(retrieval_type) + if not retriever_class: + raise ValueError(f"Unknown retrieval type: {retrieval_type}") + + retriever = retriever_class( + vector_db=vector_db, + doc_id=doc_id, + prompt=prompt, + top_k=top_k, + llm=llm, + ) + context = retriever.retrieve() + + elapsed = (datetime.datetime.now() - start).total_seconds() + if context_retrieval_metrics is not None: + context_retrieval_metrics[prompt_key] = {"time_taken(s)": elapsed} + + logger.info( + "[Retrieval] prompt='%s' doc_id=%s strategy='%s' top_k=%d " + "chunks=%d time=%.3fs", + prompt_key, + doc_id, + retrieval_type, + top_k, + len(context), + elapsed, + ) + return list(context) + + @staticmethod + def retrieve_complete_context( + execution_source: str, + file_path: str, + context_retrieval_metrics: dict[str, Any] | None = None, + prompt_key: str = "", + ) -> list[str]: + """Load full file content for chunk_size=0 retrieval.""" + from executor.executors.file_utils import FileUtils + + fs = FileUtils.get_fs_instance(execution_source=execution_source) + start = datetime.datetime.now() + content = fs.read(path=file_path, mode="r") + elapsed = (datetime.datetime.now() - start).total_seconds() + + if context_retrieval_metrics is not None: + context_retrieval_metrics[prompt_key] = {"time_taken(s)": elapsed} + + logger.info( + "[Retrieval] prompt='%s' complete_context chars=%d time=%.3fs", + prompt_key, + len(content), + elapsed, + ) + return [content] diff --git a/workers/executor/executors/retrievers/__init__.py b/workers/executor/executors/retrievers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/workers/executor/executors/retrievers/automerging.py b/workers/executor/executors/retrievers/automerging.py new file mode 100644 index 0000000000..09adcc7739 --- /dev/null +++ b/workers/executor/executors/retrievers/automerging.py @@ -0,0 +1,85 @@ +import logging + +from llama_index.core import VectorStoreIndex +from llama_index.core.retrievers import AutoMergingRetriever as LlamaAutoMergingRetriever +from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters + +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever + +logger = logging.getLogger(__name__) + + +class AutomergingRetriever(BaseRetriever): + """Automerging retrieval using LlamaIndex's native AutoMergingRetriever. + + This retriever merges smaller chunks into larger ones when the smaller chunks + don't contain enough information, providing better context for answers. + """ + + def retrieve(self) -> set[str]: + """Retrieve text chunks using LlamaIndex's native AutoMergingRetriever. + + Returns: + set[str]: A set of text chunks retrieved from the database. + """ + try: + logger.info( + f"Retrieving chunks for {self.doc_id} using LlamaIndex AutoMergingRetriever." + ) + + # Get the vector store index + vector_store_index: VectorStoreIndex = self.vector_db.get_vector_store_index() + + # Create base vector retriever with metadata filters + base_retriever = vector_store_index.as_retriever( + similarity_top_k=self.top_k, + filters=MetadataFilters( + filters=[ + ExactMatchFilter(key="doc_id", value=self.doc_id), + ], + ), + ) + + # Try to use native AutoMergingRetriever + try: + # Create AutoMergingRetriever with the base retriever + auto_merging_retriever = LlamaAutoMergingRetriever( + base_retriever, + storage_context=self.vector_db.get_storage_context() + if hasattr(self.vector_db, "get_storage_context") + else None, + verbose=False, + ) + + # Retrieve nodes using auto-merging + nodes = auto_merging_retriever.retrieve(self.prompt) + + except Exception as e: + logger.error(f"AutoMergingRetriever failed : {e}") + raise RetrievalError(f"AutoMergingRetriever failed: {str(e)}") from e + + # Extract unique text chunks + chunks: set[str] = set() + for node in nodes: + if node.score > 0: + chunks.add(node.get_content()) + else: + logger.info( + f"Node score is less than 0. " + f"Ignored: {node.node_id} with score {node.score}" + ) + + logger.info( + f"Successfully retrieved {len(chunks)} chunks using AutoMergingRetriever." + ) + return chunks + + except (ValueError, AttributeError, KeyError, ImportError) as e: + logger.error(f"Error during auto-merging retrieval for {self.doc_id}: {e}") + raise RetrievalError(str(e)) from e + except Exception as e: + logger.error( + f"Unexpected error during auto-merging retrieval for {self.doc_id}: {e}" + ) + raise RetrievalError(f"Unexpected error: {str(e)}") from e diff --git a/workers/executor/executors/retrievers/base_retriever.py b/workers/executor/executors/retrievers/base_retriever.py new file mode 100644 index 0000000000..48c7485255 --- /dev/null +++ b/workers/executor/executors/retrievers/base_retriever.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from unstract.sdk1.llm import LLM + from unstract.sdk1.vector_db import VectorDB + + +class BaseRetriever: + def __init__( + self, + vector_db: VectorDB, + prompt: str, + doc_id: str, + top_k: int, + llm: LLM | None = None, + ): + """Initialize the Retrieval class. + + Args: + vector_db (VectorDB): The vector database instance. + prompt (str): The query prompt. + doc_id (str): Document identifier for query context. + top_k (int): Number of top results to retrieve. + """ + self.vector_db = vector_db + self.prompt = prompt + self.doc_id = doc_id + self.top_k = top_k + self.llm = llm if llm else None + + @staticmethod + def retrieve() -> set[str]: + return set() diff --git a/workers/executor/executors/retrievers/fusion.py b/workers/executor/executors/retrievers/fusion.py new file mode 100644 index 0000000000..313f28caba --- /dev/null +++ b/workers/executor/executors/retrievers/fusion.py @@ -0,0 +1,94 @@ +import logging + +from llama_index.core import VectorStoreIndex +from llama_index.core.retrievers import QueryFusionRetriever +from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters + +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever + +logger = logging.getLogger(__name__) + + +class FusionRetriever(BaseRetriever): + """Fusion retrieval class using LlamaIndex's native QueryFusionRetriever. + + This technique generates multiple query variations and combines results + using reciprocal rank fusion for improved relevance. + """ + + def retrieve(self) -> set[str]: + """Retrieve text chunks using LlamaIndex's QueryFusionRetriever. + + Returns: + set[str]: A set of text chunks retrieved from the database. + """ + try: + logger.info( + f"Retrieving chunks for {self.doc_id} using LlamaIndex QueryFusionRetriever." + ) + + # Get the vector store index + vector_store_index: VectorStoreIndex = self.vector_db.get_vector_store_index() + + # Create multiple retrievers with different parameters for true fusion + filters = MetadataFilters( + filters=[ + ExactMatchFilter(key="doc_id", value=self.doc_id), + ], + ) + + # Retriever 1: Standard similarity search + retriever_1 = vector_store_index.as_retriever( + similarity_top_k=self.top_k, + filters=filters, + ) + + # Retriever 2: Broader search with more candidates + retriever_2 = vector_store_index.as_retriever( + similarity_top_k=self.top_k * 2, + filters=filters, + ) + + # Retriever 3: Focused search with fewer candidates + retriever_3 = vector_store_index.as_retriever( + similarity_top_k=max(1, self.top_k // 2), + filters=filters, + ) + + # Create LlamaIndex QueryFusionRetriever with multiple retrievers + fusion_retriever = QueryFusionRetriever( + [retriever_1, retriever_2, retriever_3], # Multiple retrievers for fusion + similarity_top_k=self.top_k, + num_queries=4, # Generate multiple query variations + mode="simple", # Use simple fusion mode (reciprocal rank fusion) + use_async=False, + verbose=True, + llm=self.llm, # LLM generates query variations + ) + + # Retrieve nodes using fusion technique + nodes = fusion_retriever.retrieve(self.prompt) + + # Extract unique text chunks + chunks: set[str] = set() + for node in nodes: + if node.score > 0: + chunks.add(node.get_content()) + else: + logger.info( + f"Node score is less than 0. " + f"Ignored: {node.node_id} with score {node.score}" + ) + + logger.info(f"Successfully retrieved {len(chunks)} chunks using fusion.") + return chunks + + except (ValueError, AttributeError, KeyError, ImportError) as e: + logger.error(f"Error during fusion retrieval for {self.doc_id}: {e}") + raise RetrievalError(str(e)) from e + except Exception as e: + logger.error( + f"Unexpected error during fusion retrieval for {self.doc_id}: {e}" + ) + raise RetrievalError(f"Unexpected error: {str(e)}") from e diff --git a/workers/executor/executors/retrievers/keyword_table.py b/workers/executor/executors/retrievers/keyword_table.py new file mode 100644 index 0000000000..d58db0f74a --- /dev/null +++ b/workers/executor/executors/retrievers/keyword_table.py @@ -0,0 +1,79 @@ +import logging + +from llama_index.core import VectorStoreIndex +from llama_index.core.indices.keyword_table import KeywordTableIndex +from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters + +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever + +logger = logging.getLogger(__name__) + + +class KeywordTableRetriever(BaseRetriever): + """Keyword table retrieval using LlamaIndex's native KeywordTableIndex.""" + + def retrieve(self) -> set[str]: + """Retrieve text chunks using LlamaIndex's native KeywordTableIndex. + + Returns: + set[str]: A set of text chunks retrieved from the database. + """ + try: + logger.info( + f"Retrieving chunks for {self.doc_id} using LlamaIndex KeywordTableIndex." + ) + + # Get documents from vector index for keyword indexing + vector_store_index: VectorStoreIndex = self.vector_db.get_vector_store_index() + + # Get all nodes for the document + all_retriever = vector_store_index.as_retriever( + similarity_top_k=1000, # Get all nodes + filters=MetadataFilters( + filters=[ + ExactMatchFilter(key="doc_id", value=self.doc_id), + ], + ), + ) + + # Retrieve all nodes to build keyword index + all_nodes = all_retriever.retrieve(" ") + + if not all_nodes: + logger.warning(f"No nodes found for doc_id: {self.doc_id}") + return set() + + # Create KeywordTableIndex from nodes using our provided LLM + keyword_index = KeywordTableIndex( + nodes=[node.node for node in all_nodes], + show_progress=True, + llm=self.llm, # Use the provided LLM instead of defaulting to OpenAI + ) + + # Create retriever from keyword index + keyword_retriever = keyword_index.as_retriever( + similarity_top_k=self.top_k, + ) + + # Retrieve nodes using keyword matching + nodes = keyword_retriever.retrieve(self.prompt) + + # Extract unique text chunks + chunks: set[str] = set() + for node in nodes: + chunks.add(node.get_content()) + + logger.info( + f"Successfully retrieved {len(chunks)} chunks using KeywordTableIndex." + ) + return chunks + + except (ValueError, AttributeError, KeyError, ImportError) as e: + logger.error(f"Error during keyword retrieval for {self.doc_id}: {e}") + raise RetrievalError(str(e)) from e + except Exception as e: + logger.error( + f"Unexpected error during keyword retrieval for {self.doc_id}: {e}" + ) + raise RetrievalError(f"Unexpected error: {str(e)}") from e diff --git a/workers/executor/executors/retrievers/recursive.py b/workers/executor/executors/retrievers/recursive.py new file mode 100644 index 0000000000..b520d26ea0 --- /dev/null +++ b/workers/executor/executors/retrievers/recursive.py @@ -0,0 +1,77 @@ +import logging + +from llama_index.core import VectorStoreIndex +from llama_index.core.retrievers import RecursiveRetriever +from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters + +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever + +logger = logging.getLogger(__name__) + + +class RecursiveRetrieval(BaseRetriever): + """Recursive retrieval using LlamaIndex's native RecursiveRetriever. + + This retriever performs recursive retrieval by breaking down queries + and refining results through multiple retrieval steps. + """ + + def retrieve(self) -> set[str]: + """Retrieve text chunks using LlamaIndex's native RecursiveRetriever. + + Returns: + set[str]: A set of text chunks retrieved from the database. + """ + try: + logger.info( + f"Retrieving chunks for {self.doc_id} using LlamaIndex RecursiveRetriever." + ) + + # Get the vector store index + vector_store_index: VectorStoreIndex = self.vector_db.get_vector_store_index() + + # Create base retriever with metadata filters + base_retriever = vector_store_index.as_retriever( + similarity_top_k=self.top_k, + filters=MetadataFilters( + filters=[ + ExactMatchFilter(key="doc_id", value=self.doc_id), + ], + ), + ) + + # Create RecursiveRetriever + recursive_retriever = RecursiveRetriever( + "vector", # root retriever key + retriever_dict={"vector": base_retriever}, + verbose=True, + ) + + # Retrieve nodes using RecursiveRetriever + nodes = recursive_retriever.retrieve(self.prompt) + + # Extract unique text chunks + chunks: set[str] = set() + for node in nodes: + if node.score > 0: + chunks.add(node.get_content()) + else: + logger.info( + f"Node score is less than 0. " + f"Ignored: {node.node_id} with score {node.score}" + ) + + logger.info( + f"Successfully retrieved {len(chunks)} chunks using RecursiveRetriever." + ) + return chunks + + except (ValueError, AttributeError, KeyError, ImportError) as e: + logger.error(f"Error during recursive retrieval for {self.doc_id}: {e}") + raise RetrievalError(str(e)) from e + except Exception as e: + logger.error( + f"Unexpected error during recursive retrieval for {self.doc_id}: {e}" + ) + raise RetrievalError(f"Unexpected error: {str(e)}") from e diff --git a/workers/executor/executors/retrievers/router.py b/workers/executor/executors/retrievers/router.py new file mode 100644 index 0000000000..ae7a99dfdf --- /dev/null +++ b/workers/executor/executors/retrievers/router.py @@ -0,0 +1,157 @@ +import logging + +from llama_index.core import VectorStoreIndex +from llama_index.core.query_engine import RouterQueryEngine +from llama_index.core.selectors import LLMSingleSelector +from llama_index.core.tools import QueryEngineTool, ToolMetadata +from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters + +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever + +logger = logging.getLogger(__name__) + + +class RouterRetriever(BaseRetriever): + """Router retrieval class using LlamaIndex's native RouterQueryEngine. + + This technique intelligently routes queries to different retrieval strategies + based on query analysis. + """ + + def _create_metadata_filters(self): + """Create metadata filters for doc_id.""" + return MetadataFilters( + filters=[ + ExactMatchFilter(key="doc_id", value=self.doc_id), + ], + ) + + def _create_base_query_engine(self, vector_store_index, filters): + """Create the base vector query engine.""" + return vector_store_index.as_query_engine( + similarity_top_k=self.top_k, + filters=filters, + llm=self.llm, + ) + + def _add_keyword_search_tool(self, query_engine_tools, vector_store_index, filters): + """Add keyword search tool to query engine tools list.""" + try: + keyword_query_engine = vector_store_index.as_query_engine( + similarity_top_k=self.top_k * 2, + filters=filters, + llm=self.llm, + ) + query_engine_tools.append( + QueryEngineTool( + query_engine=keyword_query_engine, + metadata=ToolMetadata( + name="keyword_search", + description=( + "Best for finding specific terms, names, numbers, dates, " + "or exact phrases. Use when looking for precise matches." + ), + ), + ) + ) + except Exception as e: + logger.debug(f"Could not create keyword search engine: {e}") + + def _add_broad_search_tool(self, query_engine_tools, vector_store_index, filters): + """Add broad search tool to query engine tools list.""" + try: + broad_query_engine = vector_store_index.as_query_engine( + similarity_top_k=self.top_k * 3, + filters=filters, + llm=self.llm, + ) + query_engine_tools.append( + QueryEngineTool( + query_engine=broad_query_engine, + metadata=ToolMetadata( + name="broad_search", + description=( + "Useful for general questions, exploratory queries, " + "or when you need comprehensive information on a topic." + ), + ), + ) + ) + except Exception as e: + logger.debug(f"Could not create broad search engine: {e}") + + def _extract_chunks_from_response(self, response): + """Extract chunks from router query response.""" + chunks: set[str] = set() + if hasattr(response, "source_nodes"): + for node in response.source_nodes: + if node.score > 0: + chunks.add(node.get_content()) + else: + logger.info( + f"Node score is less than 0. " + f"Ignored: {node.node_id} with score {node.score}" + ) + return chunks + + def retrieve(self) -> set[str]: + """Retrieve text chunks using LlamaIndex's RouterQueryEngine. + + Returns: + set[str]: A set of text chunks retrieved from the database. + """ + try: + logger.info( + f"Retrieving chunks for {self.doc_id} using LlamaIndex RouterQueryEngine." + ) + + vector_store_index: VectorStoreIndex = self.vector_db.get_vector_store_index() + filters = self._create_metadata_filters() + vector_query_engine = self._create_base_query_engine( + vector_store_index, filters + ) + + if not self.llm: + return set() + + # Create base query engine tools + query_engine_tools = [ + QueryEngineTool( + query_engine=vector_query_engine, + metadata=ToolMetadata( + name="vector_search", + description=( + "Useful for semantic similarity search, conceptual questions, " + "and finding information based on meaning and context." + ), + ), + ), + ] + + # Add additional search strategies + self._add_keyword_search_tool(query_engine_tools, vector_store_index, filters) + self._add_broad_search_tool(query_engine_tools, vector_store_index, filters) + + # Create and execute router query + router_query_engine = RouterQueryEngine.from_defaults( + selector=LLMSingleSelector.from_defaults(llm=self.llm), + query_engine_tools=query_engine_tools, + verbose=True, + llm=self.llm, + ) + + response = router_query_engine.query(self.prompt) + chunks = self._extract_chunks_from_response(response) + + logger.info(f"Successfully retrieved {len(chunks)} chunks using router.") + return chunks + + except (ValueError, AttributeError, KeyError, ImportError) as e: + logger.error(f"Error during router retrieval for {self.doc_id}: {e}") + raise RetrievalError(str(e)) from e + except Exception as e: + logger.error( + f"Unexpected error during router retrieval for {self.doc_id}: {e}" + ) + raise RetrievalError(f"Unexpected error: {str(e)}") from e diff --git a/workers/executor/executors/retrievers/simple.py b/workers/executor/executors/retrievers/simple.py new file mode 100644 index 0000000000..71ed7e6af5 --- /dev/null +++ b/workers/executor/executors/retrievers/simple.py @@ -0,0 +1,53 @@ +import logging +import time + +from llama_index.core import VectorStoreIndex +from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters + +from executor.executors.retrievers.base_retriever import BaseRetriever + +logger = logging.getLogger(__name__) + + +class SimpleRetriever(BaseRetriever): + def retrieve(self) -> set[str]: + context = self._simple_retrieval() + if not context: + # UN-1288 For Pinecone, we are seeing an inconsistent case where + # query with doc_id fails even though indexing just happened. + # This causes the following retrieve to return no text. + # To rule out any lag on the Pinecone vector DB write, + # the following sleep is added + # Note: This will not fix the issue. Since this issue is inconsistent + # and not reproducible easily, this is just a safety net. + logger.info( + f"[doc_id: {self.doc_id}] Could not retrieve context, " + "retrying after 2 secs to handle issues due to lag" + ) + time.sleep(2) + context = self._simple_retrieval() + return context + + def _simple_retrieval(self): + vector_query_engine: VectorStoreIndex = self.vector_db.get_vector_store_index() + retriever = vector_query_engine.as_retriever( + similarity_top_k=self.top_k, + filters=MetadataFilters( + filters=[ + ExactMatchFilter(key="doc_id", value=self.doc_id), + ], + ), + ) + nodes = retriever.retrieve(self.prompt) + context: set[str] = set() + for node in nodes: + # May have to fine-tune this value for node score or keep it + # configurable at the adapter level + if node.score > 0: + context.add(node.get_content()) + else: + logger.info( + "Node score is less than 0. " + f"Ignored: {node.node_id} with score {node.score}" + ) + return context diff --git a/workers/executor/executors/retrievers/subquestion.py b/workers/executor/executors/retrievers/subquestion.py new file mode 100644 index 0000000000..2b4665465b --- /dev/null +++ b/workers/executor/executors/retrievers/subquestion.py @@ -0,0 +1,59 @@ +import logging + +from llama_index.core.query_engine import SubQuestionQueryEngine +from llama_index.core.schema import QueryBundle +from llama_index.core.tools import QueryEngineTool, ToolMetadata + +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever + +logger = logging.getLogger(__name__) + + +class SubquestionRetriever(BaseRetriever): + """SubquestionRetrieval class for querying VectorDB using LlamaIndex's + SubQuestionQueryEngine. + """ + + def retrieve(self) -> set[str]: + """Retrieve text chunks from the VectorDB based on the provided prompt. + + Returns: + set[str]: A set of text chunks retrieved from the database. + """ + try: + logger.info("Initialising vector query engine...") + vector_query_engine = self.vector_db.get_vector_store_index().as_query_engine( + llm=self.llm, similarity_top_k=self.top_k + ) + logger.info( + f"Retrieving chunks for {self.doc_id} using SubQuestionQueryEngine." + ) + query_engine_tools = [ + QueryEngineTool( + query_engine=vector_query_engine, + metadata=ToolMetadata( + name=self.doc_id, description=f"Nodes for {self.doc_id}" + ), + ), + ] + query_bundle = QueryBundle(query_str=self.prompt) + + query_engine = SubQuestionQueryEngine.from_defaults( + query_engine_tools=query_engine_tools, + use_async=True, + llm=self.llm, + ) + + response = query_engine.query(str_or_query_bundle=query_bundle) + + chunks: set[str] = {node.text for node in response.source_nodes} + logger.info(f"Successfully retrieved {len(chunks)} chunks.") + return chunks + + except (ValueError, AttributeError, KeyError, ImportError) as e: + logger.error(f"Error during retrieving chunks {self.doc_id}: {e}") + raise RetrievalError(str(e)) from e + except Exception as e: + logger.error(f"Unexpected error during retrieving chunks {self.doc_id}: {e}") + raise RetrievalError(f"Unexpected error: {str(e)}") from e diff --git a/workers/executor/executors/usage.py b/workers/executor/executors/usage.py new file mode 100644 index 0000000000..ab6296eaeb --- /dev/null +++ b/workers/executor/executors/usage.py @@ -0,0 +1,81 @@ +"""Usage tracking helper for the executor worker. + +Ported from prompt-service/.../helpers/usage.py. +Flask/DB dependencies removed — usage data is pushed via the SDK1 +``Audit`` class (HTTP to platform API) and returned directly in +``ExecutionResult.metadata`` instead of querying the DB. + +Note: The SDK1 adapters (LLM, EmbeddingCompat) already call +``Audit().push_usage_data()`` internally. This helper is for +explicit push calls outside of adapter operations (e.g. rent rolls). +""" + +import logging +from typing import Any + +logger = logging.getLogger(__name__) + + +class UsageHelper: + @staticmethod + def push_usage_data( + event_type: str, + kwargs: dict[str, Any], + platform_api_key: str, + token_counter: Any = None, + model_name: str = "", + ) -> bool: + """Push usage data to the audit service. + + Wraps ``Audit().push_usage_data()`` with validation and + error handling. + + Args: + event_type: Type of usage event (e.g. "llm", "embedding"). + kwargs: Context dict (run_id, execution_id, etc.). + platform_api_key: API key for platform service auth. + token_counter: Token counter with usage metrics. + model_name: Name of the model used. + + Returns: + True if successful, False otherwise. + """ + if not kwargs or not isinstance(kwargs, dict): + logger.error("Invalid kwargs provided to push_usage_data") + return False + + if not platform_api_key or not isinstance(platform_api_key, str): + logger.error("Invalid platform_api_key provided to push_usage_data") + return False + + try: + from unstract.sdk1.audit import Audit + + logger.debug( + "Pushing usage data for event_type=%s model=%s", + event_type, + model_name, + ) + + Audit().push_usage_data( + platform_api_key=platform_api_key, + token_counter=token_counter, + model_name=model_name, + event_type=event_type, + kwargs=kwargs, + ) + + logger.info("Successfully pushed usage data for %s", model_name) + return True + except Exception: + logger.exception("Error pushing usage data") + return False + + @staticmethod + def format_float_positional(value: float, precision: int = 10) -> str: + """Format a float without scientific notation. + + Removes trailing zeros for clean display of cost values. + """ + formatted: str = f"{value:.{precision}f}" + return formatted.rstrip("0").rstrip(".") if "." in formatted else formatted diff --git a/workers/executor/executors/variable_replacement.py b/workers/executor/executors/variable_replacement.py new file mode 100644 index 0000000000..bd72d42e8c --- /dev/null +++ b/workers/executor/executors/variable_replacement.py @@ -0,0 +1,264 @@ +"""Variable replacement for prompt templates. + +Ported from prompt-service variable_replacement service + helper. +Flask dependencies (app.logger, publish_log) replaced with standard logging. +""" + +import json +import logging +import re +from functools import lru_cache +from typing import Any + +import requests as pyrequests +from requests.exceptions import RequestException + +from executor.executors.constants import VariableConstants, VariableType +from executor.executors.exceptions import CustomDataError, LegacyExecutorError + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# VariableReplacementHelper — low-level replacement logic +# --------------------------------------------------------------------------- + +class VariableReplacementHelper: + @staticmethod + def replace_static_variable( + prompt: str, structured_output: dict[str, Any], variable: str + ) -> str: + output_value = VariableReplacementHelper.check_static_variable_run_status( + structure_output=structured_output, variable=variable + ) + if not output_value: + return prompt + static_variable_marker_string = "".join(["{{", variable, "}}"]) + replaced_prompt: str = VariableReplacementHelper.replace_generic_string_value( + prompt=prompt, variable=static_variable_marker_string, value=output_value + ) + return replaced_prompt + + @staticmethod + def check_static_variable_run_status( + structure_output: dict[str, Any], variable: str + ) -> Any: + output = None + try: + output = structure_output[variable] + except KeyError: + logger.warning( + "Prompt with %s is not executed yet. " + "Unable to replace the variable", + variable, + ) + return output + + @staticmethod + def replace_generic_string_value(prompt: str, variable: str, value: Any) -> str: + formatted_value: str = value + if not isinstance(value, str): + formatted_value = VariableReplacementHelper.handle_json_and_str_types(value) + replaced_prompt = prompt.replace(variable, formatted_value) + return replaced_prompt + + @staticmethod + def handle_json_and_str_types(value: Any) -> str: + try: + formatted_value = json.dumps(value) + except ValueError: + formatted_value = str(value) + return formatted_value + + @staticmethod + def identify_variable_type(variable: str) -> VariableType: + custom_data_pattern = re.compile(VariableConstants.CUSTOM_DATA_VARIABLE_REGEX) + if re.findall(custom_data_pattern, variable): + return VariableType.CUSTOM_DATA + + dynamic_pattern = re.compile(VariableConstants.DYNAMIC_VARIABLE_URL_REGEX) + if re.findall(dynamic_pattern, variable): + return VariableType.DYNAMIC + + return VariableType.STATIC + + @staticmethod + def replace_dynamic_variable( + prompt: str, variable: str, structured_output: dict[str, Any] + ) -> str: + url = re.search(VariableConstants.DYNAMIC_VARIABLE_URL_REGEX, variable).group(0) + data = re.findall(VariableConstants.DYNAMIC_VARIABLE_DATA_REGEX, variable)[0] + output_value = VariableReplacementHelper.check_static_variable_run_status( + structure_output=structured_output, variable=data + ) + if not output_value: + return prompt + api_response: Any = VariableReplacementHelper.fetch_dynamic_variable_value( + url=url, data=output_value + ) + formatted_api_response: str = VariableReplacementHelper.handle_json_and_str_types( + api_response + ) + static_variable_marker_string = "".join(["{{", variable, "}}"]) + replaced_prompt: str = VariableReplacementHelper.replace_generic_string_value( + prompt=prompt, + variable=static_variable_marker_string, + value=formatted_api_response, + ) + return replaced_prompt + + @staticmethod + def replace_custom_data_variable( + prompt: str, + variable: str, + custom_data: dict[str, Any], + is_ide: bool = True, + ) -> str: + custom_data_match = re.search( + VariableConstants.CUSTOM_DATA_VARIABLE_REGEX, variable + ) + if not custom_data_match: + error_msg = "Invalid variable format." + logger.error("%s: %s", error_msg, variable) + raise CustomDataError(variable=variable, reason=error_msg, is_ide=is_ide) + + path_str = custom_data_match.group(1) + path_parts = path_str.split(".") + + if not custom_data: + error_msg = "Custom data is not configured." + logger.error(error_msg) + raise CustomDataError(variable=path_str, reason=error_msg, is_ide=is_ide) + + try: + value = custom_data + for part in path_parts: + value = value[part] + except (KeyError, TypeError) as e: + error_msg = f"Key '{path_str}' not found in custom data." + logger.error(error_msg) + raise CustomDataError( + variable=path_str, reason=error_msg, is_ide=is_ide + ) from e + + variable_marker_string = "".join(["{{", variable, "}}"]) + replaced_prompt = VariableReplacementHelper.replace_generic_string_value( + prompt=prompt, + variable=variable_marker_string, + value=value, + ) + return replaced_prompt + + @staticmethod + @lru_cache(maxsize=128) + def _extract_variables_cached(prompt_text: str) -> tuple[str, ...]: + return tuple(re.findall(VariableConstants.VARIABLE_REGEX, prompt_text)) + + @staticmethod + def extract_variables_from_prompt(prompt_text: str) -> list[str]: + result = VariableReplacementHelper._extract_variables_cached(prompt_text) + return list(result) + + @staticmethod + def fetch_dynamic_variable_value(url: str, data: str) -> Any: + """Fetch dynamic variable value from an external URL. + + Ported from prompt-service make_http_request — simplified to direct + requests.post since we don't need Flask error classes. + """ + headers = {"Content-Type": "text/plain"} + try: + response = pyrequests.post(url, data=data, headers=headers, timeout=30) + response.raise_for_status() + if response.headers.get("content-type") == "application/json": + return response.json() + return response.text + except RequestException as e: + logger.error("HTTP request error fetching dynamic variable: %s", e) + status_code = None + if getattr(e, "response", None) is not None: + status_code = getattr(e.response, "status_code", None) + raise LegacyExecutorError( + message=f"HTTP POST to {url} failed: {e!s}", + code=status_code or 500, + ) from e + + +# --------------------------------------------------------------------------- +# VariableReplacementService — high-level orchestration +# --------------------------------------------------------------------------- + +class VariableReplacementService: + @staticmethod + def is_variables_present(prompt_text: str) -> bool: + return bool( + len(VariableReplacementHelper.extract_variables_from_prompt(prompt_text)) + ) + + @staticmethod + def replace_variables_in_prompt( + prompt: dict[str, Any], + structured_output: dict[str, Any], + prompt_name: str, + tool_id: str = "", + log_events_id: str = "", + doc_name: str = "", + custom_data: dict[str, Any] | None = None, + is_ide: bool = True, + ) -> str: + from executor.executors.constants import PromptServiceConstants as PSKeys + + logger.info("[%s] Replacing variables in prompt: %s", tool_id, prompt_name) + + prompt_text = prompt[PSKeys.PROMPT] + try: + variable_map = prompt[PSKeys.VARIABLE_MAP] + prompt_text = VariableReplacementService._execute_variable_replacement( + prompt_text=prompt[PSKeys.PROMPT], + variable_map=variable_map, + custom_data=custom_data, + is_ide=is_ide, + ) + except KeyError: + prompt_text = VariableReplacementService._execute_variable_replacement( + prompt_text=prompt_text, + variable_map=structured_output, + custom_data=custom_data, + is_ide=is_ide, + ) + return prompt_text + + @staticmethod + def _execute_variable_replacement( + prompt_text: str, + variable_map: dict[str, Any], + custom_data: dict[str, Any] | None = None, + is_ide: bool = True, + ) -> str: + variables: list[str] = VariableReplacementHelper.extract_variables_from_prompt( + prompt_text=prompt_text + ) + for variable in variables: + variable_type = VariableReplacementHelper.identify_variable_type( + variable=variable + ) + if variable_type == VariableType.STATIC: + prompt_text = VariableReplacementHelper.replace_static_variable( + prompt=prompt_text, + structured_output=variable_map, + variable=variable, + ) + elif variable_type == VariableType.DYNAMIC: + prompt_text = VariableReplacementHelper.replace_dynamic_variable( + prompt=prompt_text, + variable=variable, + structured_output=variable_map, + ) + elif variable_type == VariableType.CUSTOM_DATA: + prompt_text = VariableReplacementHelper.replace_custom_data_variable( + prompt=prompt_text, + variable=variable, + custom_data=custom_data or {}, + is_ide=is_ide, + ) + return prompt_text diff --git a/workers/executor/tasks.py b/workers/executor/tasks.py new file mode 100644 index 0000000000..75bf52b3fb --- /dev/null +++ b/workers/executor/tasks.py @@ -0,0 +1,76 @@ +"""Executor Worker Tasks + +Defines the execute_extraction Celery task that receives an +ExecutionContext dict, runs the appropriate executor via +ExecutionOrchestrator, and returns an ExecutionResult dict. +""" + +import logging + +from celery import shared_task + +from shared.enums.task_enums import TaskName +from shared.infrastructure.logging import WorkerLogger + +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.result import ExecutionResult + +logger = WorkerLogger.get_logger(__name__) + + +@shared_task( + bind=True, + name=TaskName.EXECUTE_EXTRACTION, + autoretry_for=(ConnectionError, TimeoutError, OSError), + retry_backoff=True, + retry_backoff_max=60, + max_retries=3, + retry_jitter=True, +) +def execute_extraction( + self, execution_context_dict: dict +) -> dict: + """Execute an extraction operation via the executor framework. + + This is the single Celery task entry point for all extraction + operations. Both the workflow path (structure tool task) and + the IDE path (PromptStudioHelper) dispatch to this task. + + Args: + execution_context_dict: Serialized ExecutionContext. + + Returns: + Serialized ExecutionResult dict. + """ + request_id = execution_context_dict.get("request_id", "") + logger.info( + "Received execute_extraction task: " + "celery_task_id=%s request_id=%s executor=%s operation=%s", + self.request.id, + request_id, + execution_context_dict.get("executor_name"), + execution_context_dict.get("operation"), + ) + + try: + context = ExecutionContext.from_dict(execution_context_dict) + except (KeyError, ValueError) as exc: + logger.error( + "Invalid execution context: %s", exc, exc_info=True + ) + return ExecutionResult.failure( + error=f"Invalid execution context: {exc}" + ).to_dict() + + orchestrator = ExecutionOrchestrator() + result = orchestrator.execute(context) + + logger.info( + "execute_extraction complete: " + "celery_task_id=%s request_id=%s success=%s", + self.request.id, + context.request_id, + result.success, + ) + return result.to_dict() diff --git a/workers/executor/worker.py b/workers/executor/worker.py new file mode 100644 index 0000000000..a9ec204e2a --- /dev/null +++ b/workers/executor/worker.py @@ -0,0 +1,77 @@ +"""Executor Worker + +Celery worker for the pluggable executor system. +Routes execute_extraction tasks to registered executors. +""" + +from shared.enums.worker_enums import WorkerType +from shared.infrastructure.config.builder import WorkerBuilder +from shared.infrastructure.config.registry import WorkerRegistry +from shared.infrastructure.logging import WorkerLogger + +# Setup worker +logger = WorkerLogger.setup(WorkerType.EXECUTOR) +app, config = WorkerBuilder.build_celery_app(WorkerType.EXECUTOR) + + +def check_executor_health(): + """Custom health check for executor worker.""" + from shared.infrastructure.monitoring.health import ( + HealthCheckResult, + HealthStatus, + ) + + try: + from unstract.sdk1.execution.registry import ( + ExecutorRegistry, + ) + + executors = ExecutorRegistry.list_executors() + + return HealthCheckResult( + name="executor_health", + status=HealthStatus.HEALTHY, + message="Executor worker is healthy", + details={ + "worker_type": "executor", + "registered_executors": executors, + "executor_count": len(executors), + "queues": ["executor"], + }, + ) + + except Exception as e: + return HealthCheckResult( + name="executor_health", + status=HealthStatus.DEGRADED, + message=f"Health check failed: {e}", + details={"error": str(e)}, + ) + + +# Register health check +WorkerRegistry.register_health_check( + WorkerType.EXECUTOR, + "executor_health", + check_executor_health, +) + + +@app.task(bind=True) +def healthcheck(self): + """Health check task for monitoring systems.""" + return { + "status": "healthy", + "worker_type": "executor", + "task_id": self.request.id, + "worker_name": ( + config.worker_name if config else "executor-worker" + ), + } + + +# Import tasks so shared_task definitions bind to this app. +import executor.tasks # noqa: E402, F401 + +# Import executors to trigger @ExecutorRegistry.register at import time. +import executor.executors # noqa: E402, F401 diff --git a/workers/file_processing/__init__.py b/workers/file_processing/__init__.py index b3f8b74a97..b2b8ece391 100644 --- a/workers/file_processing/__init__.py +++ b/workers/file_processing/__init__.py @@ -4,6 +4,7 @@ direct Django ORM access, implementing the hybrid approach for tool execution. """ +from .structure_tool_task import execute_structure_tool from .tasks import ( process_file_batch, process_file_batch_api, @@ -13,6 +14,7 @@ __all__ = [ "celery_app", + "execute_structure_tool", "process_file_batch", "process_file_batch_api", "process_file_batch_resilient", diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py new file mode 100644 index 0000000000..fb6a7e4a6e --- /dev/null +++ b/workers/file_processing/structure_tool_task.py @@ -0,0 +1,851 @@ +"""Structure tool Celery task — Phase 3 of executor migration. + +Replaces the Docker-container-based StructureTool.run() with a Celery +task that runs in the file_processing worker. Instead of PromptTool +HTTP calls to prompt-service, it uses ExecutionDispatcher to send +operations to the executor worker via Celery. + +Before (Docker-based): + File Processing Worker → WorkflowExecutionService → ToolSandbox + → Docker container → StructureTool.run() → PromptTool (HTTP) → prompt-service + +After (Celery-based): + File Processing Worker → WorkerWorkflowExecutionService + → execute_structure_tool task → ExecutionDispatcher + → executor worker → LegacyExecutor +""" + +import json +import logging +import os +from pathlib import Path +from typing import Any + +from file_processing.worker import app +from shared.enums.task_enums import TaskName +from unstract.sdk1.constants import MetadataKey, ToolEnv, UsageKwargs +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.result import ExecutionResult + +logger = logging.getLogger(__name__) + +# Timeout for executor worker calls (seconds). +# Reads from EXECUTOR_RESULT_TIMEOUT env, defaults to 3600. +EXECUTOR_TIMEOUT = int(os.environ.get("EXECUTOR_RESULT_TIMEOUT", 3600)) + + +# ----------------------------------------------------------------------- +# Constants mirrored from tools/structure/src/constants.py +# These are the keys used in tool_metadata and payload dicts. +# ----------------------------------------------------------------------- + +class _SK: + """SettingsKeys subset needed by the structure tool task.""" + + PROMPT_REGISTRY_ID = "prompt_registry_id" + TOOL_METADATA = "tool_metadata" + TOOL_ID = "tool_id" + OUTPUTS = "outputs" + TOOL_SETTINGS = "tool_settings" + NAME = "name" + ACTIVE = "active" + PROMPT = "prompt" + CHUNK_SIZE = "chunk-size" + CHUNK_OVERLAP = "chunk-overlap" + VECTOR_DB = "vector-db" + EMBEDDING = "embedding" + X2TEXT_ADAPTER = "x2text_adapter" + LLM = "llm" + CHALLENGE_LLM = "challenge_llm" + ENABLE_CHALLENGE = "enable_challenge" + ENABLE_SINGLE_PASS_EXTRACTION = "enable_single_pass_extraction" + SUMMARIZE_AS_SOURCE = "summarize_as_source" + ENABLE_HIGHLIGHT = "enable_highlight" + SUMMARIZE_PROMPT = "summarize_prompt" + TABLE_SETTINGS = "table_settings" + INPUT_FILE = "input_file" + IS_DIRECTORY_MODE = "is_directory_mode" + RUN_ID = "run_id" + EXECUTION_ID = "execution_id" + FILE_HASH = "file_hash" + FILE_NAME = "file_name" + FILE_PATH = "file_path" + EXECUTION_SOURCE = "execution_source" + TOOL = "tool" + EXTRACT = "EXTRACT" + SUMMARIZE = "SUMMARIZE" + METADATA = "metadata" + METRICS = "metrics" + INDEXING = "indexing" + OUTPUT = "output" + CONTEXT = "context" + DATA = "data" + LLM_ADAPTER_INSTANCE_ID = "llm_adapter_instance_id" + PROMPT_KEYS = "prompt_keys" + LLM_PROFILE_ID = "llm_profile_id" + CUSTOM_DATA = "custom_data" + SINGLE_PASS_EXTRACTION_MODE = "single_pass_extraction_mode" + CHALLENGE_LLM_ADAPTER_ID = "challenge_llm_adapter_id" + + +# ----------------------------------------------------------------------- +# Standalone helper functions (extracted from StructureTool methods) +# ----------------------------------------------------------------------- + + +def _apply_profile_overrides( + tool_metadata: dict, profile_data: dict +) -> list[str]: + """Apply profile overrides to tool metadata. + + Standalone version of StructureTool._apply_profile_overrides. + """ + changes: list[str] = [] + + profile_to_tool_mapping = { + "chunk_overlap": "chunk-overlap", + "chunk_size": "chunk-size", + "embedding_model_id": "embedding", + "llm_id": "llm", + "similarity_top_k": "similarity-top-k", + "vector_store_id": "vector-db", + "x2text_id": "x2text_adapter", + "retrieval_strategy": "retrieval-strategy", + } + + if "tool_settings" in tool_metadata: + changes.extend( + _override_section( + tool_metadata["tool_settings"], + profile_data, + profile_to_tool_mapping, + "tool_settings", + ) + ) + + if "outputs" in tool_metadata: + for i, output in enumerate(tool_metadata["outputs"]): + output_name = output.get("name", f"output_{i}") + changes.extend( + _override_section( + output, + profile_data, + profile_to_tool_mapping, + f"output[{output_name}]", + ) + ) + + return changes + + +def _override_section( + section: dict, + profile_data: dict, + mapping: dict, + section_name: str = "section", +) -> list[str]: + """Override values in a section using profile data.""" + changes: list[str] = [] + for profile_key, section_key in mapping.items(): + if profile_key in profile_data and section_key in section: + old_value = section[section_key] + new_value = profile_data[profile_key] + if old_value != new_value: + section[section_key] = new_value + change_desc = ( + f"{section_name}.{section_key}: {old_value} -> {new_value}" + ) + changes.append(change_desc) + logger.info("Overrode %s", change_desc) + return changes + + +def _should_skip_extraction_for_smart_table( + input_file: str, outputs: list[dict[str, Any]] +) -> bool: + """Check if extraction and indexing should be skipped for smart table. + + Standalone version of StructureTool._should_skip_extraction_for_smart_table. + """ + for output in outputs: + if _SK.TABLE_SETTINGS in output: + prompt = output.get(_SK.PROMPT, "") + if prompt and isinstance(prompt, str): + try: + schema_data = json.loads(prompt) + if schema_data and isinstance(schema_data, dict): + return True + except (json.JSONDecodeError, ValueError) as e: + logger.warning( + "Failed to parse prompt as JSON for smart table: %s", e + ) + continue + return False + + +def _merge_metrics(metrics1: dict, metrics2: dict) -> dict: + """Merge two metrics dicts, combining sub-dicts for shared keys.""" + merged: dict = {} + all_keys = set(metrics1) | set(metrics2) + for key in all_keys: + if ( + key in metrics1 + and key in metrics2 + and isinstance(metrics1[key], dict) + and isinstance(metrics2[key], dict) + ): + merged[key] = {**metrics1[key], **metrics2[key]} + elif key in metrics1: + merged[key] = metrics1[key] + else: + merged[key] = metrics2[key] + return merged + + +# ----------------------------------------------------------------------- +# Main Celery task +# ----------------------------------------------------------------------- + + +@app.task(bind=True, name=str(TaskName.EXECUTE_STRUCTURE_TOOL)) +def execute_structure_tool(self, params: dict) -> dict: + """Execute structure tool as a Celery task. + + Replicates StructureTool.run() from tools/structure/src/main.py + but uses ExecutionDispatcher instead of PromptTool HTTP calls. + + Args: + params: Dict with keys described in the Phase 3 plan. + + Returns: + Dict with {"success": bool, "data": dict, "error": str|None}. + """ + try: + return _execute_structure_tool_impl(params) + except Exception as e: + logger.error("Structure tool task failed: %s", e, exc_info=True) + return ExecutionResult.failure( + error=f"Structure tool failed: {e}" + ).to_dict() + + +def _execute_structure_tool_impl(params: dict) -> dict: + """Implementation of the structure tool pipeline. + + Separated from the task function for testability. + """ + # ---- Unpack params ---- + organization_id = params["organization_id"] + workflow_id = params.get("workflow_id", "") + execution_id = params.get("execution_id", "") + file_execution_id = params["file_execution_id"] + tool_instance_metadata = params["tool_instance_metadata"] + platform_service_api_key = params["platform_service_api_key"] + input_file_path = params["input_file_path"] + output_dir_path = params["output_dir_path"] + source_file_name = params["source_file_name"] + execution_data_dir = params["execution_data_dir"] + file_hash = params.get("file_hash", "") + exec_metadata = params.get("exec_metadata", {}) + + # ---- Step 1: Setup ---- + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim(platform_api_key=platform_service_api_key) + + platform_helper = _create_platform_helper(shim, file_execution_id) + dispatcher = ExecutionDispatcher(celery_app=app) + fs = _get_file_storage() + + # ---- Step 2: Fetch tool metadata ---- + prompt_registry_id = tool_instance_metadata.get( + _SK.PROMPT_REGISTRY_ID, "" + ) + logger.info( + "Fetching exported tool with UUID '%s'", prompt_registry_id + ) + + tool_metadata, is_agentic = _fetch_tool_metadata( + platform_helper, prompt_registry_id + ) + + # ---- Route agentic vs regular ---- + if is_agentic: + return _run_agentic_extraction( + tool_metadata=tool_metadata, + input_file_path=input_file_path, + output_dir_path=output_dir_path, + tool_instance_metadata=tool_instance_metadata, + dispatcher=dispatcher, + shim=shim, + platform_helper=platform_helper, + file_execution_id=file_execution_id, + organization_id=organization_id, + source_file_name=source_file_name, + fs=fs, + ) + + # ---- Step 3: Profile overrides ---- + _handle_profile_overrides( + exec_metadata, platform_helper, tool_metadata + ) + + # ---- Extract settings from tool_metadata ---- + settings = tool_instance_metadata + is_challenge_enabled = settings.get(_SK.ENABLE_CHALLENGE, False) + is_summarization_enabled = settings.get(_SK.SUMMARIZE_AS_SOURCE, False) + is_single_pass_enabled = settings.get( + _SK.SINGLE_PASS_EXTRACTION_MODE, False + ) + challenge_llm = settings.get(_SK.CHALLENGE_LLM_ADAPTER_ID, "") + is_highlight_enabled = settings.get(_SK.ENABLE_HIGHLIGHT, False) + + tool_id = tool_metadata[_SK.TOOL_ID] + tool_settings = tool_metadata[_SK.TOOL_SETTINGS] + outputs = tool_metadata[_SK.OUTPUTS] + + # Inject workflow-level settings into tool_settings + tool_settings[_SK.CHALLENGE_LLM] = challenge_llm + tool_settings[_SK.ENABLE_CHALLENGE] = is_challenge_enabled + tool_settings[_SK.ENABLE_SINGLE_PASS_EXTRACTION] = is_single_pass_enabled + tool_settings[_SK.SUMMARIZE_AS_SOURCE] = is_summarization_enabled + tool_settings[_SK.ENABLE_HIGHLIGHT] = is_highlight_enabled + + _, file_name = os.path.split(input_file_path) + if is_summarization_enabled: + file_name = _SK.SUMMARIZE + + execution_run_data_folder = Path(execution_data_dir) + extracted_input_file = str(execution_run_data_folder / _SK.EXTRACT) + + # ---- Step 4: Build payload ---- + custom_data = exec_metadata.get(_SK.CUSTOM_DATA, {}) + payload = { + _SK.RUN_ID: file_execution_id, + _SK.EXECUTION_ID: execution_id, + _SK.TOOL_SETTINGS: tool_settings, + _SK.OUTPUTS: outputs, + _SK.TOOL_ID: tool_id, + _SK.FILE_HASH: file_hash, + _SK.FILE_NAME: file_name, + _SK.FILE_PATH: extracted_input_file, + _SK.EXECUTION_SOURCE: _SK.TOOL, + _SK.CUSTOM_DATA: custom_data, + "PLATFORM_SERVICE_API_KEY": platform_service_api_key, + } + + # ---- Step 5: Extract ---- + skip_extraction_and_indexing = _should_skip_extraction_for_smart_table( + input_file_path, outputs + ) + + extracted_text = "" + usage_kwargs: dict[Any, Any] = {} + if skip_extraction_and_indexing: + logger.info( + "Skipping extraction and indexing for Excel table " + "with valid JSON schema" + ) + else: + logger.info("Extracting document '%s'", source_file_name) + usage_kwargs[UsageKwargs.RUN_ID] = file_execution_id + usage_kwargs[UsageKwargs.FILE_NAME] = source_file_name + usage_kwargs[UsageKwargs.EXECUTION_ID] = execution_id + + extract_ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id=file_execution_id, + execution_source="tool", + organization_id=organization_id, + request_id=file_execution_id, + executor_params={ + "x2text_instance_id": tool_settings[_SK.X2TEXT_ADAPTER], + "file_path": input_file_path, + "enable_highlight": is_highlight_enabled, + "output_file_path": str( + execution_run_data_folder / _SK.EXTRACT + ), + "platform_api_key": platform_service_api_key, + "usage_kwargs": usage_kwargs, + "tags": exec_metadata.get("tags"), + "tool_execution_metadata": exec_metadata, + "execution_data_dir": str(execution_run_data_folder), + }, + ) + extract_result = dispatcher.dispatch( + extract_ctx, timeout=EXECUTOR_TIMEOUT + ) + if not extract_result.success: + return extract_result.to_dict() + extracted_text = extract_result.data.get("extracted_text", "") + + # ---- Step 6: Summarize (if enabled) ---- + index_metrics: dict = {} + if is_summarization_enabled: + summarize_file_path, summarize_file_hash = _summarize( + tool_settings=tool_settings, + tool_data_dir=execution_run_data_folder, + dispatcher=dispatcher, + outputs=outputs, + usage_kwargs=usage_kwargs, + file_execution_id=file_execution_id, + organization_id=organization_id, + platform_service_api_key=platform_service_api_key, + fs=fs, + ) + payload[_SK.FILE_HASH] = summarize_file_hash + payload[_SK.FILE_PATH] = summarize_file_path + elif skip_extraction_and_indexing: + # Use source file directly for Excel with valid JSON + payload[_SK.FILE_PATH] = input_file_path + elif not is_single_pass_enabled: + # ---- Step 7: Index ---- + index_metrics = _index_documents( + outputs=outputs, + tool_settings=tool_settings, + tool_id=tool_id, + file_hash=file_hash, + extracted_text=extracted_text, + execution_run_data_folder=execution_run_data_folder, + is_highlight_enabled=is_highlight_enabled, + dispatcher=dispatcher, + file_execution_id=file_execution_id, + organization_id=organization_id, + platform_service_api_key=platform_service_api_key, + ) + + # ---- Step 8: Answer prompt (or single pass) ---- + if is_single_pass_enabled: + logger.info("Fetching response for single pass extraction...") + operation = "single_pass_extraction" + else: + # Handle table_settings injection + for output in outputs: + if _SK.TABLE_SETTINGS in output: + table_settings = output[_SK.TABLE_SETTINGS] + is_directory_mode = table_settings.get( + _SK.IS_DIRECTORY_MODE, False + ) + if skip_extraction_and_indexing: + table_settings[_SK.INPUT_FILE] = input_file_path + payload[_SK.FILE_PATH] = input_file_path + else: + table_settings[_SK.INPUT_FILE] = extracted_input_file + table_settings[_SK.IS_DIRECTORY_MODE] = is_directory_mode + logger.info( + "Performing table extraction with: %s", table_settings + ) + output[_SK.TABLE_SETTINGS] = table_settings + + logger.info( + "Fetching responses for '%d' prompt(s)...", len(outputs) + ) + operation = "answer_prompt" + + answer_ctx = ExecutionContext( + executor_name="legacy", + operation=operation, + run_id=file_execution_id, + execution_source="tool", + organization_id=organization_id, + request_id=file_execution_id, + executor_params=payload, + ) + answer_result = dispatcher.dispatch(answer_ctx, timeout=EXECUTOR_TIMEOUT) + if not answer_result.success: + return answer_result.to_dict() + + structured_output = answer_result.data + + # ---- Step 9: Post-process and write output ---- + # Ensure metadata section exists + if _SK.METADATA not in structured_output: + structured_output[_SK.METADATA] = {} + + structured_output[_SK.METADATA][_SK.FILE_NAME] = source_file_name + + # Add extracted text for HITL raw view + if extracted_text: + structured_output[_SK.METADATA]["extracted_text"] = extracted_text + logger.info( + "Added extracted text to metadata (length: %d characters)", + len(extracted_text), + ) + + # Merge index metrics + if merged_metrics := _merge_metrics( + structured_output.get(_SK.METRICS, {}), index_metrics + ): + structured_output[_SK.METRICS] = merged_metrics + + # Write output JSON + try: + output_path = ( + Path(output_dir_path) + / f"{Path(source_file_name).stem}.json" + ) + logger.info("Writing output to %s", output_path) + fs.json_dump(path=output_path, data=structured_output) + logger.info("Output written successfully to workflow storage") + except (OSError, json.JSONDecodeError) as e: + return ExecutionResult.failure( + error=f"Error writing output file: {e}" + ).to_dict() + + # Write tool result to METADATA.json + _write_tool_result(fs, execution_data_dir, structured_output) + + return ExecutionResult( + success=True, data=structured_output + ).to_dict() + + +# ----------------------------------------------------------------------- +# Helper functions for the pipeline steps +# ----------------------------------------------------------------------- + + +def _create_platform_helper(shim, request_id: str): + """Create PlatformHelper using env vars for host/port.""" + from unstract.sdk1.platform import PlatformHelper + + return PlatformHelper( + tool=shim, + platform_host=os.environ.get(ToolEnv.PLATFORM_HOST, ""), + platform_port=os.environ.get(ToolEnv.PLATFORM_PORT, ""), + request_id=request_id, + ) + + +def _get_file_storage(): + """Get workflow execution file storage instance.""" + from unstract.filesystem import FileStorageType, FileSystem + + return FileSystem(FileStorageType.WORKFLOW_EXECUTION).get_file_storage() + + +def _fetch_tool_metadata( + platform_helper, prompt_registry_id: str +) -> tuple[dict, bool]: + """Fetch tool metadata from platform, trying prompt studio then agentic. + + Returns: + Tuple of (tool_metadata dict, is_agentic bool). + + Raises: + RuntimeError: If neither registry returns valid metadata. + """ + exported_tool = None + try: + exported_tool = platform_helper.get_prompt_studio_tool( + prompt_registry_id=prompt_registry_id + ) + except Exception as e: + logger.info( + "Not found as prompt studio project, trying agentic: %s", e + ) + + if exported_tool and _SK.TOOL_METADATA in exported_tool: + tool_metadata = exported_tool[_SK.TOOL_METADATA] + tool_metadata["is_agentic"] = False + return tool_metadata, False + + # Try agentic registry + try: + agentic_tool = platform_helper.get_agentic_studio_tool( + agentic_registry_id=prompt_registry_id + ) + if not agentic_tool or _SK.TOOL_METADATA not in agentic_tool: + raise RuntimeError( + f"Registry returned empty response for {prompt_registry_id}" + ) + tool_metadata = agentic_tool[_SK.TOOL_METADATA] + tool_metadata["is_agentic"] = True + logger.info( + "Retrieved agentic project: %s", + tool_metadata.get("name", prompt_registry_id), + ) + return tool_metadata, True + except Exception as agentic_error: + raise RuntimeError( + f"Error fetching project from both registries " + f"for ID '{prompt_registry_id}': {agentic_error}" + ) from agentic_error + + +def _handle_profile_overrides( + exec_metadata: dict, platform_helper, tool_metadata: dict +) -> None: + """Apply LLM profile overrides if configured.""" + llm_profile_id = exec_metadata.get(_SK.LLM_PROFILE_ID) + if not llm_profile_id: + return + + try: + llm_profile = platform_helper.get_llm_profile(llm_profile_id) + if llm_profile: + profile_name = llm_profile.get( + "profile_name", llm_profile_id + ) + logger.info( + "Applying profile overrides from profile: %s", + profile_name, + ) + changes = _apply_profile_overrides(tool_metadata, llm_profile) + if changes: + logger.info( + "Profile overrides applied. Changes: %s", + "; ".join(changes), + ) + else: + logger.info( + "Profile overrides applied - no changes needed" + ) + except Exception as e: + raise RuntimeError( + f"Error applying profile overrides: {e}" + ) from e + + +def _summarize( + tool_settings: dict, + tool_data_dir: Path, + dispatcher: ExecutionDispatcher, + outputs: list[dict], + usage_kwargs: dict, + file_execution_id: str, + organization_id: str, + platform_service_api_key: str, + fs: Any, +) -> tuple[str, str]: + """Summarize the document, with filesystem caching. + + Returns: + Tuple of (summarize_file_path, summarize_file_hash). + """ + llm_adapter_instance_id = tool_settings[_SK.LLM] + embedding_instance_id = tool_settings[_SK.EMBEDDING] + vector_db_instance_id = tool_settings[_SK.VECTOR_DB] + x2text_instance_id = tool_settings[_SK.X2TEXT_ADAPTER] + summarize_prompt = tool_settings[_SK.SUMMARIZE_PROMPT] + run_id = usage_kwargs.get(UsageKwargs.RUN_ID, file_execution_id) + extract_file_path = tool_data_dir / _SK.EXTRACT + summarize_file_path = tool_data_dir / _SK.SUMMARIZE + + # Check cache + summarized_context = "" + logger.info( + "Checking if summarized context exists at '%s'...", + summarize_file_path, + ) + if fs.exists(summarize_file_path): + summarized_context = fs.read(path=summarize_file_path, mode="r") + + if not summarized_context: + context = fs.read(path=extract_file_path, mode="r") + prompt_keys = [] + for output in outputs: + prompt_keys.append(output[_SK.NAME]) + output[_SK.EMBEDDING] = embedding_instance_id + output[_SK.VECTOR_DB] = vector_db_instance_id + output[_SK.X2TEXT_ADAPTER] = x2text_instance_id + output[_SK.CHUNK_SIZE] = 0 + output[_SK.CHUNK_OVERLAP] = 0 + + logger.info("Summarized context not found, summarizing...") + summarize_ctx = ExecutionContext( + executor_name="legacy", + operation="summarize", + run_id=run_id, + execution_source="tool", + organization_id=organization_id, + request_id=file_execution_id, + executor_params={ + _SK.LLM_ADAPTER_INSTANCE_ID: llm_adapter_instance_id, + _SK.SUMMARIZE_PROMPT: summarize_prompt, + _SK.CONTEXT: context, + _SK.PROMPT_KEYS: prompt_keys, + "PLATFORM_SERVICE_API_KEY": platform_service_api_key, + }, + ) + summarize_result = dispatcher.dispatch( + summarize_ctx, timeout=EXECUTOR_TIMEOUT + ) + if not summarize_result.success: + raise RuntimeError( + f"Summarization failed: {summarize_result.error}" + ) + summarized_context = summarize_result.data.get(_SK.DATA, "") + logger.info( + "Writing summarized context to '%s'", summarize_file_path + ) + fs.write( + path=summarize_file_path, mode="w", data=summarized_context + ) + + summarize_file_hash = fs.get_hash_from_file(path=summarize_file_path) + return str(summarize_file_path), summarize_file_hash + + +def _index_documents( + outputs: list[dict], + tool_settings: dict, + tool_id: str, + file_hash: str, + extracted_text: str, + execution_run_data_folder: Path, + is_highlight_enabled: bool, + dispatcher: ExecutionDispatcher, + file_execution_id: str, + organization_id: str, + platform_service_api_key: str, +) -> dict: + """Index documents with dedup on parameter combinations. + + Returns: + Dict of index metrics per output name. + """ + import datetime + + index_metrics: dict = {} + seen_params: set = set() + + for output in outputs: + chunk_size = output[_SK.CHUNK_SIZE] + chunk_overlap = output[_SK.CHUNK_OVERLAP] + vector_db = tool_settings[_SK.VECTOR_DB] + embedding = tool_settings[_SK.EMBEDDING] + x2text = tool_settings[_SK.X2TEXT_ADAPTER] + + param_key = ( + f"chunk_size={chunk_size}_" + f"chunk_overlap={chunk_overlap}_" + f"vector_db={vector_db}_" + f"embedding={embedding}_" + f"x2text={x2text}" + ) + + if chunk_size != 0 and param_key not in seen_params: + seen_params.add(param_key) + + indexing_start_time = datetime.datetime.now() + logger.info( + "Indexing document with: chunk_size=%s, " + "chunk_overlap=%s, vector_db=%s, embedding=%s, " + "x2text=%s", + chunk_size, + chunk_overlap, + vector_db, + embedding, + x2text, + ) + + index_ctx = ExecutionContext( + executor_name="legacy", + operation="index", + run_id=file_execution_id, + execution_source="tool", + organization_id=organization_id, + request_id=file_execution_id, + executor_params={ + "embedding_instance_id": embedding, + "vector_db_instance_id": vector_db, + "x2text_instance_id": x2text, + "chunk_size": chunk_size, + "chunk_overlap": chunk_overlap, + "file_path": str( + execution_run_data_folder / _SK.EXTRACT + ), + "reindex": True, + "tool_id": tool_id, + "file_hash": file_hash, + "enable_highlight": is_highlight_enabled, + "extracted_text": extracted_text, + "platform_api_key": platform_service_api_key, + }, + ) + index_result = dispatcher.dispatch( + index_ctx, timeout=EXECUTOR_TIMEOUT + ) + if not index_result.success: + logger.warning( + "Indexing failed for param combo %s: %s", + param_key, + index_result.error, + ) + + elapsed = ( + datetime.datetime.now() - indexing_start_time + ).total_seconds() + index_metrics[output[_SK.NAME]] = { + _SK.INDEXING: {"time_taken(s)": elapsed} + } + + return index_metrics + + +def _run_agentic_extraction( + tool_metadata: dict, + input_file_path: str, + output_dir_path: str, + tool_instance_metadata: dict, + dispatcher: ExecutionDispatcher, + shim: Any, + platform_helper: Any, + file_execution_id: str, + organization_id: str, + source_file_name: str, + fs: Any, +) -> dict: + """Execute agentic extraction pipeline via dispatcher. + + Currently returns failure since the agentic extraction plugin + is not yet available in the executor worker. + """ + agentic_ctx = ExecutionContext( + executor_name="legacy", + operation="agentic_extraction", + run_id=file_execution_id, + execution_source="tool", + organization_id=organization_id, + request_id=file_execution_id, + executor_params={ + "tool_metadata": tool_metadata, + "input_file_path": input_file_path, + "tool_instance_metadata": tool_instance_metadata, + }, + ) + agentic_result = dispatcher.dispatch( + agentic_ctx, timeout=EXECUTOR_TIMEOUT + ) + return agentic_result.to_dict() + + +def _write_tool_result( + fs: Any, execution_data_dir: str, data: dict +) -> None: + """Write tool result to METADATA.json (matches BaseTool.write_tool_result).""" + try: + metadata_path = Path(execution_data_dir) / "METADATA.json" + + # Read existing metadata if present + existing: dict = {} + if fs.exists(metadata_path): + try: + existing_raw = fs.read(path=metadata_path, mode="r") + if existing_raw: + existing = json.loads(existing_raw) + except Exception: + pass + + # Add tool result + existing["tool_result"] = data + fs.write( + path=metadata_path, + mode="w", + data=json.dumps(existing, indent=2), + ) + except Exception as e: + logger.warning("Failed to write tool result to METADATA.json: %s", e) diff --git a/workers/run-worker-docker.sh b/workers/run-worker-docker.sh index cdf7e9538d..42afe9c91c 100755 --- a/workers/run-worker-docker.sh +++ b/workers/run-worker-docker.sh @@ -35,6 +35,7 @@ declare -A WORKERS=( ["log-consumer"]="log_consumer" ["scheduler"]="scheduler" ["schedule"]="scheduler" + ["executor"]="executor" ["all"]="all" ) @@ -51,6 +52,7 @@ declare -A WORKER_QUEUES=( ["notification"]="notifications,notifications_webhook,notifications_email,notifications_sms,notifications_priority" ["log_consumer"]="celery_log_task_queue" ["scheduler"]="scheduler" + ["executor"]="executor" ) # Worker health ports @@ -62,6 +64,7 @@ declare -A WORKER_HEALTH_PORTS=( ["log_consumer"]="8084" ["notification"]="8085" ["scheduler"]="8087" + ["executor"]="8088" ) # Function to print colored output @@ -196,6 +199,7 @@ detect_worker_type_from_args() { *"notifications"*) echo "notification" ;; *"celery_log_task_queue"*) echo "log_consumer" ;; *"scheduler"*) echo "scheduler" ;; + *"executor"*) echo "executor" ;; *"celery"*) echo "general" ;; *) echo "general" ;; # fallback esac @@ -259,6 +263,9 @@ run_worker() { "scheduler") queues="${CELERY_QUEUES_SCHEDULER:-$queues}" ;; + "executor") + queues="${CELERY_QUEUES_EXECUTOR:-$queues}" + ;; esac # Get health port @@ -294,6 +301,10 @@ run_worker() { export SCHEDULER_HEALTH_PORT="${health_port}" export SCHEDULER_METRICS_PORT="${health_port}" ;; + "executor") + export EXECUTOR_HEALTH_PORT="${health_port}" + export EXECUTOR_METRICS_PORT="${health_port}" + ;; *) # Default for pluggable workers local worker_type_upper=$(echo "$worker_type" | tr '[:lower:]' '[:upper:]' | tr '-' '_') @@ -326,6 +337,9 @@ run_worker() { "scheduler") concurrency="${WORKER_SCHEDULER_CONCURRENCY:-2}" ;; + "executor") + concurrency="${WORKER_EXECUTOR_CONCURRENCY:-2}" + ;; *) # Default for pluggable workers or unknown types local worker_type_upper=$(echo "$worker_type" | tr '[:lower:]' '[:upper:]' | tr '-' '_') @@ -534,6 +548,10 @@ if [[ "$1" == *"celery"* ]] || [[ "$1" == *".venv"* ]]; then export SCHEDULER_HEALTH_PORT="8087" export SCHEDULER_METRICS_PORT="8087" ;; + "executor") + export EXECUTOR_HEALTH_PORT="8088" + export EXECUTOR_METRICS_PORT="8088" + ;; *) # Default for pluggable workers - use dynamic port from WORKER_HEALTH_PORTS health_port="${WORKER_HEALTH_PORTS[$WORKER_TYPE]:-8090}" diff --git a/workers/run-worker.sh b/workers/run-worker.sh index 152a72d859..d974be3955 100755 --- a/workers/run-worker.sh +++ b/workers/run-worker.sh @@ -37,6 +37,7 @@ declare -A WORKERS=( ["notify"]="notification" ["scheduler"]="scheduler" ["schedule"]="scheduler" + ["executor"]="executor" ["all"]="all" ) @@ -52,6 +53,7 @@ declare -A WORKER_QUEUES=( ["log_consumer"]="celery_log_task_queue" ["notification"]="notifications,notifications_webhook,notifications_email,notifications_sms,notifications_priority" ["scheduler"]="scheduler" + ["executor"]="executor" ) # Worker health ports @@ -63,6 +65,7 @@ declare -A WORKER_HEALTH_PORTS=( ["log_consumer"]="8084" ["notification"]="8085" ["scheduler"]="8087" + ["executor"]="8088" ) # Function to display usage @@ -80,6 +83,7 @@ WORKER_TYPE: log, log-consumer Run log consumer worker notification, notify Run notification worker scheduler, schedule Run scheduler worker (scheduled pipeline tasks) + executor Run executor worker (extraction execution tasks) all Run all workers (in separate processes, includes auto-discovered pluggable workers) Note: Pluggable workers in pluggable_worker/ directory are automatically discovered and can be run by name. @@ -147,6 +151,7 @@ HEALTH CHECKS: - Log Consumer: http://localhost:8084/health - Notification: http://localhost:8085/health - Scheduler: http://localhost:8087/health + - Executor: http://localhost:8088/health - Pluggable workers: http://localhost:8090+/health (auto-assigned ports) EOF @@ -301,7 +306,7 @@ show_status() { print_status $BLUE "Worker Status:" echo "==============" - local workers_to_check="api-deployment general file_processing callback log_consumer notification scheduler" + local workers_to_check="api-deployment general file_processing callback log_consumer notification scheduler executor" # Add discovered pluggable workers if [[ ${#PLUGGABLE_WORKERS[@]} -gt 0 ]]; then @@ -405,6 +410,9 @@ run_worker() { "scheduler") export SCHEDULER_HEALTH_PORT="$health_port" ;; + "executor") + export EXECUTOR_HEALTH_PORT="$health_port" + ;; *) # Handle pluggable workers dynamically if [[ -n "${PLUGGABLE_WORKERS[$worker_type]:-}" ]]; then @@ -478,6 +486,9 @@ run_worker() { "scheduler") cmd_args+=("--concurrency=2") ;; + "executor") + cmd_args+=("--concurrency=2") + ;; *) # Default for pluggable and other workers if [[ -n "${PLUGGABLE_WORKERS[$worker_type]:-}" ]]; then @@ -525,7 +536,7 @@ run_all_workers() { print_status $GREEN "Starting all workers..." # Define core workers - local core_workers="api-deployment general file_processing callback log_consumer notification scheduler" + local core_workers="api-deployment general file_processing callback log_consumer notification scheduler executor" # Add discovered pluggable workers if [[ ${#PLUGGABLE_WORKERS[@]} -gt 0 ]]; then diff --git a/workers/shared/enums/task_enums.py b/workers/shared/enums/task_enums.py index 5f57913cd9..6f3fa1cdd7 100644 --- a/workers/shared/enums/task_enums.py +++ b/workers/shared/enums/task_enums.py @@ -33,6 +33,12 @@ class TaskName(str, Enum): # API deployment worker tasks CHECK_API_DEPLOYMENT_STATUS = "check_api_deployment_status" + # Structure tool task (runs in file_processing worker) + EXECUTE_STRUCTURE_TOOL = "execute_structure_tool" + + # Executor worker tasks + EXECUTE_EXTRACTION = "execute_extraction" + def __str__(self): """Return enum value for Celery task naming.""" return self.value diff --git a/workers/shared/enums/worker_enums_base.py b/workers/shared/enums/worker_enums_base.py index babc19512f..3f1c844fd5 100644 --- a/workers/shared/enums/worker_enums_base.py +++ b/workers/shared/enums/worker_enums_base.py @@ -23,6 +23,7 @@ class WorkerType(str, Enum): NOTIFICATION = "notification" LOG_CONSUMER = "log_consumer" SCHEDULER = "scheduler" + EXECUTOR = "executor" @classmethod def from_directory_name(cls, name: str) -> "WorkerType": @@ -110,6 +111,7 @@ def to_health_port(self) -> int: WorkerType.NOTIFICATION: 8085, WorkerType.LOG_CONSUMER: 8086, WorkerType.SCHEDULER: 8087, + WorkerType.EXECUTOR: 8088, } return port_mapping.get(self, 8080) @@ -147,6 +149,9 @@ class QueueName(str, Enum): # Scheduler queue SCHEDULER = "scheduler" + # Executor queue + EXECUTOR = "executor" + def to_env_var_name(self) -> str: """Convert queue name to environment variable name. diff --git a/workers/shared/infrastructure/config/registry.py b/workers/shared/infrastructure/config/registry.py index 37ad1c08b9..8d1b208032 100644 --- a/workers/shared/infrastructure/config/registry.py +++ b/workers/shared/infrastructure/config/registry.py @@ -64,6 +64,9 @@ class WorkerRegistry: WorkerType.SCHEDULER: WorkerQueueConfig( primary_queue=QueueName.SCHEDULER, additional_queues=[QueueName.GENERAL] ), + WorkerType.EXECUTOR: WorkerQueueConfig( + primary_queue=QueueName.EXECUTOR, + ), } # Pluggable worker configurations loaded dynamically @@ -134,6 +137,13 @@ class WorkerRegistry: TaskRoute("scheduler.tasks.*", QueueName.SCHEDULER), ], ), + WorkerType.EXECUTOR: WorkerTaskRouting( + worker_type=WorkerType.EXECUTOR, + routes=[ + TaskRoute("execute_extraction", QueueName.EXECUTOR), + TaskRoute("executor.tasks.*", QueueName.EXECUTOR), + ], + ), } # Pluggable worker task routes loaded dynamically @@ -171,6 +181,9 @@ class WorkerRegistry: WorkerType.SCHEDULER: { "log_level": "INFO", }, + WorkerType.EXECUTOR: { + "log_level": "INFO", + }, } # Pluggable worker logging configs loaded dynamically diff --git a/workers/shared/workflow/execution/service.py b/workers/shared/workflow/execution/service.py index b19c6c2eaf..d9ff373737 100644 --- a/workers/shared/workflow/execution/service.py +++ b/workers/shared/workflow/execution/service.py @@ -971,17 +971,88 @@ def _prepare_workflow_input_file( def _build_and_execute_workflow( self, execution_service: WorkflowExecutionService, file_name: str ) -> None: - """Build and execute the workflow.""" - # Build workflow - execution_service.build_workflow() - logger.info(f"Workflow built successfully for file {file_name}") + """Build and execute the workflow. - # Execute workflow - from unstract.workflow_execution.enums import ExecutionType + Detects structure tool workflows and routes them to the Celery-based + execute_structure_tool task instead of the Docker container flow. + """ + if self._is_structure_tool_workflow(execution_service): + self._execute_structure_tool_workflow(execution_service, file_name) + else: + # Original Docker-based flow (unchanged) + execution_service.build_workflow() + logger.info(f"Workflow built successfully for file {file_name}") + + from unstract.workflow_execution.enums import ExecutionType - execution_service.execute_workflow(ExecutionType.COMPLETE) + execution_service.execute_workflow(ExecutionType.COMPLETE) logger.info(f"Workflow executed successfully for file {file_name}") + def _is_structure_tool_workflow( + self, execution_service: WorkflowExecutionService + ) -> bool: + """Check if workflow uses the structure tool.""" + structure_image = os.environ.get( + "STRUCTURE_TOOL_IMAGE_NAME", "unstract/tool-structure" + ) + for ti in execution_service.tool_instances: + if ti.image_name == structure_image: + return True + return False + + def _execute_structure_tool_workflow( + self, execution_service: WorkflowExecutionService, file_name: str + ) -> None: + """Execute structure tool as Celery task instead of Docker container. + + Calls execute_structure_tool directly (same process, in-band). + Only the inner ExecutionDispatcher calls go through Celery to + the executor worker. + """ + from file_processing.structure_tool_task import ( + execute_structure_tool as _execute_structure_tool, + ) + + tool_instance = execution_service.tool_instances[0] + file_handler = execution_service.file_handler + + # Read metadata from METADATA.json for file_hash and exec_metadata + metadata = {} + try: + metadata = file_handler.get_workflow_metadata() + except Exception as e: + logger.warning(f"Could not read workflow metadata: {e}") + + params = { + "organization_id": execution_service.organization_id, + "workflow_id": execution_service.workflow_id, + "execution_id": execution_service.execution_id, + "file_execution_id": execution_service.file_execution_id, + "tool_instance_metadata": tool_instance.metadata, + "platform_service_api_key": execution_service.platform_service_api_key, + "input_file_path": str(file_handler.infile), + "output_dir_path": str(file_handler.execution_dir), + "source_file_name": str( + os.path.basename(file_handler.source_file) + if file_handler.source_file + else file_name + ), + "execution_data_dir": str(file_handler.file_execution_dir), + "messaging_channel": getattr( + execution_service, "messaging_channel", "" + ), + "file_hash": metadata.get("source_hash", ""), + "exec_metadata": metadata, + } + + # Call synchronously (same process, in-band) + result = _execute_structure_tool(params) + + if not result.get("success"): + raise Exception( + f"Structure tool failed: {result.get('error', 'Unknown error')}" + ) + def _extract_source_connector_details( self, source_config: dict[str, Any] | None ) -> tuple[str | None, dict[str, Any]]: diff --git a/workers/tests/__init__.py b/workers/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/workers/tests/conftest.py b/workers/tests/conftest.py new file mode 100644 index 0000000000..084a8ef88c --- /dev/null +++ b/workers/tests/conftest.py @@ -0,0 +1,14 @@ +"""Shared fixtures for workers tests. + +Environment variables are loaded from .env.test at module level +BEFORE any shared package imports. This is required because +shared/constants/api_endpoints.py raises ValueError at import +time if INTERNAL_API_BASE_URL is not set. +""" + +from pathlib import Path + +from dotenv import load_dotenv + +_env_test = Path(__file__).resolve().parent.parent / ".env.test" +load_dotenv(_env_test) diff --git a/workers/tests/test_answer_prompt.py b/workers/tests/test_answer_prompt.py new file mode 100644 index 0000000000..97a6bb44ae --- /dev/null +++ b/workers/tests/test_answer_prompt.py @@ -0,0 +1,843 @@ +"""Tests for the answer_prompt pipeline (Phase 2E). + +Tests the _handle_answer_prompt method, AnswerPromptService, +VariableReplacementService, and type conversion logic. +All heavy dependencies (LLM, VectorDB, etc.) are mocked. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import ( + PromptServiceConstants as PSKeys, + RetrievalStrategy, +) +from executor.executors.exceptions import LegacyExecutorError +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.result import ExecutionResult + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_prompt( + name: str = "field_a", + prompt: str = "What is the revenue?", + output_type: str = "text", + chunk_size: int = 512, + chunk_overlap: int = 128, + retrieval_strategy: str = "simple", + llm_id: str = "llm-1", + embedding_id: str = "emb-1", + vector_db_id: str = "vdb-1", + x2text_id: str = "x2t-1", + similarity_top_k: int = 5, +): + """Build a minimal prompt definition dict.""" + return { + PSKeys.NAME: name, + PSKeys.PROMPT: prompt, + PSKeys.TYPE: output_type, + PSKeys.CHUNK_SIZE: chunk_size, + PSKeys.CHUNK_OVERLAP: chunk_overlap, + PSKeys.RETRIEVAL_STRATEGY: retrieval_strategy, + PSKeys.LLM: llm_id, + PSKeys.EMBEDDING: embedding_id, + PSKeys.VECTOR_DB: vector_db_id, + PSKeys.X2TEXT_ADAPTER: x2text_id, + PSKeys.SIMILARITY_TOP_K: similarity_top_k, + } + + +def _make_context( + prompts=None, + tool_settings=None, + file_hash="abc123", + file_path="/data/doc.txt", + file_name="doc.txt", + execution_source="ide", + platform_api_key="pk-test", + run_id="run-1", +): + """Build an ExecutionContext for answer_prompt.""" + if prompts is None: + prompts = [_make_prompt()] + if tool_settings is None: + tool_settings = {} + + params = { + PSKeys.OUTPUTS: prompts, + PSKeys.TOOL_SETTINGS: tool_settings, + PSKeys.TOOL_ID: "tool-1", + PSKeys.EXECUTION_ID: "exec-1", + PSKeys.FILE_HASH: file_hash, + PSKeys.FILE_PATH: file_path, + PSKeys.FILE_NAME: file_name, + PSKeys.LOG_EVENTS_ID: "", + PSKeys.CUSTOM_DATA: {}, + PSKeys.EXECUTION_SOURCE: execution_source, + PSKeys.PLATFORM_SERVICE_API_KEY: platform_api_key, + } + return ExecutionContext( + executor_name="legacy", + operation=Operation.ANSWER_PROMPT.value, + executor_params=params, + run_id=run_id, + execution_source=execution_source, + ) + + +def _mock_llm(): + """Create a mock LLM that returns a configurable answer.""" + llm = MagicMock(name="llm") + response = MagicMock() + response.text = "test answer" + llm.complete.return_value = { + PSKeys.RESPONSE: response, + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, + PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + llm.get_usage_reason.return_value = "extraction" + llm.get_metrics.return_value = {"tokens": 100} + return llm + + +def _mock_deps(llm=None): + """Return a tuple of mocked prompt deps matching _get_prompt_deps().""" + if llm is None: + llm = _mock_llm() + + # AnswerPromptService — use the real class + from executor.executors.answer_prompt import AnswerPromptService + + RetrievalService = MagicMock(name="RetrievalService") + RetrievalService.run_retrieval.return_value = ["chunk1", "chunk2"] + RetrievalService.retrieve_complete_context.return_value = ["full content"] + + VariableReplacementService = MagicMock(name="VariableReplacementService") + VariableReplacementService.is_variables_present.return_value = False + + Index = MagicMock(name="Index") + index_instance = MagicMock() + index_instance.generate_index_key.return_value = "doc-id-1" + Index.return_value = index_instance + + LLM_cls = MagicMock(name="LLM") + LLM_cls.return_value = llm + + EmbeddingCompat = MagicMock(name="EmbeddingCompat") + VectorDB = MagicMock(name="VectorDB") + + return ( + AnswerPromptService, + RetrievalService, + VariableReplacementService, + Index, + LLM_cls, + EmbeddingCompat, + VectorDB, + ) + + +# --------------------------------------------------------------------------- +# Tests — _handle_answer_prompt +# --------------------------------------------------------------------------- + +class TestHandleAnswerPromptText: + """Tests for TEXT type prompts.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_text_prompt_returns_success(self, mock_shim_cls, mock_deps): + """Simple TEXT prompt returns success with structured output.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context() + result = executor._handle_answer_prompt(ctx) + + assert result.success is True + assert PSKeys.OUTPUT in result.data + assert PSKeys.METADATA in result.data + assert PSKeys.METRICS in result.data + assert "field_a" in result.data[PSKeys.OUTPUT] + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_text_prompt_answer_stored(self, mock_shim_cls, mock_deps): + """The LLM answer is stored in structured_output.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context() + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] == "test answer" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_trailing_newline_stripped(self, mock_shim_cls, mock_deps): + """Trailing newlines are stripped from text answers.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response = MagicMock() + response.text = "answer with trailing\n" + llm.complete.return_value = { + PSKeys.RESPONSE: response, + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, + PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + result = executor._handle_answer_prompt(_make_context()) + + assert result.data[PSKeys.OUTPUT]["field_a"] == "answer with trailing" + + +class TestHandleAnswerPromptTypes: + """Tests for type-specific post-processing.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_number_type_converts_to_float(self, mock_shim_cls, mock_deps): + """NUMBER type converts answer to float.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + # First call: main retrieval answer. Second call: number extraction. + response1 = MagicMock() + response1.text = "revenue is $42.5M" + response2 = MagicMock() + response2.text = "42500000" + llm.complete.side_effect = [ + {PSKeys.RESPONSE: response1, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + {PSKeys.RESPONSE: response2, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + ] + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="number")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] == 42500000.0 + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_number_na_returns_none(self, mock_shim_cls, mock_deps): + """NUMBER type with NA answer returns None.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response = MagicMock() + response.text = "NA" + llm.complete.return_value = { + PSKeys.RESPONSE: response, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: "", + } + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="number")]) + result = executor._handle_answer_prompt(ctx) + + # NA → sanitized to None + assert result.data[PSKeys.OUTPUT]["field_a"] is None + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_boolean_yes(self, mock_shim_cls, mock_deps): + """BOOLEAN type converts 'yes' to True.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response1 = MagicMock() + response1.text = "The document confirms it" + response2 = MagicMock() + response2.text = "yes" + llm.complete.side_effect = [ + {PSKeys.RESPONSE: response1, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + {PSKeys.RESPONSE: response2, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + ] + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="boolean")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] is True + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_boolean_no(self, mock_shim_cls, mock_deps): + """BOOLEAN type converts 'no' to False.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response1 = MagicMock() + response1.text = "not confirmed" + response2 = MagicMock() + response2.text = "no" + llm.complete.side_effect = [ + {PSKeys.RESPONSE: response1, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + {PSKeys.RESPONSE: response2, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + ] + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="boolean")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] is False + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_email_type(self, mock_shim_cls, mock_deps): + """EMAIL type extracts email address.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response1 = MagicMock() + response1.text = "Contact: user@example.com" + response2 = MagicMock() + response2.text = "user@example.com" + llm.complete.side_effect = [ + {PSKeys.RESPONSE: response1, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + {PSKeys.RESPONSE: response2, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + ] + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="email")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] == "user@example.com" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_date_type(self, mock_shim_cls, mock_deps): + """DATE type extracts date in ISO format.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response1 = MagicMock() + response1.text = "The date is January 15, 2024" + response2 = MagicMock() + response2.text = "2024-01-15" + llm.complete.side_effect = [ + {PSKeys.RESPONSE: response1, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + {PSKeys.RESPONSE: response2, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: ""}, + ] + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="date")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] == "2024-01-15" + + +class TestHandleAnswerPromptJSON: + """Tests for JSON type handling.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_json_parsed(self, mock_shim_cls, mock_deps): + """JSON type parses valid JSON from answer.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response = MagicMock() + response.text = '{"key": "value"}' + llm.complete.return_value = { + PSKeys.RESPONSE: response, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: "", + } + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="json")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] == {"key": "value"} + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_json_na_returns_none(self, mock_shim_cls, mock_deps): + """JSON type with NA answer returns None.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + response = MagicMock() + response.text = "NA" + llm.complete.return_value = { + PSKeys.RESPONSE: response, PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], PSKeys.WHISPER_HASH: "", + } + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context(prompts=[_make_prompt(output_type="json")]) + result = executor._handle_answer_prompt(ctx) + + assert result.data[PSKeys.OUTPUT]["field_a"] is None + + +class TestHandleAnswerPromptRetrieval: + """Tests for retrieval integration.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_chunked_retrieval_uses_run_retrieval( + self, mock_shim_cls, mock_deps + ): + """chunk_size > 0 uses RetrievalService.run_retrieval.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + deps = _mock_deps(llm) + _, RetrievalService, *_ = deps + mock_deps.return_value = deps + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context( + prompts=[_make_prompt(chunk_size=512)] + ) + result = executor._handle_answer_prompt(ctx) + + RetrievalService.run_retrieval.assert_called_once() + assert result.success is True + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_complete_context_for_chunk_zero( + self, mock_shim_cls, mock_deps + ): + """chunk_size=0 uses RetrievalService.retrieve_complete_context.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + deps = _mock_deps(llm) + _, RetrievalService, *_ = deps + mock_deps.return_value = deps + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context( + prompts=[_make_prompt(chunk_size=0)] + ) + result = executor._handle_answer_prompt(ctx) + + RetrievalService.retrieve_complete_context.assert_called_once() + assert result.success is True + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_context_stored_in_metadata(self, mock_shim_cls, mock_deps): + """Retrieved context is stored in metadata.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + result = executor._handle_answer_prompt(_make_context()) + + metadata = result.data[PSKeys.METADATA] + assert "field_a" in metadata[PSKeys.CONTEXT] + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_invalid_strategy_skips_retrieval( + self, mock_shim_cls, mock_deps + ): + """Invalid retrieval strategy skips retrieval, answer stays NA.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context( + prompts=[_make_prompt(retrieval_strategy="nonexistent")] + ) + result = executor._handle_answer_prompt(ctx) + + # Answer stays "NA" which gets sanitized to None + assert result.data[PSKeys.OUTPUT]["field_a"] is None + + +class TestHandleAnswerPromptMultiPrompt: + """Tests for multi-prompt processing.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_multiple_prompts(self, mock_shim_cls, mock_deps): + """Multiple prompts are all processed.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + prompts = [ + _make_prompt(name="revenue"), + _make_prompt(name="date_signed", output_type="text"), + ] + executor = LegacyExecutor() + ctx = _make_context(prompts=prompts) + result = executor._handle_answer_prompt(ctx) + + output = result.data[PSKeys.OUTPUT] + assert "revenue" in output + assert "date_signed" in output + + +class TestHandleAnswerPromptErrors: + """Tests for error handling.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_table_type_raises_error(self, mock_shim_cls, mock_deps): + """TABLE type raises LegacyExecutorError (plugins not available).""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context( + prompts=[_make_prompt(output_type="table")] + ) + # TABLE raises LegacyExecutorError which is caught by execute() + result = executor.execute(ctx) + assert result.success is False + assert "TABLE" in result.error + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_line_item_type_raises_error(self, mock_shim_cls, mock_deps): + """LINE_ITEM type raises LegacyExecutorError.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + ctx = _make_context( + prompts=[_make_prompt(output_type="line-item")] + ) + result = executor.execute(ctx) + assert result.success is False + assert "LINE_ITEM" in result.error + + +class TestHandleAnswerPromptMetrics: + """Tests for metrics collection.""" + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_metrics_collected(self, mock_shim_cls, mock_deps): + """Metrics include context_retrieval and LLM metrics.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + mock_deps.return_value = _mock_deps(llm) + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + result = executor._handle_answer_prompt(_make_context()) + + metrics = result.data[PSKeys.METRICS] + assert "field_a" in metrics + assert "context_retrieval" in metrics["field_a"] + assert "extraction_llm" in metrics["field_a"] + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_vectordb_closed(self, mock_shim_cls, mock_deps): + """VectorDB is closed after processing.""" + from executor.executors.legacy_executor import LegacyExecutor + + llm = _mock_llm() + deps = _mock_deps(llm) + mock_deps.return_value = deps + _, _, _, _, _, _, VectorDB = deps + vdb_instance = MagicMock() + VectorDB.return_value = vdb_instance + mock_shim_cls.return_value = MagicMock() + + executor = LegacyExecutor() + executor._handle_answer_prompt(_make_context()) + + vdb_instance.close.assert_called_once() + + +class TestNullSanitization: + """Tests for _sanitize_null_values.""" + + def test_na_string_becomes_none(self): + """Top-level 'NA' string → None.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": "NA"} + result = LegacyExecutor._sanitize_null_values(output) + assert result["field"] is None + + def test_na_case_insensitive(self): + """'na' (lowercase) → None.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": "na"} + result = LegacyExecutor._sanitize_null_values(output) + assert result["field"] is None + + def test_nested_list_na(self): + """NA in nested list items → None.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": ["value", "NA", "other"]} + result = LegacyExecutor._sanitize_null_values(output) + assert result["field"] == ["value", None, "other"] + + def test_nested_dict_in_list_na(self): + """NA in dicts inside lists → None.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": [{"a": "NA", "b": "ok"}]} + result = LegacyExecutor._sanitize_null_values(output) + assert result["field"] == [{"a": None, "b": "ok"}] + + def test_nested_dict_na(self): + """NA in nested dict values → None.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": {"a": "NA", "b": "ok"}} + result = LegacyExecutor._sanitize_null_values(output) + assert result["field"] == {"a": None, "b": "ok"} + + def test_non_na_values_untouched(self): + """Non-NA values are not modified.""" + from executor.executors.legacy_executor import LegacyExecutor + + output = {"field": "hello", "num": 42, "flag": True} + result = LegacyExecutor._sanitize_null_values(output) + assert result == {"field": "hello", "num": 42, "flag": True} + + +class TestAnswerPromptServiceUnit: + """Unit tests for AnswerPromptService methods.""" + + def test_extract_variable_replaces_percent_vars(self): + """Replace %var% references in prompt text.""" + from executor.executors.answer_prompt import AnswerPromptService + + structured = {"field_a": "42"} + output = {"prompt": "Original: %field_a%"} + result = AnswerPromptService.extract_variable( + structured, ["field_a"], output, "Value is %field_a%" + ) + assert result == "Value is 42" + + def test_extract_variable_missing_raises(self): + """Missing variable raises ValueError.""" + from executor.executors.answer_prompt import AnswerPromptService + + output = {"prompt": "test"} + with pytest.raises(ValueError, match="not found"): + AnswerPromptService.extract_variable( + {}, ["missing_var"], output, "Value is %missing_var%" + ) + + def test_construct_prompt_includes_all_parts(self): + """Constructed prompt includes preamble, prompt, postamble, context.""" + from executor.executors.answer_prompt import AnswerPromptService + + result = AnswerPromptService.construct_prompt( + preamble="You are a helpful assistant", + prompt="What is the revenue?", + postamble="Be precise", + grammar_list=[], + context="Revenue was $1M", + platform_postamble="", + word_confidence_postamble="", + ) + assert "You are a helpful assistant" in result + assert "What is the revenue?" in result + assert "Be precise" in result + assert "Revenue was $1M" in result + assert "Answer:" in result + + def test_construct_prompt_with_grammar(self): + """Grammar list adds synonym notes.""" + from executor.executors.answer_prompt import AnswerPromptService + + result = AnswerPromptService.construct_prompt( + preamble="", + prompt="Find the amount", + postamble="", + grammar_list=[{"word": "amount", "synonyms": ["sum", "total"]}], + context="test", + platform_postamble="", + word_confidence_postamble="", + ) + assert "amount" in result + assert "sum, total" in result + + +class TestVariableReplacementService: + """Tests for the VariableReplacementService.""" + + def test_is_variables_present_true(self): + """Detects {{variables}} in text.""" + from executor.executors.variable_replacement import ( + VariableReplacementService, + ) + + assert VariableReplacementService.is_variables_present( + "Hello {{name}}" + ) is True + + def test_is_variables_present_false(self): + """Returns False when no variables present.""" + from executor.executors.variable_replacement import ( + VariableReplacementService, + ) + + assert VariableReplacementService.is_variables_present( + "Hello world" + ) is False + + def test_replace_static_variable(self): + """Static variable {{var}} is replaced with structured output value.""" + from executor.executors.variable_replacement import ( + VariableReplacementHelper, + ) + + result = VariableReplacementHelper.replace_static_variable( + prompt="Total is {{revenue}}", + structured_output={"revenue": "$1M"}, + variable="revenue", + ) + assert result == "Total is $1M" + + def test_custom_data_variable(self): + """Custom data variable {{custom_data.key}} is replaced.""" + from executor.executors.variable_replacement import ( + VariableReplacementHelper, + ) + + result = VariableReplacementHelper.replace_custom_data_variable( + prompt="Company: {{custom_data.company_name}}", + variable="custom_data.company_name", + custom_data={"company_name": "Acme Inc"}, + ) + assert result == "Company: Acme Inc" + + def test_custom_data_missing_raises(self): + """Missing custom data key raises CustomDataError.""" + from executor.executors.exceptions import CustomDataError + from executor.executors.variable_replacement import ( + VariableReplacementHelper, + ) + + with pytest.raises(CustomDataError): + VariableReplacementHelper.replace_custom_data_variable( + prompt="{{custom_data.missing}}", + variable="custom_data.missing", + custom_data={"other": "value"}, + ) diff --git a/workers/tests/test_executor_sanity.py b/workers/tests/test_executor_sanity.py new file mode 100644 index 0000000000..25d75dad6d --- /dev/null +++ b/workers/tests/test_executor_sanity.py @@ -0,0 +1,288 @@ +"""Phase 1 Sanity Check — Executor worker integration tests. + +These tests verify the full executor chain works end-to-end. + +Verifies: +1. Worker enums and registry configuration +2. ExecutorToolShim works from workers venv +3. NoOpExecutor registers and executes via orchestrator +4. Celery task wiring (execute_extraction task logic) +5. Full dispatch -> task -> orchestrator -> executor round-trip +6. Retry configuration on the task +""" + +import pytest +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +@pytest.fixture(autouse=True) +def _clean_registry(): + """Ensure a clean executor registry for every test.""" + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +def _make_context(**overrides): + defaults = { + "executor_name": "noop", + "operation": "extract", + "run_id": "run-sanity-001", + "execution_source": "tool", + "organization_id": "org-test", + "request_id": "req-sanity-001", + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _register_noop(): + """Register a NoOpExecutor for testing.""" + + @ExecutorRegistry.register + class NoOpExecutor(BaseExecutor): + @property + def name(self): + return "noop" + + def execute(self, context): + return ExecutionResult( + success=True, + data={"echo": context.operation, "run_id": context.run_id}, + metadata={"executor": self.name}, + ) + + +# --- 1. Worker enums and registry --- + + +class TestWorkerEnumsAndRegistry: + """Verify executor is properly registered in worker infrastructure.""" + + def test_worker_type_executor_exists(self): + from shared.enums.worker_enums import WorkerType + + assert WorkerType.EXECUTOR.value == "executor" + + def test_queue_name_executor_exists(self): + from shared.enums.worker_enums import QueueName + + assert QueueName.EXECUTOR.value == "executor" + + def test_task_name_execute_extraction_exists(self): + from shared.enums.task_enums import TaskName + + assert TaskName.EXECUTE_EXTRACTION.value == "execute_extraction" + + def test_health_port_is_8088(self): + from shared.enums.worker_enums import WorkerType + + assert WorkerType.EXECUTOR.to_health_port() == 8088 + + def test_worker_registry_has_executor_config(self): + from shared.enums.worker_enums import WorkerType + from shared.infrastructure.config.registry import WorkerRegistry + + config = WorkerRegistry.get_queue_config(WorkerType.EXECUTOR) + assert "executor" in config.all_queues() + + def test_task_routing_includes_execute_extraction(self): + from shared.enums.worker_enums import WorkerType + from shared.infrastructure.config.registry import WorkerRegistry + + routing = WorkerRegistry.get_task_routing(WorkerType.EXECUTOR) + patterns = [r.pattern for r in routing.routes] + assert "execute_extraction" in patterns + + +# --- 2. ExecutorToolShim --- + + +class TestExecutorToolShim: + """Verify the real ExecutorToolShim works in the workers venv.""" + + def test_import(self): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim(platform_api_key="sk-test") + assert shim.platform_api_key == "sk-test" + + def test_platform_key_returned(self): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim(platform_api_key="sk-real-key") + assert shim.get_env_or_die("PLATFORM_SERVICE_API_KEY") == "sk-real-key" + + def test_env_var_from_environ(self, monkeypatch): + from executor.executor_tool_shim import ExecutorToolShim + + monkeypatch.setenv("TEST_SHIM_VAR", "hello") + shim = ExecutorToolShim(platform_api_key="sk-test") + assert shim.get_env_or_die("TEST_SHIM_VAR") == "hello" + + def test_missing_var_raises(self): + from executor.executor_tool_shim import ExecutorToolShim + from unstract.sdk1.exceptions import SdkError + + shim = ExecutorToolShim(platform_api_key="sk-test") + with pytest.raises(SdkError, match="NONEXISTENT"): + shim.get_env_or_die("NONEXISTENT") + + def test_stream_log_does_not_print_json(self, capsys): + """stream_log routes to logging, not stdout JSON.""" + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim(platform_api_key="sk-test") + shim.stream_log("test message") + captured = capsys.readouterr() + # Should NOT produce JSON on stdout (that's the old protocol) + assert '"type": "LOG"' not in captured.out + + def test_stream_error_raises_sdk_error(self): + from executor.executor_tool_shim import ExecutorToolShim + from unstract.sdk1.exceptions import SdkError + + shim = ExecutorToolShim(platform_api_key="sk-test") + with pytest.raises(SdkError, match="boom"): + shim.stream_error_and_exit("boom") + + +# --- 3. NoOpExecutor via Orchestrator --- + + +class TestNoOpExecutorOrchestrator: + """Verify a NoOpExecutor works through the orchestrator.""" + + def test_noop_executor_round_trip(self): + _register_noop() + + ctx = _make_context(operation="extract") + orchestrator = ExecutionOrchestrator() + result = orchestrator.execute(ctx) + + assert result.success is True + assert result.data == {"echo": "extract", "run_id": "run-sanity-001"} + + def test_unknown_executor_fails_gracefully(self): + orchestrator = ExecutionOrchestrator() + ctx = _make_context(executor_name="nonexistent") + result = orchestrator.execute(ctx) + + assert result.success is False + assert "nonexistent" in result.error + + +# --- 4 & 5. Full chain with Celery eager mode --- +# +# executor/worker.py imports executor/tasks.py which defines +# execute_extraction as a shared_task. We import the real app, +# configure it for eager mode, and exercise the actual task. + + +@pytest.fixture +def eager_app(): + """Configure the real executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + + yield app + + app.conf.update(original) + + +class TestCeleryTaskWiring: + """Verify the execute_extraction task configuration.""" + + def test_task_is_registered(self, eager_app): + assert "execute_extraction" in eager_app.tasks + + def test_task_has_retry_config(self, eager_app): + task = eager_app.tasks["execute_extraction"] + assert task.max_retries == 3 + assert ConnectionError in task.autoretry_for + assert TimeoutError in task.autoretry_for + assert OSError in task.autoretry_for + + def test_task_retry_backoff_enabled(self, eager_app): + task = eager_app.tasks["execute_extraction"] + assert task.retry_backoff is True + assert task.retry_jitter is True + + +class TestFullChainEager: + """End-to-end test using Celery's eager mode. + + task_always_eager=True makes tasks execute inline in the + calling process — full chain without a broker. + """ + + def _run_task(self, eager_app, context_dict): + """Run execute_extraction task via task.apply() (eager-safe).""" + task = eager_app.tasks["execute_extraction"] + result = task.apply(args=[context_dict]) + return result.get() + + def test_eager_dispatch_round_trip(self, eager_app): + """Execute task inline, verify result comes back.""" + _register_noop() + + ctx = _make_context(operation="answer_prompt", run_id="run-eager") + result_dict = self._run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data["echo"] == "answer_prompt" + assert result.data["run_id"] == "run-eager" + assert result.metadata.get("executor") == "noop" + + def test_eager_dispatch_invalid_context(self, eager_app): + """Invalid context dict returns failure result (not exception).""" + result_dict = self._run_task(eager_app, {"bad": "data"}) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "Invalid execution context" in result.error + + def test_eager_dispatch_unknown_executor(self, eager_app): + """Unknown executor returns failure (no unhandled exceptions).""" + ctx = _make_context(executor_name="does_not_exist") + result_dict = self._run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "does_not_exist" in result.error + + def test_result_serialization_round_trip(self, eager_app): + """Verify ExecutionResult survives Celery serialization.""" + _register_noop() + + ctx = _make_context( + operation="single_pass_extraction", + executor_params={"schema": {"name": "str", "age": "int"}}, + ) + result_dict = self._run_task(eager_app, ctx.to_dict()) + + # Verify the raw dict is JSON-compatible + import json + + serialized = json.dumps(result_dict) + deserialized = json.loads(serialized) + + result = ExecutionResult.from_dict(deserialized) + assert result.success is True + assert result.data["echo"] == "single_pass_extraction" diff --git a/workers/tests/test_legacy_executor_extract.py b/workers/tests/test_legacy_executor_extract.py new file mode 100644 index 0000000000..a8218f80cb --- /dev/null +++ b/workers/tests/test_legacy_executor_extract.py @@ -0,0 +1,595 @@ +"""Phase 2B — LegacyExecutor._handle_extract tests. + +Verifies: +1. Happy path: extraction returns success with extracted_text +2. With highlight (LLMWhisperer): enable_highlight passed through +3. Without highlight (non-Whisperer): enable_highlight NOT passed +4. AdapterError → failure result +5. Missing required params → failure result +6. Metadata update for tool source: ToolUtils.dump_json called +7. IDE source skips metadata writing +8. FileUtils routing: correct storage type for ide vs tool +9. Orchestrator integration: extract returns success (mocked) +10. Celery eager-mode: full task chain returns extraction result +11. LegacyExecutorError caught by execute() → failure result +""" + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import ( + ExecutionSource, + FileStorageKeys, + IndexingConstants as IKeys, +) +from executor.executors.exceptions import ExtractionError, LegacyExecutorError +from unstract.sdk1.adapters.x2text.constants import X2TextConstants +from unstract.sdk1.adapters.x2text.dto import ( + TextExtractionMetadata, + TextExtractionResult, +) +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +@pytest.fixture(autouse=True) +def _clean_registry(): + """Ensure a clean executor registry for every test.""" + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +def _register_legacy(): + from executor.executors.legacy_executor import LegacyExecutor # noqa: F401 + + ExecutorRegistry.register(LegacyExecutor) + + +def _make_context(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-2b-001", + "execution_source": "tool", + "organization_id": "org-test", + "request_id": "req-2b-001", + "executor_params": { + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "platform_api_key": "sk-test-key", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _mock_process_response(extracted_text="hello world", whisper_hash="hash-123"): + """Build a mock TextExtractionResult.""" + metadata = TextExtractionMetadata(whisper_hash=whisper_hash) + return TextExtractionResult( + extracted_text=extracted_text, + extraction_metadata=metadata, + ) + + +# --- 1. Happy path --- + + +class TestHappyPath: + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_extract_returns_success(self, mock_x2text_cls, mock_get_fs): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("hello") + mock_x2text.x2text_instance = MagicMock() # not a Whisperer + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context() + result = executor.execute(ctx) + + assert result.success is True + assert result.data[IKeys.EXTRACTED_TEXT] == "hello" + + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_extract_passes_correct_params_to_x2text( + self, mock_x2text_cls, mock_get_fs + ): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response() + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context( + executor_params={ + "x2text_instance_id": "x2t-002", + "file_path": "/data/doc.pdf", + "platform_api_key": "sk-key", + "usage_kwargs": {"org": "test-org"}, + } + ) + executor.execute(ctx) + + mock_x2text_cls.assert_called_once() + call_kwargs = mock_x2text_cls.call_args + assert call_kwargs.kwargs.get("adapter_instance_id") == "x2t-002" or ( + call_kwargs.args + and len(call_kwargs.args) > 1 + and call_kwargs.args[1] == "x2t-002" + ) + + +# --- 2. With highlight (LLMWhisperer) --- + + +class TestWithHighlight: + @patch("executor.executors.legacy_executor.ToolUtils.dump_json") + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_highlight_with_whisperer_v2( + self, mock_x2text_cls, mock_get_fs, mock_dump + ): + from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2 + + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response() + # Make isinstance check pass for LLMWhispererV2 + mock_x2text.x2text_instance = MagicMock(spec=LLMWhispererV2) + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context( + executor_params={ + "x2text_instance_id": "x2t-whisperer", + "file_path": "/data/test.pdf", + "platform_api_key": "sk-key", + "enable_highlight": True, + "execution_data_dir": "/data/run", + "tool_execution_metadata": {}, + } + ) + result = executor.execute(ctx) + + assert result.success is True + # Verify enable_highlight was passed to process() + mock_x2text.process.assert_called_once() + call_kwargs = mock_x2text.process.call_args.kwargs + assert call_kwargs.get("enable_highlight") is True + + @patch("executor.executors.legacy_executor.ToolUtils.dump_json") + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_highlight_with_whisperer_v1( + self, mock_x2text_cls, mock_get_fs, mock_dump + ): + from unstract.sdk1.adapters.x2text.llm_whisperer.src import LLMWhisperer + + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response() + mock_x2text.x2text_instance = MagicMock(spec=LLMWhisperer) + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context( + executor_params={ + "x2text_instance_id": "x2t-whisperer-v1", + "file_path": "/data/test.pdf", + "platform_api_key": "sk-key", + "enable_highlight": True, + "execution_data_dir": "/data/run", + "tool_execution_metadata": {}, + } + ) + result = executor.execute(ctx) + + assert result.success is True + call_kwargs = mock_x2text.process.call_args.kwargs + assert call_kwargs.get("enable_highlight") is True + + +# --- 3. Without highlight (non-Whisperer) --- + + +class TestWithoutHighlight: + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_no_highlight_for_non_whisperer(self, mock_x2text_cls, mock_get_fs): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response() + # Generic adapter — not LLMWhisperer + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context( + executor_params={ + "x2text_instance_id": "x2t-generic", + "file_path": "/data/test.pdf", + "platform_api_key": "sk-key", + "enable_highlight": True, # requested but adapter doesn't support it + } + ) + result = executor.execute(ctx) + + assert result.success is True + # enable_highlight should NOT be in process() call + call_kwargs = mock_x2text.process.call_args.kwargs + assert "enable_highlight" not in call_kwargs + + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_highlight_false_skips_whisperer_branch( + self, mock_x2text_cls, mock_get_fs + ): + from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2 + + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response() + mock_x2text.x2text_instance = MagicMock(spec=LLMWhispererV2) + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context( + executor_params={ + "x2text_instance_id": "x2t-whisperer", + "file_path": "/data/test.pdf", + "platform_api_key": "sk-key", + "enable_highlight": False, # highlight disabled + } + ) + result = executor.execute(ctx) + + assert result.success is True + call_kwargs = mock_x2text.process.call_args.kwargs + assert "enable_highlight" not in call_kwargs + + +# --- 4. AdapterError → failure result --- + + +class TestAdapterError: + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_adapter_error_returns_failure(self, mock_x2text_cls, mock_get_fs): + from unstract.sdk1.adapters.exceptions import AdapterError + + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_x2text = MagicMock() + mock_x2text.x2text_instance = MagicMock() + mock_x2text.x2text_instance.get_name.return_value = "TestExtractor" + mock_x2text.process.side_effect = AdapterError("connection timeout") + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context() + result = executor.execute(ctx) + + assert result.success is False + assert "TestExtractor" in result.error + assert "connection timeout" in result.error + + +# --- 5. Missing required params --- + + +class TestMissingParams: + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_missing_x2text_instance_id(self, mock_x2text_cls, mock_get_fs): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + ctx = _make_context( + executor_params={ + "file_path": "/data/test.pdf", + "platform_api_key": "sk-key", + } + ) + result = executor.execute(ctx) + + assert result.success is False + assert "x2text_instance_id" in result.error + mock_x2text_cls.assert_not_called() + + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_missing_file_path(self, mock_x2text_cls, mock_get_fs): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + ctx = _make_context( + executor_params={ + "x2text_instance_id": "x2t-001", + "platform_api_key": "sk-key", + } + ) + result = executor.execute(ctx) + + assert result.success is False + assert "file_path" in result.error + mock_x2text_cls.assert_not_called() + + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_missing_both_params(self, mock_x2text_cls, mock_get_fs): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + ctx = _make_context(executor_params={"platform_api_key": "sk-key"}) + result = executor.execute(ctx) + + assert result.success is False + assert "x2text_instance_id" in result.error + assert "file_path" in result.error + + +# --- 6. Metadata update for tool source --- + + +class TestMetadataToolSource: + @patch("executor.executors.legacy_executor.ToolUtils.dump_json") + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_tool_source_writes_metadata( + self, mock_x2text_cls, mock_get_fs, mock_dump + ): + from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2 + + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response( + whisper_hash="whash-456" + ) + mock_x2text.x2text_instance = MagicMock(spec=LLMWhispererV2) + mock_x2text_cls.return_value = mock_x2text + mock_fs = MagicMock() + mock_get_fs.return_value = mock_fs + + tool_meta = {} + ctx = _make_context( + execution_source="tool", + executor_params={ + "x2text_instance_id": "x2t-whisperer", + "file_path": "/data/test.pdf", + "platform_api_key": "sk-key", + "enable_highlight": True, + "execution_data_dir": "/run/data", + "tool_execution_metadata": tool_meta, + }, + ) + result = executor.execute(ctx) + + assert result.success is True + # ToolUtils.dump_json should have been called + mock_dump.assert_called_once() + dump_kwargs = mock_dump.call_args.kwargs + assert dump_kwargs["file_to_dump"] == str( + Path("/run/data") / IKeys.METADATA_FILE + ) + assert dump_kwargs["json_to_dump"] == { + X2TextConstants.WHISPER_HASH: "whash-456" + } + assert dump_kwargs["fs"] is mock_fs + # tool_exec_metadata should be updated in-place + assert tool_meta[X2TextConstants.WHISPER_HASH] == "whash-456" + + +# --- 7. IDE source skips metadata --- + + +class TestMetadataIDESource: + @patch("executor.executors.legacy_executor.ToolUtils.dump_json") + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_ide_source_skips_metadata( + self, mock_x2text_cls, mock_get_fs, mock_dump + ): + from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2 + + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response() + mock_x2text.x2text_instance = MagicMock(spec=LLMWhispererV2) + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context( + execution_source="ide", + executor_params={ + "x2text_instance_id": "x2t-whisperer", + "file_path": "/data/test.pdf", + "platform_api_key": "sk-key", + "enable_highlight": True, + }, + ) + result = executor.execute(ctx) + + assert result.success is True + mock_dump.assert_not_called() + + +# --- 8. FileUtils routing --- + + +class TestFileUtilsRouting: + @patch("executor.executors.file_utils.EnvHelper.get_storage") + def test_ide_returns_permanent_storage(self, mock_get_storage): + from executor.executors.file_utils import FileUtils + from unstract.sdk1.file_storage.constants import StorageType + + mock_get_storage.return_value = MagicMock() + FileUtils.get_fs_instance("ide") + + mock_get_storage.assert_called_once_with( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + + @patch("executor.executors.file_utils.EnvHelper.get_storage") + def test_tool_returns_temporary_storage(self, mock_get_storage): + from executor.executors.file_utils import FileUtils + from unstract.sdk1.file_storage.constants import StorageType + + mock_get_storage.return_value = MagicMock() + FileUtils.get_fs_instance("tool") + + mock_get_storage.assert_called_once_with( + storage_type=StorageType.SHARED_TEMPORARY, + env_name=FileStorageKeys.TEMPORARY_REMOTE_STORAGE, + ) + + def test_invalid_source_raises_value_error(self): + from executor.executors.file_utils import FileUtils + + with pytest.raises(ValueError, match="Invalid execution source"): + FileUtils.get_fs_instance("unknown") + + +# --- 9. Orchestrator integration --- + + +class TestOrchestratorIntegration: + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_orchestrator_extract_returns_success( + self, mock_x2text_cls, mock_get_fs + ): + _register_legacy() + orchestrator = ExecutionOrchestrator() + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("extracted!") + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context() + result = orchestrator.execute(ctx) + + assert result.success is True + assert result.data[IKeys.EXTRACTED_TEXT] == "extracted!" + + +# --- 10. Celery eager-mode --- + + +@pytest.fixture +def eager_app(): + """Configure the real executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +class TestCeleryEager: + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_eager_extract_returns_success( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + _register_legacy() + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("celery text") + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context() + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data[IKeys.EXTRACTED_TEXT] == "celery text" + + +# --- 11. LegacyExecutorError caught by execute() --- + + +class TestExecuteErrorCatching: + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + def test_extraction_error_caught_by_execute( + self, mock_x2text_cls, mock_get_fs + ): + """ExtractionError (a LegacyExecutorError) is caught in execute() + and mapped to ExecutionResult.failure().""" + from unstract.sdk1.adapters.exceptions import AdapterError + + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_x2text = MagicMock() + mock_x2text.x2text_instance = MagicMock() + mock_x2text.x2text_instance.get_name.return_value = "BadExtractor" + mock_x2text.process.side_effect = AdapterError("timeout") + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _make_context() + result = executor.execute(ctx) + + # Should be a clean failure, NOT an unhandled exception + assert result.success is False + assert "BadExtractor" in result.error + assert "timeout" in result.error + + def test_legacy_executor_error_subclass_caught(self): + """Any LegacyExecutorError subclass raised by a handler is caught.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + # Monkey-patch _handle_extract to raise a LegacyExecutorError + def _raise_err(ctx): + raise LegacyExecutorError(message="custom error", code=422) + + executor._handle_extract = _raise_err + + ctx = _make_context() + result = executor.execute(ctx) + + assert result.success is False + assert result.error == "custom error" diff --git a/workers/tests/test_legacy_executor_index.py b/workers/tests/test_legacy_executor_index.py new file mode 100644 index 0000000000..d87d5b5b97 --- /dev/null +++ b/workers/tests/test_legacy_executor_index.py @@ -0,0 +1,453 @@ +"""Phase 2C — LegacyExecutor._handle_index tests. + +Verifies: +1. Happy path: indexing returns success with doc_id +2. Chunk size 0: skips indexing, still returns doc_id +3. Missing required params → failure result +4. Reindex flag: passes reindex through to Index +5. VectorDB.close() always called (even on error) +6. Indexing error → LegacyExecutorError → failure result +7. Orchestrator integration: index returns success (mocked) +8. Celery eager-mode: full task chain returns indexing result +9. Index class: generate_index_key called with correct DTOs +10. EmbeddingCompat and VectorDB created with correct params + +Heavy SDK1 dependencies (llama_index, qdrant) are lazily imported +via ``LegacyExecutor._get_indexing_deps()``. We mock that method +to avoid protobuf conflicts in the test environment. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import IndexingConstants as IKeys +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +@pytest.fixture(autouse=True) +def _clean_registry(): + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +def _register_legacy(): + from executor.executors.legacy_executor import LegacyExecutor # noqa: F401 + + ExecutorRegistry.register(LegacyExecutor) + + +def _make_index_context(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "index", + "run_id": "run-2c-001", + "execution_source": "tool", + "organization_id": "org-test", + "request_id": "req-2c-001", + "executor_params": { + "embedding_instance_id": "emb-001", + "vector_db_instance_id": "vdb-001", + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "file_hash": "abc123", + "extracted_text": "Hello world", + "platform_api_key": "sk-test", + "chunk_size": 512, + "chunk_overlap": 128, + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +_PATCH_FS = "executor.executors.legacy_executor.FileUtils.get_fs_instance" +_PATCH_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_indexing_deps" +) + + +@pytest.fixture +def mock_indexing_deps(): + """Mock the heavy indexing dependencies via _get_indexing_deps().""" + mock_index_cls = MagicMock() + mock_emb_cls = MagicMock() + mock_vdb_cls = MagicMock() + + with patch(_PATCH_DEPS, return_value=(mock_index_cls, mock_emb_cls, mock_vdb_cls)): + yield mock_index_cls, mock_emb_cls, mock_vdb_cls + + +def _setup_mock_index(mock_index_cls, doc_id="doc-hash-123"): + """Configure a mock Index instance.""" + mock_index = MagicMock() + mock_index.generate_index_key.return_value = doc_id + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = doc_id + mock_index_cls.return_value = mock_index + return mock_index + + +# --- 1. Happy path --- + + +class TestHappyPath: + @patch(_PATCH_FS) + def test_index_returns_success_with_doc_id( + self, mock_get_fs, mock_indexing_deps + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + _setup_mock_index(mock_index_cls, "doc-hash-123") + mock_emb_cls.return_value = MagicMock() + mock_vdb = MagicMock() + mock_vdb_cls.return_value = mock_vdb + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + result = executor.execute(ctx) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-hash-123" + mock_vdb.close.assert_called_once() + + +# --- 2. Chunk size 0: skips indexing --- + + +class TestChunkSizeZero: + @patch( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-zero-chunk", + ) + @patch(_PATCH_FS) + def test_chunk_size_zero_skips_indexing(self, mock_get_fs, mock_gen_key): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context( + executor_params={ + "embedding_instance_id": "emb-001", + "vector_db_instance_id": "vdb-001", + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "file_hash": "abc123", + "extracted_text": "text", + "platform_api_key": "sk-test", + "chunk_size": 0, + "chunk_overlap": 0, + } + ) + result = executor.execute(ctx) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-zero-chunk" + mock_gen_key.assert_called_once() + + +# --- 3. Missing required params --- + + +class TestMissingParams: + def test_missing_embedding_instance_id(self): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + ctx = _make_index_context( + executor_params={ + "vector_db_instance_id": "vdb-001", + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "platform_api_key": "sk-test", + } + ) + result = executor.execute(ctx) + assert result.success is False + assert "embedding_instance_id" in result.error + + def test_missing_multiple_params(self): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + ctx = _make_index_context( + executor_params={"platform_api_key": "sk-test"} + ) + result = executor.execute(ctx) + assert result.success is False + assert "embedding_instance_id" in result.error + assert "vector_db_instance_id" in result.error + assert "x2text_instance_id" in result.error + assert "file_path" in result.error + + +# --- 4. Reindex flag --- + + +class TestReindex: + @patch(_PATCH_FS) + def test_reindex_passed_through(self, mock_get_fs, mock_indexing_deps): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + _setup_mock_index(mock_index_cls, "doc-reindex") + mock_index_cls.return_value.is_document_indexed.return_value = True + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls.return_value = MagicMock() + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context( + executor_params={ + "embedding_instance_id": "emb-001", + "vector_db_instance_id": "vdb-001", + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "file_hash": "abc123", + "extracted_text": "text", + "platform_api_key": "sk-test", + "chunk_size": 512, + "chunk_overlap": 128, + "reindex": True, + } + ) + result = executor.execute(ctx) + + assert result.success is True + init_call = mock_index_cls.call_args + assert init_call.kwargs["processing_options"].reindex is True + + +# --- 5. VectorDB.close() always called --- + + +class TestVectorDBClose: + @patch(_PATCH_FS) + def test_vectordb_closed_on_success(self, mock_get_fs, mock_indexing_deps): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + _setup_mock_index(mock_index_cls) + mock_emb_cls.return_value = MagicMock() + mock_vdb = MagicMock() + mock_vdb_cls.return_value = mock_vdb + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + executor.execute(ctx) + mock_vdb.close.assert_called_once() + + @patch(_PATCH_FS) + def test_vectordb_closed_on_error(self, mock_get_fs, mock_indexing_deps): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_index = _setup_mock_index(mock_index_cls) + mock_index.is_document_indexed.side_effect = RuntimeError("boom") + mock_emb_cls.return_value = MagicMock() + mock_vdb = MagicMock() + mock_vdb_cls.return_value = mock_vdb + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + result = executor.execute(ctx) + + assert result.success is False + mock_vdb.close.assert_called_once() + + +# --- 6. Indexing error → failure result --- + + +class TestIndexingError: + @patch(_PATCH_FS) + def test_indexing_error_returns_failure( + self, mock_get_fs, mock_indexing_deps + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_index = _setup_mock_index(mock_index_cls, "doc-err") + mock_index.perform_indexing.side_effect = RuntimeError( + "vector DB unavailable" + ) + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls.return_value = MagicMock() + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + result = executor.execute(ctx) + + assert result.success is False + assert "indexing" in result.error.lower() + assert "vector DB unavailable" in result.error + + +# --- 7. Orchestrator integration --- + + +class TestOrchestratorIntegration: + @patch(_PATCH_FS) + def test_orchestrator_index_returns_success( + self, mock_get_fs, mock_indexing_deps + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + orchestrator = ExecutionOrchestrator() + + _setup_mock_index(mock_index_cls, "doc-orch") + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls.return_value = MagicMock() + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + result = orchestrator.execute(ctx) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-orch" + + +# --- 8. Celery eager-mode --- + + +@pytest.fixture +def eager_app(): + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +class TestCeleryEager: + @patch(_PATCH_FS) + def test_eager_index_returns_success( + self, mock_get_fs, mock_indexing_deps, eager_app + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + + _setup_mock_index(mock_index_cls, "doc-celery") + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls.return_value = MagicMock() + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context() + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-celery" + + +# --- 9. Index class receives correct DTOs --- + + +class TestIndexDTOs: + @patch(_PATCH_FS) + def test_index_created_with_correct_dtos( + self, mock_get_fs, mock_indexing_deps + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + _setup_mock_index(mock_index_cls, "doc-dto") + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls.return_value = MagicMock() + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context( + executor_params={ + "embedding_instance_id": "emb-dto", + "vector_db_instance_id": "vdb-dto", + "x2text_instance_id": "x2t-dto", + "file_path": "/data/doc.pdf", + "file_hash": "hash-dto", + "extracted_text": "text", + "platform_api_key": "sk-test", + "chunk_size": 256, + "chunk_overlap": 64, + "tool_id": "tool-dto", + "tags": ["tag1"], + } + ) + executor.execute(ctx) + + init_kwargs = mock_index_cls.call_args.kwargs + ids = init_kwargs["instance_identifiers"] + assert ids.embedding_instance_id == "emb-dto" + assert ids.vector_db_instance_id == "vdb-dto" + assert ids.x2text_instance_id == "x2t-dto" + assert ids.tool_id == "tool-dto" + assert ids.tags == ["tag1"] + + chunking = init_kwargs["chunking_config"] + assert chunking.chunk_size == 256 + assert chunking.chunk_overlap == 64 + + gen_call = mock_index_cls.return_value.generate_index_key.call_args + fi = gen_call.kwargs["file_info"] + assert fi.file_path == "/data/doc.pdf" + assert fi.file_hash == "hash-dto" + + +# --- 10. EmbeddingCompat and VectorDB created with correct params --- + + +class TestAdapterCreation: + @patch(_PATCH_FS) + def test_embedding_and_vectordb_params( + self, mock_get_fs, mock_indexing_deps + ): + mock_index_cls, mock_emb_cls, mock_vdb_cls = mock_indexing_deps + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + _setup_mock_index(mock_index_cls, "doc-adapt") + mock_emb = MagicMock() + mock_emb_cls.return_value = mock_emb + mock_vdb = MagicMock() + mock_vdb_cls.return_value = mock_vdb + mock_get_fs.return_value = MagicMock() + + ctx = _make_index_context( + executor_params={ + "embedding_instance_id": "emb-check", + "vector_db_instance_id": "vdb-check", + "x2text_instance_id": "x2t-001", + "file_path": "/data/test.pdf", + "file_hash": "abc", + "extracted_text": "text", + "platform_api_key": "sk-test", + "chunk_size": 512, + "chunk_overlap": 128, + "usage_kwargs": {"org": "test-org"}, + } + ) + executor.execute(ctx) + + emb_call = mock_emb_cls.call_args + assert emb_call.kwargs["adapter_instance_id"] == "emb-check" + assert emb_call.kwargs["kwargs"] == {"org": "test-org"} + + vdb_call = mock_vdb_cls.call_args + assert vdb_call.kwargs["adapter_instance_id"] == "vdb-check" + assert vdb_call.kwargs["embedding"] is mock_emb diff --git a/workers/tests/test_legacy_executor_scaffold.py b/workers/tests/test_legacy_executor_scaffold.py new file mode 100644 index 0000000000..4821bb7603 --- /dev/null +++ b/workers/tests/test_legacy_executor_scaffold.py @@ -0,0 +1,282 @@ +"""Phase 2A — LegacyExecutor scaffold tests. + +Verifies: +1. Registration in ExecutorRegistry +2. Name property +3. Unsupported operation handling +4. Each operation raises NotImplementedError +5. Orchestrator wraps NotImplementedError as failure +6. Celery eager-mode chain +7. Dispatch table coverage (every Operation has a handler) +8. Constants importable +9. DTOs importable +10. Exceptions standalone (no Flask dependency) +""" + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +@pytest.fixture(autouse=True) +def _clean_registry(): + """Ensure a clean executor registry for every test.""" + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +def _register_legacy(): + """Import executor.executors to trigger LegacyExecutor registration.""" + from executor.executors.legacy_executor import LegacyExecutor # noqa: F401 + + ExecutorRegistry.register(LegacyExecutor) + + +def _make_context(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-2a-001", + "execution_source": "tool", + "organization_id": "org-test", + "request_id": "req-2a-001", + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +# --- 1. Registration --- + + +class TestRegistration: + def test_legacy_in_registry(self): + _register_legacy() + assert "legacy" in ExecutorRegistry.list_executors() + + +# --- 2. Name --- + + +class TestName: + def test_name_is_legacy(self): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + assert executor.name == "legacy" + + +# --- 3. Unsupported operation --- + + +class TestUnsupportedOperation: + def test_unsupported_operation_returns_failure(self): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + ctx = _make_context(operation="totally_unknown_op") + result = executor.execute(ctx) + + assert result.success is False + assert "does not support operation" in result.error + assert "totally_unknown_op" in result.error + + +# --- 4. All operations are implemented (no stubs remain) --- +# TestHandlerStubs and TestOrchestratorWrapping removed: +# All operations (extract, index, answer_prompt, single_pass_extraction, +# summarize, agentic_extraction) are now fully implemented. + + +# --- 6. Celery eager-mode chain --- + + +@pytest.fixture +def eager_app(): + """Configure the real executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + + yield app + + app.conf.update(original) + + +class TestCeleryEagerChain: + def test_eager_unsupported_op_returns_failure(self, eager_app): + """execute_extraction with an unsupported operation returns failure.""" + _register_legacy() + + ctx = _make_context(operation="totally_unknown_op") + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "does not support operation" in result.error + + +# --- 7. Dispatch table coverage --- + + +class TestDispatchTableCoverage: + def test_every_operation_has_handler(self): + from executor.executors.legacy_executor import LegacyExecutor + + for op in Operation: + assert op.value in LegacyExecutor._OPERATION_MAP, ( + f"Operation {op.value} missing from _OPERATION_MAP" + ) + + +# --- 8. Constants importable --- + + +class TestConstants: + def test_prompt_service_constants(self): + from executor.executors.constants import PromptServiceConstants + + assert hasattr(PromptServiceConstants, "TOOL_ID") + assert PromptServiceConstants.TOOL_ID == "tool_id" + + def test_retrieval_strategy(self): + from executor.executors.constants import RetrievalStrategy + + assert RetrievalStrategy.SIMPLE.value == "simple" + assert RetrievalStrategy.SUBQUESTION.value == "subquestion" + + def test_run_level(self): + from executor.executors.constants import RunLevel + + assert RunLevel.RUN.value == "RUN" + assert RunLevel.EVAL.value == "EVAL" + + +# --- 9. DTOs importable --- + + +class TestDTOs: + def test_chunking_config(self): + from executor.executors.dto import ChunkingConfig + + cfg = ChunkingConfig(chunk_size=512, chunk_overlap=64) + assert cfg.chunk_size == 512 + + def test_chunking_config_zero_raises(self): + from executor.executors.dto import ChunkingConfig + + with pytest.raises(ValueError, match="zero chunks"): + ChunkingConfig(chunk_size=0, chunk_overlap=0) + + def test_file_info(self): + from executor.executors.dto import FileInfo + + fi = FileInfo(file_path="/tmp/test.pdf", file_hash="abc123") + assert fi.file_path == "/tmp/test.pdf" + + def test_instance_identifiers(self): + from executor.executors.dto import InstanceIdentifiers + + ids = InstanceIdentifiers( + embedding_instance_id="emb-1", + vector_db_instance_id="vdb-1", + x2text_instance_id="x2t-1", + llm_instance_id="llm-1", + tool_id="tool-1", + ) + assert ids.tool_id == "tool-1" + + def test_processing_options(self): + from executor.executors.dto import ProcessingOptions + + opts = ProcessingOptions(reindex=True) + assert opts.reindex is True + assert opts.enable_highlight is False + + +# --- 10. Exceptions standalone --- + + +class TestExceptions: + def test_legacy_executor_error_has_code_and_message(self): + from executor.executors.exceptions import LegacyExecutorError + + err = LegacyExecutorError(message="test error", code=418) + assert err.message == "test error" + assert err.code == 418 + assert str(err) == "test error" + + def test_extraction_error_has_code_and_message(self): + from executor.executors.exceptions import ExtractionError + + err = ExtractionError(message="extraction failed", code=500) + assert err.message == "extraction failed" + assert err.code == 500 + + def test_no_flask_import(self): + """Verify exceptions module does NOT import Flask.""" + import importlib + import sys + + # Ensure fresh import + mod_name = "executor.executors.exceptions" + if mod_name in sys.modules: + importlib.reload(sys.modules[mod_name]) + else: + importlib.import_module(mod_name) + + # Check that no flask modules were pulled in + flask_modules = [m for m in sys.modules if m.startswith("flask")] + assert flask_modules == [], ( + f"Flask modules imported: {flask_modules}" + ) + + def test_custom_data_error_signature(self): + from executor.executors.exceptions import CustomDataError + + err = CustomDataError( + variable="invoice_num", reason="not found", is_ide=True + ) + assert "invoice_num" in err.message + assert "not found" in err.message + assert "Prompt Studio" in err.message + + def test_custom_data_error_tool_mode(self): + from executor.executors.exceptions import CustomDataError + + err = CustomDataError( + variable="order_id", reason="missing", is_ide=False + ) + assert "API request" in err.message + + def test_missing_field_error(self): + from executor.executors.exceptions import MissingFieldError + + err = MissingFieldError(missing_fields=["tool_id", "file_path"]) + assert "tool_id" in err.message + assert "file_path" in err.message + + def test_bad_request_defaults(self): + from executor.executors.exceptions import BadRequest + + err = BadRequest() + assert err.code == 400 + assert "Bad Request" in err.message + + def test_rate_limit_error_defaults(self): + from executor.executors.exceptions import RateLimitError + + err = RateLimitError() + assert err.code == 429 diff --git a/workers/tests/test_phase2f.py b/workers/tests/test_phase2f.py new file mode 100644 index 0000000000..d2dce922ab --- /dev/null +++ b/workers/tests/test_phase2f.py @@ -0,0 +1,331 @@ +"""Phase 2F — single_pass_extraction, summarize, agentic_extraction tests. + +Verifies: +1. single_pass_extraction delegates to answer_prompt +2. summarize constructs prompt and calls LLM +3. summarize missing params return failure +4. summarize prompt includes prompt_keys +5. agentic_extraction raises LegacyExecutorError (plugin-dependent) +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +@pytest.fixture(autouse=True) +def _clean_registry(): + """Ensure a clean executor registry for every test.""" + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +def _register_legacy(): + from executor.executors.legacy_executor import LegacyExecutor # noqa: F401 + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + + +def _make_context(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "summarize", + "run_id": "run-2f-001", + "execution_source": "tool", + "organization_id": "org-test", + "request_id": "req-2f-001", + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +# --------------------------------------------------------------------------- +# 1. single_pass_extraction delegates to answer_prompt +# --------------------------------------------------------------------------- + + +class TestSinglePassExtraction: + def test_delegates_to_answer_prompt(self): + """single_pass_extraction calls _handle_answer_prompt internally.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + # Mock _handle_answer_prompt so we can verify delegation + expected_result = ExecutionResult( + success=True, + data={"output": {"field1": "value1"}, "metadata": {}, "metrics": {}}, + ) + executor._handle_answer_prompt = MagicMock(return_value=expected_result) + + ctx = _make_context(operation="single_pass_extraction") + result = executor.execute(ctx) + + assert result.success is True + assert result.data["output"]["field1"] == "value1" + executor._handle_answer_prompt.assert_called_once_with(ctx) + + def test_delegates_failure_too(self): + """Failures from answer_prompt propagate through single_pass.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + fail_result = ExecutionResult.failure(error="some error") + executor._handle_answer_prompt = MagicMock(return_value=fail_result) + + ctx = _make_context(operation="single_pass_extraction") + result = executor.execute(ctx) + + assert result.success is False + assert "some error" in result.error + + +# --------------------------------------------------------------------------- +# 2. summarize +# --------------------------------------------------------------------------- + + +def _make_summarize_params(**overrides): + """Build executor_params for summarize operation.""" + defaults = { + "llm_adapter_instance_id": "llm-001", + "summarize_prompt": "Summarize the following document.", + "context": "This is a long document with lots of content.", + "prompt_keys": ["invoice_number", "total_amount"], + "PLATFORM_SERVICE_API_KEY": "test-key", + } + defaults.update(overrides) + return defaults + + +class TestSummarize: + @patch("executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_summarize_success(self, mock_shim_cls, mock_get_deps): + """Successful summarize returns data with summary text.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + # Set up mock LLM + mock_llm_cls = MagicMock() + mock_llm = MagicMock() + mock_llm_cls.return_value = mock_llm + + mock_get_deps.return_value = ( + MagicMock(), # AnswerPromptService + MagicMock(), # RetrievalService + MagicMock(), # VariableReplacementService + MagicMock(), # Index + mock_llm_cls, # LLM + MagicMock(), # EmbeddingCompat + MagicMock(), # VectorDB + ) + + # Mock AnswerPromptService.run_completion + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + return_value="This is a summary of the document.", + ): + ctx = _make_context( + operation="summarize", + executor_params=_make_summarize_params(), + ) + result = executor.execute(ctx) + + assert result.success is True + assert result.data["data"] == "This is a summary of the document." + + @patch("executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_summarize_prompt_includes_keys(self, mock_shim_cls, mock_get_deps): + """The summarize prompt includes prompt_keys.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_llm_cls = MagicMock() + mock_llm = MagicMock() + mock_llm_cls.return_value = mock_llm + + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + captured_prompt = {} + + def capture_run_completion(llm, prompt, **kwargs): + captured_prompt["value"] = prompt + return "summary" + + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + side_effect=capture_run_completion, + ): + ctx = _make_context( + operation="summarize", + executor_params=_make_summarize_params( + prompt_keys=["name", "address"], + ), + ) + executor.execute(ctx) + + assert "name" in captured_prompt["value"] + assert "address" in captured_prompt["value"] + + @patch("executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_summarize_no_prompt_keys(self, mock_shim_cls, mock_get_deps): + """Summarize works without prompt_keys.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_llm_cls = MagicMock() + mock_llm_cls.return_value = MagicMock() + + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + return_value="summary without keys", + ): + params = _make_summarize_params() + del params["prompt_keys"] + ctx = _make_context( + operation="summarize", + executor_params=params, + ) + result = executor.execute(ctx) + + assert result.success is True + assert result.data["data"] == "summary without keys" + + def test_summarize_missing_llm_adapter(self): + """Missing llm_adapter_instance_id returns failure.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + params = _make_summarize_params(llm_adapter_instance_id="") + ctx = _make_context( + operation="summarize", + executor_params=params, + ) + result = executor.execute(ctx) + + assert result.success is False + assert "llm_adapter_instance_id" in result.error + + def test_summarize_missing_context(self): + """Missing context returns failure.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + params = _make_summarize_params(context="") + ctx = _make_context( + operation="summarize", + executor_params=params, + ) + result = executor.execute(ctx) + + assert result.success is False + assert "context" in result.error + + @patch("executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_summarize_llm_error(self, mock_shim_cls, mock_get_deps): + """LLM errors are wrapped in ExecutionResult.failure.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_llm_cls = MagicMock() + mock_llm_cls.return_value = MagicMock() + + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + side_effect=Exception("LLM unavailable"), + ): + ctx = _make_context( + operation="summarize", + executor_params=_make_summarize_params(), + ) + result = executor.execute(ctx) + + assert result.success is False + assert "summarization" in result.error.lower() or "LLM" in result.error + + @patch("executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_summarize_creates_llm_with_correct_adapter( + self, mock_shim_cls, mock_get_deps + ): + """LLM is instantiated with the provided adapter instance ID.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + mock_llm_cls = MagicMock() + mock_llm = MagicMock() + mock_llm_cls.return_value = mock_llm + + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + return_value="summary", + ): + ctx = _make_context( + operation="summarize", + executor_params=_make_summarize_params( + llm_adapter_instance_id="custom-llm-42", + ), + ) + executor.execute(ctx) + + mock_llm_cls.assert_called_once() + call_kwargs = mock_llm_cls.call_args + assert call_kwargs.kwargs["adapter_instance_id"] == "custom-llm-42" + + +# --------------------------------------------------------------------------- +# 3. agentic_extraction +# --------------------------------------------------------------------------- + + +class TestAgenticExtraction: + def test_returns_failure(self): + """agentic_extraction returns failure (plugin not available).""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + ctx = _make_context(operation="agentic_extraction") + result = executor.execute(ctx) + + assert result.success is False + assert "agentic extraction" in result.error.lower() + assert "plugin" in result.error.lower() + + def test_orchestrator_wraps_error(self): + """ExecutionOrchestrator also returns failure for agentic.""" + from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator + + _register_legacy() + orchestrator = ExecutionOrchestrator() + ctx = _make_context(operation="agentic_extraction") + result = orchestrator.execute(ctx) + + assert result.success is False + assert "plugin" in result.error.lower() diff --git a/workers/tests/test_phase2h.py b/workers/tests/test_phase2h.py new file mode 100644 index 0000000000..5d41ee1e15 --- /dev/null +++ b/workers/tests/test_phase2h.py @@ -0,0 +1,484 @@ +"""Phase 2H: Tests for variable replacement and postprocessor modules. + +Covers VariableReplacementHelper, VariableReplacementService, and +the webhook postprocessor — all pure Python with no llama_index deps. +""" + +import json +from unittest.mock import MagicMock, patch + +import pytest +import requests as real_requests + +from executor.executors.constants import VariableConstants, VariableType +from executor.executors.exceptions import CustomDataError, LegacyExecutorError +from executor.executors.postprocessor import ( + _validate_structured_output, + postprocess_data, +) +from executor.executors.variable_replacement import ( + VariableReplacementHelper, + VariableReplacementService, +) + + +# ============================================================================ +# 1. VariableReplacementHelper (15 tests) +# ============================================================================ + + +class TestVariableReplacementHelper: + """Tests for the low-level replacement helper.""" + + # --- extract_variables_from_prompt --- + + def test_extract_variables_single(self): + result = VariableReplacementHelper.extract_variables_from_prompt("{{name}}") + assert result == ["name"] + + def test_extract_variables_multiple(self): + result = VariableReplacementHelper.extract_variables_from_prompt( + "{{a}} and {{b}}" + ) + assert result == ["a", "b"] + + def test_extract_variables_none(self): + result = VariableReplacementHelper.extract_variables_from_prompt("no vars here") + assert result == [] + + # --- identify_variable_type --- + + def test_identify_static_type(self): + assert ( + VariableReplacementHelper.identify_variable_type("name") + == VariableType.STATIC + ) + + def test_identify_dynamic_type(self): + assert ( + VariableReplacementHelper.identify_variable_type( + "https://example.com/api[field1]" + ) + == VariableType.DYNAMIC + ) + + def test_identify_custom_data_type(self): + assert ( + VariableReplacementHelper.identify_variable_type("custom_data.company") + == VariableType.CUSTOM_DATA + ) + + # --- handle_json_and_str_types --- + + def test_handle_json_dict(self): + result = VariableReplacementHelper.handle_json_and_str_types({"k": "v"}) + assert result == '{"k": "v"}' + + def test_handle_json_list(self): + result = VariableReplacementHelper.handle_json_and_str_types([1, 2]) + assert result == "[1, 2]" + + # --- replace_generic_string_value --- + + def test_replace_generic_string_non_str(self): + """Non-string values get JSON-formatted before replacement.""" + result = VariableReplacementHelper.replace_generic_string_value( + prompt="value: {{x}}", variable="{{x}}", value={"nested": True} + ) + assert result == 'value: {"nested": true}' + + # --- check_static_variable_run_status --- + + def test_check_static_missing_key(self): + result = VariableReplacementHelper.check_static_variable_run_status( + structure_output={}, variable="missing" + ) + assert result is None + + # --- replace_static_variable --- + + def test_replace_static_missing_returns_prompt(self): + """Missing key in structured_output leaves prompt unchanged.""" + prompt = "Total is {{revenue}}" + result = VariableReplacementHelper.replace_static_variable( + prompt=prompt, structured_output={}, variable="revenue" + ) + assert result == prompt + + # --- replace_custom_data_variable --- + + def test_custom_data_nested_path(self): + """custom_data.nested.key navigates nested dict.""" + result = VariableReplacementHelper.replace_custom_data_variable( + prompt="val: {{custom_data.nested.key}}", + variable="custom_data.nested.key", + custom_data={"nested": {"key": "deep_value"}}, + ) + assert result == "val: deep_value" + + def test_custom_data_empty_dict_raises(self): + """Empty custom_data={} raises CustomDataError.""" + with pytest.raises(CustomDataError, match="Custom data is not configured"): + VariableReplacementHelper.replace_custom_data_variable( + prompt="{{custom_data.company}}", + variable="custom_data.company", + custom_data={}, + ) + + # --- fetch_dynamic_variable_value / replace_dynamic_variable --- + + @patch("executor.executors.variable_replacement.pyrequests.post") + def test_dynamic_variable_success(self, mock_post): + """Mock HTTP POST, verify URL extraction and replacement.""" + mock_resp = MagicMock() + mock_resp.headers = {"content-type": "application/json"} + mock_resp.json.return_value = {"result": "ok"} + mock_resp.raise_for_status = MagicMock() + mock_post.return_value = mock_resp + + variable = "https://example.com/api[field1]" + result = VariableReplacementHelper.replace_dynamic_variable( + prompt="data: {{" + variable + "}}", + variable=variable, + structured_output={"field1": "input_data"}, + ) + mock_post.assert_called_once() + assert '{"result": "ok"}' in result + + @patch("executor.executors.variable_replacement.pyrequests.post") + def test_dynamic_variable_http_error(self, mock_post): + """HTTP error raises LegacyExecutorError.""" + mock_post.side_effect = real_requests.exceptions.ConnectionError("refused") + + with pytest.raises(LegacyExecutorError, match="failed"): + VariableReplacementHelper.fetch_dynamic_variable_value( + url="https://example.com/api", data="payload" + ) + + +# ============================================================================ +# 2. VariableReplacementService (8 tests) +# ============================================================================ + + +class TestVariableReplacementService: + """Tests for the high-level orchestration service.""" + + def test_replace_with_variable_map(self): + """Uses variable_map key from prompt dict when present.""" + prompt = { + "prompt": "Hello {{name}}", + "variable_map": {"name": "World"}, + } + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={"name": "Fallback"}, + prompt_name="test", + ) + assert result == "Hello World" + + def test_replace_fallback_structured_output(self): + """Falls back to structured_output when no variable_map.""" + prompt = {"prompt": "Hello {{name}}"} + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={"name": "Fallback"}, + prompt_name="test", + ) + assert result == "Hello Fallback" + + def test_mixed_variable_types(self): + """Prompt with static + custom_data variables replaces both.""" + prompt = { + "prompt": "{{name}} works at {{custom_data.company}}", + "variable_map": {"name": "Alice"}, + } + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + custom_data={"company": "Acme"}, + ) + assert result == "Alice works at Acme" + + def test_no_variables_noop(self): + """Prompt without {{}} returns unchanged.""" + prompt = {"prompt": "No variables here"} + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + ) + assert result == "No variables here" + + def test_replace_with_custom_data(self): + """custom_data dict gets passed through to helper.""" + prompt = { + "prompt": "Company: {{custom_data.name}}", + "variable_map": {}, + } + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + custom_data={"name": "TestCorp"}, + ) + assert result == "Company: TestCorp" + + def test_is_ide_flag_propagated(self): + """is_ide=False propagates — error message says 'API request'.""" + prompt = { + "prompt": "{{custom_data.missing}}", + "variable_map": {}, + } + with pytest.raises(CustomDataError, match="API request"): + VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + custom_data={}, + is_ide=False, + ) + + def test_multiple_same_variable(self): + """{{x}} and {{x}} — both occurrences replaced.""" + prompt = { + "prompt": "{{x}} and {{x}}", + "variable_map": {"x": "val"}, + } + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + ) + assert result == "val and val" + + def test_json_value_replacement(self): + """Dict value gets JSON-serialized before replacement.""" + prompt = { + "prompt": "data: {{info}}", + "variable_map": {"info": {"key": "value"}}, + } + result = VariableReplacementService.replace_variables_in_prompt( + prompt=prompt, + structured_output={}, + prompt_name="test", + ) + assert result == 'data: {"key": "value"}' + + +# ============================================================================ +# 3. Postprocessor (15 tests) +# ============================================================================ + + +class TestPostprocessor: + """Tests for the webhook postprocessor.""" + + PARSED = {"field": "original"} + HIGHLIGHT = [{"page": 1, "spans": []}] + + # --- disabled / no-op paths --- + + def test_disabled_returns_original(self): + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=False, + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + def test_no_url_returns_original(self): + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url=None, + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + # --- successful webhook --- + + @patch("executor.executors.postprocessor.requests.post") + def test_success_returns_updated(self, mock_post): + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = {"structured_output": {"field": "updated"}} + mock_post.return_value = mock_resp + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result[0] == {"field": "updated"} + + @patch("executor.executors.postprocessor.requests.post") + def test_success_preserves_highlight_data(self, mock_post): + """Response without highlight_data preserves original.""" + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = {"structured_output": {"f": "v"}} + mock_post.return_value = mock_resp + + _, highlight = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert highlight == self.HIGHLIGHT + + @patch("executor.executors.postprocessor.requests.post") + def test_success_updates_highlight_data(self, mock_post): + """Response with valid list highlight_data uses updated.""" + new_highlight = [{"page": 2}] + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = { + "structured_output": {"f": "v"}, + "highlight_data": new_highlight, + } + mock_post.return_value = mock_resp + + _, highlight = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert highlight == new_highlight + + @patch("executor.executors.postprocessor.requests.post") + def test_invalid_highlight_data_ignored(self, mock_post): + """Response with non-list highlight_data keeps original.""" + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = { + "structured_output": {"f": "v"}, + "highlight_data": "not-a-list", + } + mock_post.return_value = mock_resp + + _, highlight = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert highlight == self.HIGHLIGHT + + # --- response validation failures --- + + @patch("executor.executors.postprocessor.requests.post") + def test_missing_structured_output_key(self, mock_post): + """Response without structured_output returns original.""" + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = {"other_key": "value"} + mock_post.return_value = mock_resp + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + @patch("executor.executors.postprocessor.requests.post") + def test_invalid_structured_output_type(self, mock_post): + """Response with string structured_output returns original.""" + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = {"structured_output": "just-a-string"} + mock_post.return_value = mock_resp + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + # --- HTTP error paths --- + + @patch("executor.executors.postprocessor.requests.post") + def test_http_error_returns_original(self, mock_post): + mock_resp = MagicMock() + mock_resp.status_code = 500 + mock_post.return_value = mock_resp + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + @patch("executor.executors.postprocessor.requests.post") + def test_timeout_returns_original(self, mock_post): + mock_post.side_effect = real_requests.exceptions.Timeout("timed out") + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + @patch("executor.executors.postprocessor.requests.post") + def test_connection_error_returns_original(self, mock_post): + mock_post.side_effect = real_requests.exceptions.ConnectionError("refused") + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + @patch("executor.executors.postprocessor.requests.post") + def test_json_decode_error_returns_original(self, mock_post): + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.side_effect = json.JSONDecodeError("err", "doc", 0) + mock_post.return_value = mock_resp + + result = postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + highlight_data=self.HIGHLIGHT, + ) + assert result == (self.PARSED, self.HIGHLIGHT) + + @patch("executor.executors.postprocessor.requests.post") + def test_custom_timeout_passed(self, mock_post): + """timeout=5.0 is passed to requests.post().""" + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = {"structured_output": {"f": "v"}} + mock_post.return_value = mock_resp + + postprocess_data( + parsed_data=self.PARSED, + webhook_enabled=True, + webhook_url="https://hook.example.com", + timeout=5.0, + ) + _, kwargs = mock_post.call_args + assert kwargs["timeout"] == 5.0 + + # --- _validate_structured_output --- + + def test_validate_structured_output_dict(self): + assert _validate_structured_output({"k": "v"}) is True + + def test_validate_structured_output_list(self): + assert _validate_structured_output([1, 2]) is True diff --git a/workers/tests/test_retrieval.py b/workers/tests/test_retrieval.py new file mode 100644 index 0000000000..a92ce08808 --- /dev/null +++ b/workers/tests/test_retrieval.py @@ -0,0 +1,275 @@ +"""Tests for the RetrievalService factory and complete-context path. + +Retriever internals are NOT tested here — they're llama_index wrappers +that will be validated in Phase 2-SANITY integration tests. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import RetrievalStrategy +from executor.executors.retrieval import RetrievalService + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_output(prompt: str = "What is X?", top_k: int = 5, name: str = "field_a"): + """Build a minimal ``output`` dict matching PromptServiceConstants keys.""" + return { + "promptx": prompt, + "similarity-top-k": top_k, + "name": name, + } + + +def _mock_retriever_class(return_value=None): + """Return a mock class whose instances have a ``.retrieve()`` method.""" + if return_value is None: + return_value = {"chunk1", "chunk2"} + cls = MagicMock() + instance = MagicMock() + instance.retrieve.return_value = return_value + cls.return_value = instance + return cls, instance + + +# --------------------------------------------------------------------------- +# Factory — run_retrieval +# --------------------------------------------------------------------------- + +class TestRunRetrieval: + """Tests for RetrievalService.run_retrieval().""" + + @pytest.mark.parametrize("strategy", list(RetrievalStrategy)) + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_correct_class_selected_for_each_strategy(self, mock_map, strategy): + """Factory returns the correct retriever class for each strategy.""" + cls, _inst = _mock_retriever_class() + mock_map.return_value = {strategy.value: cls} + + result = RetrievalService.run_retrieval( + output=_make_output(), + doc_id="doc-1", + llm=MagicMock(), + vector_db=MagicMock(), + retrieval_type=strategy.value, + ) + cls.assert_called_once() + assert isinstance(result, list) + + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_unknown_strategy_raises_value_error(self, mock_map): + """Passing an invalid strategy string raises ValueError.""" + mock_map.return_value = {} + + with pytest.raises(ValueError, match="Unknown retrieval type"): + RetrievalService.run_retrieval( + output=_make_output(), + doc_id="doc-1", + llm=MagicMock(), + vector_db=MagicMock(), + retrieval_type="nonexistent", + ) + + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_retriever_instantiated_with_correct_params(self, mock_map): + """Verify vector_db, doc_id, prompt, top_k, llm passed through.""" + cls, _inst = _mock_retriever_class() + mock_map.return_value = {RetrievalStrategy.SIMPLE.value: cls} + + llm = MagicMock(name="llm") + vdb = MagicMock(name="vdb") + output = _make_output(prompt="Find revenue", top_k=10, name="revenue") + + RetrievalService.run_retrieval( + output=output, + doc_id="doc-42", + llm=llm, + vector_db=vdb, + retrieval_type=RetrievalStrategy.SIMPLE.value, + ) + + cls.assert_called_once_with( + vector_db=vdb, + doc_id="doc-42", + prompt="Find revenue", + top_k=10, + llm=llm, + ) + + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_retrieve_result_converted_to_list(self, mock_map): + """Mock retriever returns a set; run_retrieval returns a list.""" + cls, _inst = _mock_retriever_class(return_value={"a", "b", "c"}) + mock_map.return_value = {RetrievalStrategy.FUSION.value: cls} + + result = RetrievalService.run_retrieval( + output=_make_output(), + doc_id="doc-1", + llm=MagicMock(), + vector_db=MagicMock(), + retrieval_type=RetrievalStrategy.FUSION.value, + ) + assert isinstance(result, list) + assert set(result) == {"a", "b", "c"} + + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_metrics_recorded(self, mock_map): + """Verify context_retrieval_metrics dict populated with timing.""" + cls, _inst = _mock_retriever_class() + mock_map.return_value = {RetrievalStrategy.SIMPLE.value: cls} + + metrics: dict = {} + RetrievalService.run_retrieval( + output=_make_output(name="my_field"), + doc_id="doc-1", + llm=MagicMock(), + vector_db=MagicMock(), + retrieval_type=RetrievalStrategy.SIMPLE.value, + context_retrieval_metrics=metrics, + ) + + assert "my_field" in metrics + assert "time_taken(s)" in metrics["my_field"] + assert isinstance(metrics["my_field"]["time_taken(s)"], float) + + @patch("executor.executors.retrieval.RetrievalService._get_retriever_map") + def test_metrics_optional_none_does_not_crash(self, mock_map): + """context_retrieval_metrics=None doesn't crash.""" + cls, _inst = _mock_retriever_class() + mock_map.return_value = {RetrievalStrategy.SIMPLE.value: cls} + + # Should not raise + RetrievalService.run_retrieval( + output=_make_output(), + doc_id="doc-1", + llm=MagicMock(), + vector_db=MagicMock(), + retrieval_type=RetrievalStrategy.SIMPLE.value, + context_retrieval_metrics=None, + ) + + +# --------------------------------------------------------------------------- +# Complete context — retrieve_complete_context +# --------------------------------------------------------------------------- + +class TestRetrieveCompleteContext: + """Tests for RetrievalService.retrieve_complete_context().""" + + @patch("executor.executors.file_utils.FileUtils.get_fs_instance") + def test_reads_file_with_correct_path(self, mock_get_fs): + """Mock FileUtils.get_fs_instance, verify fs.read() called correctly.""" + mock_fs = MagicMock() + mock_fs.read.return_value = "full document text" + mock_get_fs.return_value = mock_fs + + RetrievalService.retrieve_complete_context( + execution_source="ide", + file_path="/data/doc.txt", + ) + + mock_get_fs.assert_called_once_with(execution_source="ide") + mock_fs.read.assert_called_once_with(path="/data/doc.txt", mode="r") + + @patch("executor.executors.file_utils.FileUtils.get_fs_instance") + def test_returns_list_with_single_item(self, mock_get_fs): + """Verify [content] shape.""" + mock_fs = MagicMock() + mock_fs.read.return_value = "hello world" + mock_get_fs.return_value = mock_fs + + result = RetrievalService.retrieve_complete_context( + execution_source="tool", + file_path="/data/doc.txt", + ) + + assert result == ["hello world"] + assert len(result) == 1 + + @patch("executor.executors.file_utils.FileUtils.get_fs_instance") + def test_complete_context_records_metrics(self, mock_get_fs): + """Timing dict populated.""" + mock_fs = MagicMock() + mock_fs.read.return_value = "content" + mock_get_fs.return_value = mock_fs + + metrics: dict = {} + RetrievalService.retrieve_complete_context( + execution_source="ide", + file_path="/data/doc.txt", + context_retrieval_metrics=metrics, + prompt_key="total_revenue", + ) + + assert "total_revenue" in metrics + assert "time_taken(s)" in metrics["total_revenue"] + assert isinstance(metrics["total_revenue"]["time_taken(s)"], float) + + @patch("executor.executors.file_utils.FileUtils.get_fs_instance") + def test_complete_context_metrics_none_does_not_crash(self, mock_get_fs): + """context_retrieval_metrics=None doesn't crash.""" + mock_fs = MagicMock() + mock_fs.read.return_value = "content" + mock_get_fs.return_value = mock_fs + + # Should not raise + RetrievalService.retrieve_complete_context( + execution_source="ide", + file_path="/data/doc.txt", + context_retrieval_metrics=None, + ) + + +# --------------------------------------------------------------------------- +# BaseRetriever interface +# --------------------------------------------------------------------------- + +class TestBaseRetriever: + """Tests for BaseRetriever base class.""" + + def test_default_retrieve_returns_empty_set(self): + """Default retrieve() returns empty set.""" + from executor.executors.retrievers.base_retriever import BaseRetriever + + r = BaseRetriever( + vector_db=MagicMock(), + prompt="test", + doc_id="doc-1", + top_k=5, + ) + assert r.retrieve() == set() + + def test_constructor_stores_all_params(self): + """Constructor stores vector_db, prompt, doc_id, top_k, llm.""" + from executor.executors.retrievers.base_retriever import BaseRetriever + + vdb = MagicMock(name="vdb") + llm = MagicMock(name="llm") + r = BaseRetriever( + vector_db=vdb, + prompt="my prompt", + doc_id="doc-99", + top_k=3, + llm=llm, + ) + assert r.vector_db is vdb + assert r.prompt == "my prompt" + assert r.doc_id == "doc-99" + assert r.top_k == 3 + assert r.llm is llm + + def test_constructor_llm_defaults_to_none(self): + """When llm not provided, it defaults to None.""" + from executor.executors.retrievers.base_retriever import BaseRetriever + + r = BaseRetriever( + vector_db=MagicMock(), + prompt="test", + doc_id="doc-1", + top_k=5, + ) + assert r.llm is None diff --git a/workers/tests/test_sanity_phase2.py b/workers/tests/test_sanity_phase2.py new file mode 100644 index 0000000000..6961f5cc0e --- /dev/null +++ b/workers/tests/test_sanity_phase2.py @@ -0,0 +1,788 @@ +"""Phase 2-SANITY — Full-chain integration tests for LegacyExecutor. + +All Phase 2 code and unit tests are complete (2A–2H, 194 workers tests). +This file bridges unit tests and real integration by testing the full +Celery chain: + + task.apply() → execute_extraction task → ExecutionOrchestrator + → ExecutorRegistry.get("legacy") → LegacyExecutor.execute() + → _handle_X() → ExecutionResult + +All in Celery eager mode (no broker needed). External adapters +(X2Text, LLM, VectorDB) are mocked. +""" + +import json +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import ( + IndexingConstants as IKeys, + PromptServiceConstants as PSKeys, +) +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + +# --------------------------------------------------------------------------- +# Patch targets +# --------------------------------------------------------------------------- + +_PATCH_X2TEXT = "executor.executors.legacy_executor.X2Text" +_PATCH_FS = "executor.executors.legacy_executor.FileUtils.get_fs_instance" +_PATCH_INDEX_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_indexing_deps" +) +_PATCH_PROMPT_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" +) +_PATCH_SHIM = "executor.executors.legacy_executor.ExecutorToolShim" +_PATCH_RUN_COMPLETION = ( + "executor.executors.answer_prompt.AnswerPromptService.run_completion" +) +_PATCH_INDEX_UTILS = ( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key" +) + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _ensure_legacy_registered(): + """Ensure LegacyExecutor is registered without clearing other state. + + Unlike unit tests that clear() + re-register, sanity tests need + LegacyExecutor always present. We add it idempotently. + """ + from executor.executors.legacy_executor import LegacyExecutor + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + yield + + +@pytest.fixture +def eager_app(): + """Configure the real executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run_task(eager_app, ctx_dict): + """Run execute_extraction task via task.apply() (eager-safe).""" + task = eager_app.tasks["execute_extraction"] + result = task.apply(args=[ctx_dict]) + return result.get() + + +def _mock_llm(answer="sanity answer"): + """Create a mock LLM matching the test_answer_prompt.py pattern.""" + llm = MagicMock(name="llm") + response = MagicMock() + response.text = answer + llm.complete.return_value = { + PSKeys.RESPONSE: response, + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, + PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + llm.get_usage_reason.return_value = "extraction" + llm.get_metrics.return_value = {"tokens": 100} + return llm + + +def _mock_prompt_deps(llm=None): + """Return a 7-tuple matching _get_prompt_deps() return shape. + + Uses the real AnswerPromptService + mocked adapters. + """ + if llm is None: + llm = _mock_llm() + + from executor.executors.answer_prompt import AnswerPromptService + + RetrievalService = MagicMock(name="RetrievalService") + RetrievalService.run_retrieval.return_value = ["chunk1", "chunk2"] + RetrievalService.retrieve_complete_context.return_value = ["full content"] + + VariableReplacementService = MagicMock(name="VariableReplacementService") + VariableReplacementService.is_variables_present.return_value = False + + Index = MagicMock(name="Index") + index_instance = MagicMock() + index_instance.generate_index_key.return_value = "doc-id-sanity" + Index.return_value = index_instance + + LLM_cls = MagicMock(name="LLM") + LLM_cls.return_value = llm + + EmbeddingCompat = MagicMock(name="EmbeddingCompat") + VectorDB = MagicMock(name="VectorDB") + + return ( + AnswerPromptService, + RetrievalService, + VariableReplacementService, + Index, + LLM_cls, + EmbeddingCompat, + VectorDB, + ) + + +def _mock_process_response(text="sanity extracted text"): + """Build a mock TextExtractionResult.""" + from unstract.sdk1.adapters.x2text.dto import ( + TextExtractionMetadata, + TextExtractionResult, + ) + + metadata = TextExtractionMetadata(whisper_hash="sanity-hash") + return TextExtractionResult( + extracted_text=text, + extraction_metadata=metadata, + ) + + +def _make_prompt(name="field_a", prompt="What is the revenue?", + output_type="text", **overrides): + """Build a minimal prompt definition dict.""" + d = { + PSKeys.NAME: name, + PSKeys.PROMPT: prompt, + PSKeys.TYPE: output_type, + PSKeys.CHUNK_SIZE: 512, + PSKeys.CHUNK_OVERLAP: 128, + PSKeys.RETRIEVAL_STRATEGY: "simple", + PSKeys.LLM: "llm-1", + PSKeys.EMBEDDING: "emb-1", + PSKeys.VECTOR_DB: "vdb-1", + PSKeys.X2TEXT_ADAPTER: "x2t-1", + PSKeys.SIMILARITY_TOP_K: 5, + } + d.update(overrides) + return d + + +# --- Context factories per operation --- + + +def _extract_ctx(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-sanity-ext", + "execution_source": "tool", + "organization_id": "org-test", + "executor_params": { + "x2text_instance_id": "x2t-sanity", + "file_path": "/data/sanity.pdf", + "platform_api_key": "sk-sanity", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _index_ctx(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "index", + "run_id": "run-sanity-idx", + "execution_source": "tool", + "organization_id": "org-test", + "executor_params": { + "embedding_instance_id": "emb-sanity", + "vector_db_instance_id": "vdb-sanity", + "x2text_instance_id": "x2t-sanity", + "file_path": "/data/sanity.pdf", + "file_hash": "sanity-hash", + "extracted_text": "Sanity test document text", + "platform_api_key": "sk-sanity", + "chunk_size": 512, + "chunk_overlap": 128, + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _answer_prompt_ctx(prompts=None, **overrides): + if prompts is None: + prompts = [_make_prompt()] + defaults = { + "executor_name": "legacy", + "operation": Operation.ANSWER_PROMPT.value, + "run_id": "run-sanity-ap", + "execution_source": "ide", + "executor_params": { + PSKeys.OUTPUTS: prompts, + PSKeys.TOOL_SETTINGS: {}, + PSKeys.TOOL_ID: "tool-sanity", + PSKeys.EXECUTION_ID: "exec-sanity", + PSKeys.FILE_HASH: "hash-sanity", + PSKeys.FILE_PATH: "/data/sanity.txt", + PSKeys.FILE_NAME: "sanity.txt", + PSKeys.LOG_EVENTS_ID: "", + PSKeys.CUSTOM_DATA: {}, + PSKeys.EXECUTION_SOURCE: "ide", + PSKeys.PLATFORM_SERVICE_API_KEY: "pk-sanity", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _summarize_ctx(**overrides): + defaults = { + "executor_name": "legacy", + "operation": "summarize", + "run_id": "run-sanity-sum", + "execution_source": "tool", + "executor_params": { + "llm_adapter_instance_id": "llm-sanity", + "summarize_prompt": "Summarize the document.", + "context": "Long document content here.", + "prompt_keys": ["invoice_number", "total"], + "PLATFORM_SERVICE_API_KEY": "pk-sanity", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +# =========================================================================== +# Test classes +# =========================================================================== + + +class TestSanityExtract: + """Full-chain extract tests through Celery eager mode.""" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_extract_full_chain(self, mock_x2text_cls, mock_get_fs, eager_app): + """Mocked X2Text + FileUtils → result.data has extracted_text.""" + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response( + "sanity extracted" + ) + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data[IKeys.EXTRACTED_TEXT] == "sanity extracted" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_extract_missing_params_full_chain( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """Empty params → failure with missing fields message.""" + ctx = _extract_ctx(executor_params={"platform_api_key": "sk-test"}) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "x2text_instance_id" in result.error + assert "file_path" in result.error + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_extract_adapter_error_full_chain( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """X2Text raises AdapterError → failure result, no unhandled exception.""" + from unstract.sdk1.adapters.exceptions import AdapterError + + mock_x2text = MagicMock() + mock_x2text.x2text_instance = MagicMock() + mock_x2text.x2text_instance.get_name.return_value = "SanityExtractor" + mock_x2text.process.side_effect = AdapterError("sanity adapter err") + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "SanityExtractor" in result.error + assert "sanity adapter err" in result.error + + +class TestSanityIndex: + """Full-chain index tests through Celery eager mode.""" + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_index_full_chain(self, mock_deps, mock_get_fs, eager_app): + """Mocked _get_indexing_deps → result.data has doc_id.""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-sanity-idx" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = "doc-sanity-idx" + mock_index_cls.return_value = mock_index + + mock_emb_cls = MagicMock() + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls = MagicMock() + mock_vdb_cls.return_value = MagicMock() + + mock_deps.return_value = (mock_index_cls, mock_emb_cls, mock_vdb_cls) + mock_get_fs.return_value = MagicMock() + + ctx = _index_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-sanity-idx" + + @patch(_PATCH_INDEX_UTILS, return_value="doc-zero-chunk-sanity") + @patch(_PATCH_FS) + def test_index_chunk_size_zero_full_chain( + self, mock_get_fs, mock_gen_key, eager_app + ): + """chunk_size=0 skips heavy deps → returns doc_id via IndexingUtils.""" + mock_get_fs.return_value = MagicMock() + + params = { + "embedding_instance_id": "emb-sanity", + "vector_db_instance_id": "vdb-sanity", + "x2text_instance_id": "x2t-sanity", + "file_path": "/data/sanity.pdf", + "file_hash": "sanity-hash", + "extracted_text": "text", + "platform_api_key": "sk-sanity", + "chunk_size": 0, + "chunk_overlap": 0, + } + ctx = _index_ctx(executor_params=params) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data[IKeys.DOC_ID] == "doc-zero-chunk-sanity" + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_index_error_full_chain(self, mock_deps, mock_get_fs, eager_app): + """perform_indexing raises → failure result.""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-err" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.side_effect = RuntimeError("VDB down") + mock_index_cls.return_value = mock_index + + mock_emb_cls = MagicMock() + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls = MagicMock() + mock_vdb_cls.return_value = MagicMock() + + mock_deps.return_value = (mock_index_cls, mock_emb_cls, mock_vdb_cls) + mock_get_fs.return_value = MagicMock() + + ctx = _index_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "indexing" in result.error.lower() + + +class TestSanityAnswerPrompt: + """Full-chain answer_prompt tests through Celery eager mode.""" + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_answer_prompt_text_full_chain( + self, mock_shim_cls, mock_deps, eager_app + ): + """TEXT prompt → result.data has output, metadata, metrics.""" + llm = _mock_llm("sanity answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _answer_prompt_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert PSKeys.OUTPUT in result.data + assert PSKeys.METADATA in result.data + assert PSKeys.METRICS in result.data + assert result.data[PSKeys.OUTPUT]["field_a"] == "sanity answer" + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_answer_prompt_multi_prompt_full_chain( + self, mock_shim_cls, mock_deps, eager_app + ): + """Two prompts → both field names in output and metrics.""" + llm = _mock_llm("multi answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + prompts = [ + _make_prompt(name="revenue"), + _make_prompt(name="date_signed"), + ] + ctx = _answer_prompt_ctx(prompts=prompts) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert "revenue" in result.data[PSKeys.OUTPUT] + assert "date_signed" in result.data[PSKeys.OUTPUT] + assert "revenue" in result.data[PSKeys.METRICS] + assert "date_signed" in result.data[PSKeys.METRICS] + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_answer_prompt_table_fails_full_chain( + self, mock_shim_cls, mock_deps, eager_app + ): + """TABLE type → failure mentioning TABLE.""" + llm = _mock_llm() + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _answer_prompt_ctx( + prompts=[_make_prompt(output_type="table")] + ) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "TABLE" in result.error + + +class TestSanitySinglePass: + """Full-chain single_pass_extraction test.""" + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_single_pass_delegates_full_chain( + self, mock_shim_cls, mock_deps, eager_app + ): + """Same mocks as answer_prompt → same response shape.""" + llm = _mock_llm("single pass answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _answer_prompt_ctx( + operation=Operation.SINGLE_PASS_EXTRACTION.value, + ) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert PSKeys.OUTPUT in result.data + assert result.data[PSKeys.OUTPUT]["field_a"] == "single pass answer" + + +class TestSanitySummarize: + """Full-chain summarize tests through Celery eager mode.""" + + @patch(_PATCH_RUN_COMPLETION, return_value="Sanity summary text.") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_summarize_full_chain( + self, mock_shim_cls, mock_get_deps, mock_run, eager_app + ): + """Mocked _get_prompt_deps + run_completion → result.data has summary.""" + mock_llm_cls = MagicMock() + mock_llm_cls.return_value = MagicMock() + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + ctx = _summarize_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data["data"] == "Sanity summary text." + + def test_summarize_missing_llm_full_chain(self, eager_app): + """Missing llm_adapter_instance_id → failure.""" + ctx = _summarize_ctx( + executor_params={ + "llm_adapter_instance_id": "", + "summarize_prompt": "Summarize.", + "context": "Document text.", + "PLATFORM_SERVICE_API_KEY": "pk-test", + } + ) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "llm_adapter_instance_id" in result.error + + @patch(_PATCH_RUN_COMPLETION, side_effect=Exception("LLM down")) + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_summarize_error_full_chain( + self, mock_shim_cls, mock_get_deps, mock_run, eager_app + ): + """run_completion raises → failure mentioning summarization.""" + mock_llm_cls = MagicMock() + mock_llm_cls.return_value = MagicMock() + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + ctx = _summarize_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "summarization" in result.error.lower() or "LLM" in result.error + + +class TestSanityAgenticExtraction: + """Full-chain agentic_extraction test.""" + + def test_agentic_extraction_fails_full_chain(self, eager_app): + """No mocks needed → failure mentioning agentic and plugin.""" + ctx = ExecutionContext( + executor_name="legacy", + operation="agentic_extraction", + run_id="run-sanity-agentic", + execution_source="tool", + ) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "agentic" in result.error.lower() + assert "plugin" in result.error.lower() + + +class TestSanityResponseContracts: + """Verify response dicts survive JSON round-trip with expected keys.""" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_extract_contract(self, mock_x2text_cls, mock_get_fs, eager_app): + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("contract") + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + + # JSON round-trip + serialized = json.dumps(result_dict) + deserialized = json.loads(serialized) + result = ExecutionResult.from_dict(deserialized) + + assert result.success is True + assert isinstance(result.data[IKeys.EXTRACTED_TEXT], str) + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_index_contract(self, mock_deps, mock_get_fs, eager_app): + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-contract" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = "doc-contract" + mock_index_cls.return_value = mock_index + + mock_emb_cls = MagicMock() + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls = MagicMock() + mock_vdb_cls.return_value = MagicMock() + + mock_deps.return_value = (mock_index_cls, mock_emb_cls, mock_vdb_cls) + mock_get_fs.return_value = MagicMock() + + ctx = _index_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + + serialized = json.dumps(result_dict) + deserialized = json.loads(serialized) + result = ExecutionResult.from_dict(deserialized) + + assert result.success is True + assert isinstance(result.data[IKeys.DOC_ID], str) + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_answer_prompt_contract( + self, mock_shim_cls, mock_deps, eager_app + ): + llm = _mock_llm("contract answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _answer_prompt_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + + serialized = json.dumps(result_dict) + deserialized = json.loads(serialized) + result = ExecutionResult.from_dict(deserialized) + + assert result.success is True + assert isinstance(result.data[PSKeys.OUTPUT], dict) + assert isinstance(result.data[PSKeys.METADATA], dict) + assert isinstance(result.data[PSKeys.METRICS], dict) + + @patch(_PATCH_RUN_COMPLETION, return_value="contract summary") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_summarize_contract( + self, mock_shim_cls, mock_get_deps, mock_run, eager_app + ): + mock_llm_cls = MagicMock() + mock_llm_cls.return_value = MagicMock() + mock_get_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_llm_cls, MagicMock(), MagicMock(), + ) + + ctx = _summarize_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + + serialized = json.dumps(result_dict) + deserialized = json.loads(serialized) + result = ExecutionResult.from_dict(deserialized) + + assert result.success is True + assert isinstance(result.data["data"], str) + + +class TestSanityDispatcher: + """Full-chain dispatcher tests with Celery eager mode.""" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_dispatcher_dispatch_full_chain( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """ExecutionDispatcher dispatches through Celery and returns result. + + Celery's ``send_task`` doesn't reliably use eager mode, so we + patch it to route through ``task.apply()`` instead — this still + exercises the full Dispatcher → task → orchestrator chain. + """ + from unstract.sdk1.execution.dispatcher import ExecutionDispatcher + + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("dispatched") + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + task = eager_app.tasks["execute_extraction"] + + def eager_send_task(name, args=None, **kwargs): + return task.apply(args=args) + + with patch.object(eager_app, "send_task", side_effect=eager_send_task): + dispatcher = ExecutionDispatcher(celery_app=eager_app) + ctx = _extract_ctx() + result = dispatcher.dispatch(ctx, timeout=10) + + assert isinstance(result, ExecutionResult) + assert result.success is True + assert result.data[IKeys.EXTRACTED_TEXT] == "dispatched" + + def test_dispatcher_no_app_raises(self): + """ExecutionDispatcher(celery_app=None).dispatch() → ValueError.""" + from unstract.sdk1.execution.dispatcher import ExecutionDispatcher + + dispatcher = ExecutionDispatcher(celery_app=None) + ctx = _extract_ctx() + + with pytest.raises(ValueError, match="No Celery app"): + dispatcher.dispatch(ctx) + + +class TestSanityCrossCutting: + """Cross-cutting concerns: unknown ops, invalid contexts, error round-trip.""" + + def test_unknown_operation_full_chain(self, eager_app): + """operation='nonexistent' → failure mentioning unsupported.""" + ctx = ExecutionContext( + executor_name="legacy", + operation="nonexistent", + run_id="run-sanity-unknown", + execution_source="tool", + ) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "nonexistent" in result.error.lower() + + def test_invalid_context_dict_full_chain(self, eager_app): + """Malformed dict → failure mentioning 'Invalid execution context'.""" + result_dict = _run_task(eager_app, {"bad": "data"}) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "Invalid execution context" in result.error + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_failure_result_json_round_trip( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """Failure result survives JSON serialization with error preserved.""" + from unstract.sdk1.adapters.exceptions import AdapterError + + mock_x2text = MagicMock() + mock_x2text.x2text_instance = MagicMock() + mock_x2text.x2text_instance.get_name.return_value = "FailExtractor" + mock_x2text.process.side_effect = AdapterError("round trip error") + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + + # Verify raw dict survives JSON round-trip + serialized = json.dumps(result_dict) + deserialized = json.loads(serialized) + result = ExecutionResult.from_dict(deserialized) + + assert result.success is False + assert "round trip error" in result.error + assert "FailExtractor" in result.error diff --git a/workers/tests/test_sanity_phase3.py b/workers/tests/test_sanity_phase3.py new file mode 100644 index 0000000000..6c543f8f34 --- /dev/null +++ b/workers/tests/test_sanity_phase3.py @@ -0,0 +1,996 @@ +"""Phase 3-SANITY — Integration tests for the structure tool Celery task. + +Tests the full structure tool pipeline with mocked platform API and +ExecutionDispatcher. Validates that execute_structure_tool correctly +orchestrates extract → index → answer_prompt operations and writes +output to filesystem. +""" + +import json +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from shared.enums.task_enums import TaskName +from unstract.sdk1.execution.context import ExecutionContext +from unstract.sdk1.execution.result import ExecutionResult + +# --------------------------------------------------------------------------- +# Patch targets +# --------------------------------------------------------------------------- + +_PATCH_DISPATCHER = ( + "file_processing.structure_tool_task.ExecutionDispatcher" +) +_PATCH_PLATFORM_HELPER = ( + "file_processing.structure_tool_task._create_platform_helper" +) +_PATCH_FILE_STORAGE = ( + "file_processing.structure_tool_task._get_file_storage" +) +_PATCH_SHIM = ( + "executor.executor_tool_shim.ExecutorToolShim" +) +_PATCH_SERVICE_IS_STRUCTURE = ( + "shared.workflow.execution.service." + "WorkerWorkflowExecutionService._is_structure_tool_workflow" +) +_PATCH_SERVICE_EXECUTE_STRUCTURE = ( + "shared.workflow.execution.service." + "WorkerWorkflowExecutionService._execute_structure_tool_workflow" +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def mock_fs(): + """Create a mock file storage.""" + fs = MagicMock(name="file_storage") + fs.exists.return_value = False + fs.read.return_value = "" + fs.json_dump.return_value = None + fs.write.return_value = None + fs.get_hash_from_file.return_value = "abc123hash" + return fs + + +@pytest.fixture +def mock_dispatcher(): + """Create a mock ExecutionDispatcher that returns success results.""" + dispatcher = MagicMock(name="ExecutionDispatcher") + return dispatcher + + +@pytest.fixture +def mock_platform_helper(): + """Create a mock PlatformHelper.""" + helper = MagicMock(name="PlatformHelper") + return helper + + +@pytest.fixture +def tool_metadata_regular(): + """Standard prompt studio tool metadata.""" + return { + "name": "Test Project", + "is_agentic": False, + "tool_id": "tool-123", + "tool_settings": { + "vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + }, + "outputs": [ + { + "name": "field_a", + "prompt": "What is the revenue?", + "type": "text", + "active": True, + "chunk-size": 512, + "chunk-overlap": 128, + "retrieval-strategy": "simple", + "llm": "llm-1", + "embedding": "emb-1", + "vector-db": "vdb-1", + "x2text_adapter": "x2t-1", + "similarity-top-k": 5, + }, + ], + } + + +@pytest.fixture +def base_params(): + """Base params dict for execute_structure_tool.""" + return { + "organization_id": "org-test", + "workflow_id": "wf-123", + "execution_id": "exec-456", + "file_execution_id": "fexec-789", + "tool_instance_metadata": { + "prompt_registry_id": "preg-001", + }, + "platform_service_api_key": "sk-test-key", + "input_file_path": "/data/test.pdf", + "output_dir_path": "/output", + "source_file_name": "test.pdf", + "execution_data_dir": "/data/exec", + "messaging_channel": "channel-1", + "file_hash": "filehash123", + "exec_metadata": {"tags": ["tag1"]}, + } + + +def _make_dispatch_side_effect(operation_results: dict): + """Create a side_effect for dispatcher.dispatch that returns results by operation.""" + + def side_effect(ctx, timeout=None): + op = ctx.operation + if op in operation_results: + return operation_results[op] + return ExecutionResult(success=True, data={}) + + return side_effect + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTaskEnumRegistered: + """3-SANITY: Verify TaskName enum exists.""" + + def test_task_enum_registered(self): + assert hasattr(TaskName, "EXECUTE_STRUCTURE_TOOL") + assert str(TaskName.EXECUTE_STRUCTURE_TOOL) == "execute_structure_tool" + + +class TestStructureToolExtractIndexAnswer: + """Full pipeline: extract → index → answer_prompt.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_extract_index_answer( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + """Full pipeline: extract → index → answer_prompt.""" + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + # Setup mocks + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + + extract_result = ExecutionResult( + success=True, + data={"extracted_text": "Revenue is $1M"}, + ) + answer_result = ExecutionResult( + success=True, + data={ + "output": {"field_a": "$1M"}, + "metadata": {"run_id": "fexec-789"}, + "metrics": {"field_a": {"extraction_llm": {"tokens": 50}}}, + }, + ) + # extract, index, answer_prompt + dispatcher_instance.dispatch.side_effect = [ + extract_result, + ExecutionResult(success=True, data={"doc_id": "doc-1"}), + answer_result, + ] + + result = execute_structure_tool(base_params) + + assert result["success"] is True + assert result["data"]["output"]["field_a"] == "$1M" + assert result["data"]["metadata"]["file_name"] == "test.pdf" + # Verify output was written + mock_fs.json_dump.assert_called_once() + + # Verify dispatcher was called 3 times (extract, index, answer) + assert dispatcher_instance.dispatch.call_count == 3 + calls = dispatcher_instance.dispatch.call_args_list + assert calls[0][0][0].operation == "extract" + assert calls[1][0][0].operation == "index" + assert calls[2][0][0].operation == "answer_prompt" + + +class TestStructureToolSinglePass: + """Single-pass flag skips indexing, uses single_pass_extraction.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_single_pass( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + # Enable single pass + base_params["tool_instance_metadata"]["single_pass_extraction_mode"] = True + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + + extract_result = ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + answer_result = ExecutionResult( + success=True, + data={ + "output": {"field_a": "answer"}, + "metadata": {}, + "metrics": {}, + }, + ) + # extract, then single_pass_extraction (no index) + dispatcher_instance.dispatch.side_effect = [ + extract_result, + answer_result, + ] + + result = execute_structure_tool(base_params) + + assert result["success"] is True + # Should be 2 calls: extract + single_pass_extraction (no index) + assert dispatcher_instance.dispatch.call_count == 2 + calls = dispatcher_instance.dispatch.call_args_list + assert calls[0][0][0].operation == "extract" + assert calls[1][0][0].operation == "single_pass_extraction" + + +class TestStructureToolSummarize: + """Summarization path: extract → summarize → index → answer.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_summarize_flow( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + # Add summarize settings + tool_metadata_regular["tool_settings"]["summarize_prompt"] = ( + "Summarize this doc" + ) + base_params["tool_instance_metadata"]["summarize_as_source"] = True + + # Mock that extract file exists for reading + mock_fs.exists.return_value = False # No cached summary + mock_fs.read.return_value = "Full extracted text" + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + + extract_result = ExecutionResult( + success=True, data={"extracted_text": "Full text"} + ) + summarize_result = ExecutionResult( + success=True, data={"data": "Summarized text"} + ) + answer_result = ExecutionResult( + success=True, + data={ + "output": {"field_a": "answer"}, + "metadata": {}, + "metrics": {}, + }, + ) + dispatcher_instance.dispatch.side_effect = [ + extract_result, + summarize_result, + answer_result, + ] + + result = execute_structure_tool(base_params) + + assert result["success"] is True + # extract + summarize + answer (no index because summarize changes payload) + assert dispatcher_instance.dispatch.call_count == 3 + calls = dispatcher_instance.dispatch.call_args_list + assert calls[0][0][0].operation == "extract" + assert calls[1][0][0].operation == "summarize" + assert calls[2][0][0].operation == "answer_prompt" + + # Verify summarized text was written to cache + mock_fs.write.assert_called() + + +class TestStructureToolSmartTable: + """Excel with valid JSON schema skips extract and index.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_skip_extraction_smart_table( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + + # Add table_settings with a valid JSON prompt + tool_metadata_regular["outputs"][0]["table_settings"] = { + "is_directory_mode": False, + } + tool_metadata_regular["outputs"][0]["prompt"] = '{"key": "value"}' + + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + + answer_result = ExecutionResult( + success=True, + data={ + "output": {"field_a": "table_answer"}, + "metadata": {}, + "metrics": {}, + }, + ) + # Only answer_prompt (skip extract and index) + dispatcher_instance.dispatch.side_effect = [answer_result] + + result = execute_structure_tool(base_params) + + assert result["success"] is True + # Only 1 call: answer_prompt (no extract, no index) + assert dispatcher_instance.dispatch.call_count == 1 + calls = dispatcher_instance.dispatch.call_args_list + assert calls[0][0][0].operation == "answer_prompt" + + +class TestStructureToolAgentic: + """Agentic project routes to agentic_extraction.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_agentic_routing( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + + # Prompt studio lookup fails, agentic succeeds + mock_platform_helper.get_prompt_studio_tool.return_value = None + + agentic_metadata = { + "name": "Agentic Project", + "project_id": "ap-001", + "json_schema": {"field": "string"}, + } + mock_platform_helper.get_agentic_studio_tool.return_value = { + "tool_metadata": agentic_metadata, + } + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + + # Agentic extraction currently fails (plugin not available) + agentic_result = ExecutionResult.failure( + error="Agentic extraction requires the agentic extraction plugin" + ) + dispatcher_instance.dispatch.return_value = agentic_result + + result = execute_structure_tool(base_params) + + assert result["success"] is False + assert "agentic" in result["error"].lower() + + # Should dispatch to agentic_extraction + calls = dispatcher_instance.dispatch.call_args_list + assert len(calls) == 1 + assert calls[0][0][0].operation == "agentic_extraction" + + +class TestStructureToolProfileOverrides: + """Profile overrides modify tool_metadata correctly.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_profile_overrides( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + # Add profile override + base_params["exec_metadata"]["llm_profile_id"] = "profile-1" + mock_platform_helper.get_llm_profile.return_value = { + "profile_name": "Test Profile", + "llm_id": "llm-override", + } + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + + extract_result = ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + answer_result = ExecutionResult( + success=True, + data={ + "output": {"field_a": "answer"}, + "metadata": {}, + "metrics": {}, + }, + ) + dispatcher_instance.dispatch.side_effect = [ + extract_result, + ExecutionResult(success=True, data={"doc_id": "d1"}), + answer_result, + ] + + result = execute_structure_tool(base_params) + + assert result["success"] is True + # Verify profile override was applied + mock_platform_helper.get_llm_profile.assert_called_once_with("profile-1") + # The tool_settings should now have llm overridden + assert tool_metadata_regular["tool_settings"]["llm"] == "llm-override" + + +class TestStructureToolExtractFailure: + """Dispatcher extract failure → task returns failure.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_extract_failure( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + + extract_failure = ExecutionResult.failure( + error="X2Text adapter error: connection refused" + ) + dispatcher_instance.dispatch.return_value = extract_failure + + result = execute_structure_tool(base_params) + + assert result["success"] is False + assert "X2Text" in result["error"] + # Should only call extract, then bail + assert dispatcher_instance.dispatch.call_count == 1 + + +class TestStructureToolIndexDedup: + """Same (chunk_size, overlap, vdb, emb) combo indexed only once.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_index_dedup( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + + # Add a second output with same chunking params + second_output = dict(tool_metadata_regular["outputs"][0]) + second_output["name"] = "field_b" + tool_metadata_regular["outputs"].append(second_output) + + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + + extract_result = ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + index_result = ExecutionResult( + success=True, data={"doc_id": "d1"} + ) + answer_result = ExecutionResult( + success=True, + data={ + "output": {"field_a": "a", "field_b": "b"}, + "metadata": {}, + "metrics": {}, + }, + ) + dispatcher_instance.dispatch.side_effect = [ + extract_result, + index_result, # Only ONE index call despite 2 outputs + answer_result, + ] + + result = execute_structure_tool(base_params) + + assert result["success"] is True + # 3 calls: extract + 1 index (deduped) + answer + assert dispatcher_instance.dispatch.call_count == 3 + index_calls = [ + c + for c in dispatcher_instance.dispatch.call_args_list + if c[0][0].operation == "index" + ] + assert len(index_calls) == 1 + + +class TestStructureToolOutputWritten: + """Output JSON written to correct path with correct structure.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_output_written( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + + extract_result = ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + answer_result = ExecutionResult( + success=True, + data={ + "output": {"field_a": "answer"}, + "metadata": {}, + "metrics": {}, + }, + ) + dispatcher_instance.dispatch.side_effect = [ + extract_result, + ExecutionResult(success=True, data={"doc_id": "d1"}), + answer_result, + ] + + result = execute_structure_tool(base_params) + + assert result["success"] is True + + # Check json_dump was called with correct path + json_dump_call = mock_fs.json_dump.call_args + output_path = json_dump_call.kwargs.get( + "path", json_dump_call[1].get("path") if len(json_dump_call) > 1 else None + ) + if output_path is None: + # Try positional + output_path = json_dump_call[0][0] if json_dump_call[0] else None + + # Verify it ends with test.json (stem of test.pdf) + assert str(output_path).endswith("test.json") + + +class TestStructureToolMetadataFileName: + """metadata.file_name replaced with actual source filename.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_structure_tool_metadata_file_name( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + + extract_result = ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + answer_result = ExecutionResult( + success=True, + data={ + "output": {"field_a": "answer"}, + "metadata": {"run_id": "123"}, + "metrics": {}, + }, + ) + dispatcher_instance.dispatch.side_effect = [ + extract_result, + ExecutionResult(success=True, data={"doc_id": "d1"}), + answer_result, + ] + + result = execute_structure_tool(base_params) + + assert result["success"] is True + # file_name in metadata should be the source_file_name + assert result["data"]["metadata"]["file_name"] == "test.pdf" + + +class TestStructureToolSummarizeCache: + """Cached summary file skips dispatcher call.""" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_summarize_cache_hit( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + + tool_metadata_regular["tool_settings"]["summarize_prompt"] = ( + "Summarize" + ) + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + base_params["tool_instance_metadata"]["summarize_as_source"] = True + + # Simulate cached summary exists + mock_fs.exists.return_value = True + mock_fs.read.return_value = "Cached summary text" + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + + extract_result = ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + answer_result = ExecutionResult( + success=True, + data={ + "output": {"field_a": "from cache"}, + "metadata": {}, + "metrics": {}, + }, + ) + # extract + answer (no summarize call because cache hit) + dispatcher_instance.dispatch.side_effect = [ + extract_result, + answer_result, + ] + + result = execute_structure_tool(base_params) + + assert result["success"] is True + # Should be 2 calls: extract + answer (no summarize) + assert dispatcher_instance.dispatch.call_count == 2 + ops = [c[0][0].operation for c in dispatcher_instance.dispatch.call_args_list] + assert "summarize" not in ops + + +class TestWorkflowServiceDetection: + """Test _is_structure_tool_workflow detection.""" + + def test_is_structure_tool_detection(self): + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + # Mock execution_service with a structure tool instance + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "unstract/tool-structure" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is True + + def test_non_structure_tool_uses_docker(self): + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + # Mock execution_service with a non-structure tool + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "unstract/tool-classifier" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is False + + @patch.dict("os.environ", {"STRUCTURE_TOOL_IMAGE_NAME": "custom/structure"}) + def test_custom_structure_image_name(self): + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "custom/structure" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is True + + +class TestStructureToolParamsPassthrough: + """Task receives correct params from WorkerWorkflowExecutionService.""" + + @patch( + "shared.workflow.execution.service.WorkerWorkflowExecutionService." + "_execute_structure_tool_workflow" + ) + @patch( + "shared.workflow.execution.service.WorkerWorkflowExecutionService." + "_is_structure_tool_workflow", + return_value=True, + ) + def test_structure_tool_params_passthrough( + self, mock_is_struct, mock_exec_struct + ): + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + mock_exec_service = MagicMock() + mock_exec_service.tool_instances = [MagicMock()] + + service._build_and_execute_workflow(mock_exec_service, "test.pdf") + + # Verify _execute_structure_tool_workflow was called + mock_exec_struct.assert_called_once_with( + mock_exec_service, "test.pdf" + ) + + +class TestHelperFunctions: + """Test standalone helper functions.""" + + def test_apply_profile_overrides(self): + from file_processing.structure_tool_task import ( + _apply_profile_overrides, + ) + + tool_metadata = { + "tool_settings": { + "llm": "old-llm", + "embedding": "old-emb", + }, + "outputs": [ + { + "name": "field_a", + "llm": "old-llm", + "embedding": "old-emb", + }, + ], + } + profile_data = { + "llm_id": "new-llm", + "embedding_model_id": "new-emb", + } + + changes = _apply_profile_overrides(tool_metadata, profile_data) + + assert len(changes) == 4 # 2 in tool_settings + 2 in output + assert tool_metadata["tool_settings"]["llm"] == "new-llm" + assert tool_metadata["tool_settings"]["embedding"] == "new-emb" + assert tool_metadata["outputs"][0]["llm"] == "new-llm" + assert tool_metadata["outputs"][0]["embedding"] == "new-emb" + + def test_should_skip_extraction_no_table_settings(self): + from file_processing.structure_tool_task import ( + _should_skip_extraction_for_smart_table, + ) + + outputs = [{"name": "field_a", "prompt": "What?"}] + assert ( + _should_skip_extraction_for_smart_table("file.xlsx", outputs) + is False + ) + + def test_should_skip_extraction_with_json_schema(self): + from file_processing.structure_tool_task import ( + _should_skip_extraction_for_smart_table, + ) + + outputs = [ + { + "name": "field_a", + "table_settings": {}, + "prompt": '{"col1": "string", "col2": "number"}', + } + ] + assert ( + _should_skip_extraction_for_smart_table("file.xlsx", outputs) + is True + ) + + def test_merge_metrics(self): + from file_processing.structure_tool_task import _merge_metrics + + m1 = {"field_a": {"extraction_llm": {"tokens": 50}}} + m2 = {"field_a": {"indexing": {"time_taken(s)": 1.5}}} + merged = _merge_metrics(m1, m2) + assert "extraction_llm" in merged["field_a"] + assert "indexing" in merged["field_a"] + + def test_merge_metrics_empty(self): + from file_processing.structure_tool_task import _merge_metrics + + assert _merge_metrics({}, {}) == {} diff --git a/workers/tests/test_sanity_phase4.py b/workers/tests/test_sanity_phase4.py new file mode 100644 index 0000000000..0c8615884f --- /dev/null +++ b/workers/tests/test_sanity_phase4.py @@ -0,0 +1,873 @@ +"""Phase 4-SANITY — IDE path integration tests through executor chain. + +Phase 4 replaces PromptTool HTTP calls in PromptStudioHelper with +ExecutionDispatcher → executor worker → LegacyExecutor. + +These tests build the EXACT payloads that prompt_studio_helper.py +now sends via ExecutionDispatcher, push them through the full Celery +eager-mode chain, and verify the results match what the IDE expects. + +This validates the full contract: + prompt_studio_helper builds payload + → ExecutionContext(execution_source="ide", ...) + → Celery task → LegacyExecutor._handle_X() + → ExecutionResult → result.data used by IDE + +All tests use execution_source="ide" to match the real IDE path. +""" + +import json +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import ( + IndexingConstants as IKeys, + PromptServiceConstants as PSKeys, +) +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + +# --------------------------------------------------------------------------- +# Patch targets (same as Phase 2 sanity) +# --------------------------------------------------------------------------- + +_PATCH_X2TEXT = "executor.executors.legacy_executor.X2Text" +_PATCH_FS = "executor.executors.legacy_executor.FileUtils.get_fs_instance" +_PATCH_INDEX_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_indexing_deps" +) +_PATCH_PROMPT_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" +) +_PATCH_SHIM = "executor.executors.legacy_executor.ExecutorToolShim" +_PATCH_RUN_COMPLETION = ( + "executor.executors.answer_prompt.AnswerPromptService.run_completion" +) + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _ensure_legacy_registered(): + """Ensure LegacyExecutor is registered.""" + from executor.executors.legacy_executor import LegacyExecutor + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + yield + + +@pytest.fixture +def eager_app(): + """Configure executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run_task(eager_app, ctx_dict): + """Run execute_extraction task via task.apply() (eager-safe).""" + task = eager_app.tasks["execute_extraction"] + result = task.apply(args=[ctx_dict]) + return result.get() + + +def _mock_llm(answer="ide answer"): + """Create a mock LLM matching the answer_prompt pattern.""" + llm = MagicMock(name="llm") + response = MagicMock() + response.text = answer + llm.complete.return_value = { + PSKeys.RESPONSE: response, + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, + PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + llm.get_usage_reason.return_value = "extraction" + llm.get_metrics.return_value = {"tokens": 42} + return llm + + +def _mock_prompt_deps(llm=None): + """Return 7-tuple matching _get_prompt_deps() shape.""" + if llm is None: + llm = _mock_llm() + + from executor.executors.answer_prompt import AnswerPromptService + + RetrievalService = MagicMock(name="RetrievalService") + RetrievalService.run_retrieval.return_value = ["chunk1"] + RetrievalService.retrieve_complete_context.return_value = ["full doc"] + + VariableReplacementService = MagicMock(name="VariableReplacementService") + VariableReplacementService.is_variables_present.return_value = False + + Index = MagicMock(name="Index") + index_instance = MagicMock() + index_instance.generate_index_key.return_value = "doc-ide-key" + Index.return_value = index_instance + + LLM_cls = MagicMock(name="LLM") + LLM_cls.return_value = llm + + EmbeddingCompat = MagicMock(name="EmbeddingCompat") + VectorDB = MagicMock(name="VectorDB") + + return ( + AnswerPromptService, + RetrievalService, + VariableReplacementService, + Index, + LLM_cls, + EmbeddingCompat, + VectorDB, + ) + + +def _mock_process_response(text="ide extracted text"): + """Build a mock TextExtractionResult.""" + from unstract.sdk1.adapters.x2text.dto import ( + TextExtractionMetadata, + TextExtractionResult, + ) + + metadata = TextExtractionMetadata(whisper_hash="ide-hash") + return TextExtractionResult( + extracted_text=text, + extraction_metadata=metadata, + ) + + +def _make_ide_prompt(name="invoice_number", prompt="What is the invoice number?", + output_type="text", **overrides): + """Build a prompt dict matching what prompt_studio_helper builds. + + Uses the exact key strings from ToolStudioPromptKeys / PSKeys. + """ + d = { + PSKeys.NAME: name, + PSKeys.PROMPT: prompt, + PSKeys.TYPE: output_type, + # These match the hyphenated keys from ToolStudioPromptKeys + "chunk-size": 512, + "chunk-overlap": 64, + "retrieval-strategy": "simple", + "llm": "llm-ide-1", + "embedding": "emb-ide-1", + "vector-db": "vdb-ide-1", + "x2text_adapter": "x2t-ide-1", + "similarity-top-k": 3, + "active": True, + "required": True, + } + d.update(overrides) + return d + + +# --- IDE context factories matching prompt_studio_helper payloads --- + + +def _ide_extract_ctx(**overrides): + """Build ExecutionContext matching dynamic_extractor() dispatch. + + Key mapping: dynamic_extractor uses IKeys constants for payload keys, + and adds "platform_api_key" for the executor. + """ + defaults = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "run-ide-ext", + "execution_source": "ide", + "organization_id": "org-ide-test", + "executor_params": { + "x2text_instance_id": "x2t-ide-1", + "file_path": "/prompt-studio/org/user/tool/doc.pdf", + "enable_highlight": True, + "usage_kwargs": {"run_id": "run-ide-ext", "file_name": "doc.pdf"}, + "run_id": "run-ide-ext", + "log_events_id": "log-ide-1", + "execution_source": "ide", + "output_file_path": "/prompt-studio/org/user/tool/extract/doc.txt", + "platform_api_key": "pk-ide-test", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _ide_index_ctx(**overrides): + """Build ExecutionContext matching dynamic_indexer() dispatch. + + Key mapping: dynamic_indexer uses IKeys constants and adds + "platform_api_key" for the executor. + """ + defaults = { + "executor_name": "legacy", + "operation": "index", + "run_id": "run-ide-idx", + "execution_source": "ide", + "organization_id": "org-ide-test", + "executor_params": { + "tool_id": "tool-ide-1", + "embedding_instance_id": "emb-ide-1", + "vector_db_instance_id": "vdb-ide-1", + "x2text_instance_id": "x2t-ide-1", + "file_path": "/prompt-studio/org/user/tool/extract/doc.txt", + "file_hash": None, + "chunk_overlap": 64, + "chunk_size": 512, + "reindex": False, + "enable_highlight": True, + "usage_kwargs": {"run_id": "run-ide-idx", "file_name": "doc.pdf"}, + "extracted_text": "IDE extracted document text content", + "run_id": "run-ide-idx", + "log_events_id": "log-ide-1", + "execution_source": "ide", + "platform_api_key": "pk-ide-test", + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _ide_answer_prompt_ctx(prompts=None, **overrides): + """Build ExecutionContext matching _fetch_response() dispatch. + + Key mapping: _fetch_response uses TSPKeys (ToolStudioPromptKeys) + constants and adds PLATFORM_SERVICE_API_KEY + include_metadata. + """ + if prompts is None: + prompts = [_make_ide_prompt()] + defaults = { + "executor_name": "legacy", + "operation": "answer_prompt", + "run_id": "run-ide-ap", + "execution_source": "ide", + "organization_id": "org-ide-test", + "executor_params": { + "tool_settings": { + "enable_challenge": False, + "challenge_llm": "llm-challenge-1", + "single_pass_extraction_mode": False, + "summarize_as_source": False, + "preamble": "Extract accurately.", + "postamble": "No explanation.", + "grammar": [], + "enable_highlight": True, + "enable_word_confidence": False, + "platform_postamble": "", + "word_confidence_postamble": "", + }, + "outputs": prompts, + "tool_id": "tool-ide-1", + "run_id": "run-ide-ap", + "file_name": "invoice.pdf", + "file_hash": "abc123hash", + "file_path": "/prompt-studio/org/user/tool/extract/invoice.txt", + "log_events_id": "log-ide-1", + "execution_source": "ide", + "custom_data": {}, + "PLATFORM_SERVICE_API_KEY": "pk-ide-test", + "include_metadata": True, + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +def _ide_single_pass_ctx(prompts=None, **overrides): + """Build ExecutionContext matching _fetch_single_pass_response() dispatch.""" + if prompts is None: + prompts = [ + _make_ide_prompt(name="revenue", prompt="What is total revenue?"), + _make_ide_prompt(name="date", prompt="What is the date?"), + ] + defaults = { + "executor_name": "legacy", + "operation": "single_pass_extraction", + "run_id": "run-ide-sp", + "execution_source": "ide", + "organization_id": "org-ide-test", + "executor_params": { + "tool_settings": { + "preamble": "Extract accurately.", + "postamble": "No explanation.", + "grammar": [], + "llm": "llm-ide-1", + "x2text_adapter": "x2t-ide-1", + "vector-db": "vdb-ide-1", + "embedding": "emb-ide-1", + "chunk-size": 0, + "chunk-overlap": 0, + "enable_challenge": False, + "enable_highlight": True, + "enable_word_confidence": False, + "challenge_llm": None, + "platform_postamble": "", + "word_confidence_postamble": "", + "summarize_as_source": False, + }, + "outputs": prompts, + "tool_id": "tool-ide-1", + "run_id": "run-ide-sp", + "file_hash": "abc123hash", + "file_name": "invoice.pdf", + "file_path": "/prompt-studio/org/user/tool/extract/invoice.txt", + "log_events_id": "log-ide-1", + "execution_source": "ide", + "custom_data": {}, + "PLATFORM_SERVICE_API_KEY": "pk-ide-test", + "include_metadata": True, + }, + } + defaults.update(overrides) + return ExecutionContext(**defaults) + + +# =========================================================================== +# Test classes +# =========================================================================== + + +class TestIDEExtract: + """IDE extract payload → executor → extracted_text.""" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_ide_extract_returns_text(self, mock_x2text_cls, mock_get_fs, eager_app): + """IDE extract payload produces extracted_text in result.data.""" + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response( + "Invoice #12345 dated 2024-01-15" + ) + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _ide_extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert "extracted_text" in result.data + assert result.data["extracted_text"] == "Invoice #12345 dated 2024-01-15" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_ide_extract_with_output_file_path( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """IDE extract passes output_file_path to x2text.process().""" + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("text") + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _ide_extract_ctx() + _run_task(eager_app, ctx.to_dict()) + + # Verify output_file_path was passed through + call_kwargs = mock_x2text.process.call_args + assert call_kwargs is not None + assert "output_file_path" in call_kwargs.kwargs + assert call_kwargs.kwargs["output_file_path"] == ( + "/prompt-studio/org/user/tool/extract/doc.txt" + ) + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_ide_extract_failure(self, mock_x2text_cls, mock_get_fs, eager_app): + """Adapter failure → ExecutionResult(success=False).""" + from unstract.sdk1.adapters.exceptions import AdapterError + + mock_x2text = MagicMock() + mock_x2text.x2text_instance = MagicMock() + mock_x2text.x2text_instance.get_name.return_value = "LLMWhisperer" + mock_x2text.process.side_effect = AdapterError("extraction failed") + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + ctx = _ide_extract_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + assert "extraction failed" in result.error + + +class TestIDEIndex: + """IDE index payload → executor → doc_id.""" + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_ide_index_returns_doc_id(self, mock_deps, mock_get_fs, eager_app): + """IDE index payload produces doc_id in result.data.""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-ide-indexed" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = "doc-ide-indexed" + mock_index_cls.return_value = mock_index + + mock_emb_cls = MagicMock() + mock_emb_cls.return_value = MagicMock() + mock_vdb_cls = MagicMock() + mock_vdb_cls.return_value = MagicMock() + + mock_deps.return_value = (mock_index_cls, mock_emb_cls, mock_vdb_cls) + mock_get_fs.return_value = MagicMock() + + ctx = _ide_index_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data["doc_id"] == "doc-ide-indexed" + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_ide_index_with_null_file_hash(self, mock_deps, mock_get_fs, eager_app): + """IDE indexer sends file_hash=None — executor handles it.""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-null-hash" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = "doc-null-hash" + mock_index_cls.return_value = mock_index + + mock_deps.return_value = (mock_index_cls, MagicMock(), MagicMock()) + mock_get_fs.return_value = MagicMock() + + # file_hash=None is exactly what dynamic_indexer sends + ctx = _ide_index_ctx() + assert ctx.executor_params["file_hash"] is None + + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert result.data["doc_id"] == "doc-null-hash" + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_ide_index_failure(self, mock_deps, mock_get_fs, eager_app): + """Index failure → ExecutionResult(success=False).""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-fail" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.side_effect = RuntimeError("VDB timeout") + mock_index_cls.return_value = mock_index + + mock_deps.return_value = (mock_index_cls, MagicMock(), MagicMock()) + mock_get_fs.return_value = MagicMock() + + ctx = _ide_index_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is False + + +class TestIDEAnswerPrompt: + """IDE answer_prompt payload → executor → {output, metadata, metrics}.""" + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_answer_prompt_text(self, mock_shim_cls, mock_deps, eager_app): + """IDE text prompt → output dict with prompt_key → answer.""" + llm = _mock_llm("INV-2024-001") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_answer_prompt_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + # IDE expects result.data to have "output", "metadata", "metrics" + assert "output" in result.data + assert "metadata" in result.data + assert "metrics" in result.data + assert result.data["output"]["invoice_number"] == "INV-2024-001" + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_answer_prompt_metadata_has_run_id( + self, mock_shim_cls, mock_deps, eager_app + ): + """IDE response metadata contains run_id and file_name.""" + llm = _mock_llm("answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_answer_prompt_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + metadata = result.data["metadata"] + assert metadata["run_id"] == "run-ide-ap" + assert metadata["file_name"] == "invoice.pdf" + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_answer_prompt_with_eval_settings( + self, mock_shim_cls, mock_deps, eager_app + ): + """Prompt with eval_settings passes through to executor cleanly.""" + llm = _mock_llm("answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + prompt = _make_ide_prompt( + eval_settings={ + "evaluate": True, + "monitor_llm": ["llm-monitor-1"], + "exclude_failed": True, + } + ) + ctx = _ide_answer_prompt_ctx(prompts=[prompt]) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_answer_prompt_platform_key_reaches_shim( + self, mock_shim_cls, mock_deps, eager_app + ): + """PLATFORM_SERVICE_API_KEY in payload reaches ExecutorToolShim.""" + llm = _mock_llm("answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_answer_prompt_ctx() + _run_task(eager_app, ctx.to_dict()) + + # Verify shim was constructed with the platform key + mock_shim_cls.assert_called() + call_kwargs = mock_shim_cls.call_args + assert call_kwargs.kwargs.get("platform_api_key") == "pk-ide-test" + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_answer_prompt_webhook_settings( + self, mock_shim_cls, mock_deps, eager_app + ): + """Prompt with webhook settings passes through cleanly.""" + llm = _mock_llm("answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + prompt = _make_ide_prompt( + enable_postprocessing_webhook=True, + postprocessing_webhook_url="https://example.com/hook", + ) + ctx = _ide_answer_prompt_ctx(prompts=[prompt]) + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + + +class TestIDESinglePass: + """IDE single_pass_extraction → executor → same shape as answer_prompt.""" + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_single_pass_multi_prompt( + self, mock_shim_cls, mock_deps, eager_app + ): + """Single pass with multiple prompts → all fields in output.""" + llm = _mock_llm("single pass value") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_single_pass_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert "output" in result.data + assert "revenue" in result.data["output"] + assert "date" in result.data["output"] + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_single_pass_has_metadata( + self, mock_shim_cls, mock_deps, eager_app + ): + """Single pass returns metadata with run_id.""" + llm = _mock_llm("value") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_single_pass_ctx() + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + + assert result.success is True + assert "metadata" in result.data + assert result.data["metadata"]["run_id"] == "run-ide-sp" + + +class TestIDEDispatcherIntegration: + """Test ExecutionDispatcher dispatch() with IDE payloads in eager mode. + + Celery's send_task() doesn't work with eager mode for AsyncResult.get(), + so we patch send_task to delegate to task.apply() instead. + """ + + @staticmethod + def _patch_send_task(eager_app): + """Patch send_task on eager_app to use task.apply().""" + original_send_task = eager_app.send_task + + def patched_send_task(name, args=None, kwargs=None, **opts): + task = eager_app.tasks[name] + return task.apply(args=args, kwargs=kwargs) + + eager_app.send_task = patched_send_task + return original_send_task + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_dispatcher_extract_round_trip( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """ExecutionDispatcher.dispatch() → extract → ExecutionResult.""" + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response( + "dispatcher extracted" + ) + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_get_fs.return_value = MagicMock() + + original = self._patch_send_task(eager_app) + try: + dispatcher = ExecutionDispatcher(celery_app=eager_app) + ctx = _ide_extract_ctx() + result = dispatcher.dispatch(ctx) + finally: + eager_app.send_task = original + + assert result.success is True + assert result.data["extracted_text"] == "dispatcher extracted" + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_dispatcher_answer_prompt_round_trip( + self, mock_shim_cls, mock_deps, eager_app + ): + """ExecutionDispatcher.dispatch() → answer_prompt → ExecutionResult.""" + llm = _mock_llm("dispatcher answer") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + original = self._patch_send_task(eager_app) + try: + dispatcher = ExecutionDispatcher(celery_app=eager_app) + ctx = _ide_answer_prompt_ctx() + result = dispatcher.dispatch(ctx) + finally: + eager_app.send_task = original + + assert result.success is True + assert result.data["output"]["invoice_number"] == "dispatcher answer" + assert "metadata" in result.data + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_dispatcher_single_pass_round_trip( + self, mock_shim_cls, mock_deps, eager_app + ): + """ExecutionDispatcher.dispatch() → single_pass → ExecutionResult.""" + llm = _mock_llm("sp dispatch") + mock_deps.return_value = _mock_prompt_deps(llm) + mock_shim_cls.return_value = MagicMock() + + original = self._patch_send_task(eager_app) + try: + dispatcher = ExecutionDispatcher(celery_app=eager_app) + ctx = _ide_single_pass_ctx() + result = dispatcher.dispatch(ctx) + finally: + eager_app.send_task = original + + assert result.success is True + assert "revenue" in result.data["output"] + + @patch(_PATCH_FS) + @patch(_PATCH_INDEX_DEPS) + def test_dispatcher_index_round_trip( + self, mock_deps, mock_get_fs, eager_app + ): + """ExecutionDispatcher.dispatch() → index → ExecutionResult.""" + mock_index_cls = MagicMock() + mock_index = MagicMock() + mock_index.generate_index_key.return_value = "doc-dispatch-idx" + mock_index.is_document_indexed.return_value = False + mock_index.perform_indexing.return_value = "doc-dispatch-idx" + mock_index_cls.return_value = mock_index + + mock_deps.return_value = (mock_index_cls, MagicMock(), MagicMock()) + mock_get_fs.return_value = MagicMock() + + original = self._patch_send_task(eager_app) + try: + dispatcher = ExecutionDispatcher(celery_app=eager_app) + ctx = _ide_index_ctx() + result = dispatcher.dispatch(ctx) + finally: + eager_app.send_task = original + + assert result.success is True + assert result.data["doc_id"] == "doc-dispatch-idx" + + +class TestIDEExecutionSourceRouting: + """Verify execution_source='ide' propagates correctly.""" + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + def test_ide_source_reaches_extract_handler( + self, mock_x2text_cls, mock_get_fs, eager_app + ): + """Extract handler receives execution_source='ide' from context.""" + mock_x2text = MagicMock() + mock_x2text.process.return_value = _mock_process_response("text") + mock_x2text.x2text_instance = MagicMock() + mock_x2text_cls.return_value = mock_x2text + mock_fs = MagicMock() + mock_get_fs.return_value = mock_fs + + ctx = _ide_extract_ctx() + assert ctx.execution_source == "ide" + + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + assert result.success is True + + # For IDE source, _update_exec_metadata should NOT write + # (it only writes for execution_source="tool") + # This is verified by the fact that no dump_json was called + # on the fs mock. In IDE mode, whisper_hash metadata is skipped. + + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_SHIM) + def test_ide_source_in_answer_prompt_enables_variable_replacement( + self, mock_shim_cls, mock_deps, eager_app + ): + """execution_source='ide' in payload sets is_ide=True for variable replacement.""" + llm = _mock_llm("var answer") + deps = _mock_prompt_deps(llm) + # Enable variable checking to verify is_ide routing + var_service = deps[2] # VariableReplacementService + var_service.is_variables_present.return_value = False + mock_deps.return_value = deps + mock_shim_cls.return_value = MagicMock() + + ctx = _ide_answer_prompt_ctx() + # Verify execution_source is in both context and payload + assert ctx.execution_source == "ide" + assert ctx.executor_params["execution_source"] == "ide" + + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + assert result.success is True + + +class TestIDEPayloadKeyCompatibility: + """Verify the exact key names in IDE payloads match executor expectations.""" + + def test_extract_payload_keys_match_executor(self): + """dynamic_extractor payload keys match _handle_extract reads.""" + ctx = _ide_extract_ctx() + params = ctx.executor_params + + # These are the keys _handle_extract reads from params + assert "x2text_instance_id" in params + assert "file_path" in params + assert "platform_api_key" in params + assert "output_file_path" in params + assert "enable_highlight" in params + assert "usage_kwargs" in params + + def test_index_payload_keys_match_executor(self): + """dynamic_indexer payload keys match _handle_index reads.""" + ctx = _ide_index_ctx() + params = ctx.executor_params + + # These are the keys _handle_index reads from params + assert "embedding_instance_id" in params + assert "vector_db_instance_id" in params + assert "x2text_instance_id" in params + assert "file_path" in params + assert "extracted_text" in params + assert "platform_api_key" in params + assert "chunk_size" in params + assert "chunk_overlap" in params + + def test_answer_prompt_payload_keys_match_executor(self): + """_fetch_response payload keys match _handle_answer_prompt reads.""" + ctx = _ide_answer_prompt_ctx() + params = ctx.executor_params + + # These are the keys _handle_answer_prompt reads + assert "tool_settings" in params + assert "outputs" in params + assert "tool_id" in params + assert "file_hash" in params + assert "file_path" in params + assert "file_name" in params + assert "PLATFORM_SERVICE_API_KEY" in params + assert "log_events_id" in params + assert "execution_source" in params + assert "custom_data" in params + + def test_answer_prompt_platform_key_is_uppercase(self): + """answer_prompt uses PLATFORM_SERVICE_API_KEY (uppercase, not snake_case).""" + ctx = _ide_answer_prompt_ctx() + # _handle_answer_prompt reads PSKeys.PLATFORM_SERVICE_API_KEY + # which is "PLATFORM_SERVICE_API_KEY" + assert "PLATFORM_SERVICE_API_KEY" in ctx.executor_params + # NOT "platform_api_key" (that's for extract/index) + assert ctx.executor_params["PLATFORM_SERVICE_API_KEY"] == "pk-ide-test" + + def test_extract_platform_key_is_lowercase(self): + """extract/index uses platform_api_key (lowercase snake_case).""" + ctx = _ide_extract_ctx() + assert "platform_api_key" in ctx.executor_params + + def test_execution_context_has_ide_source(self): + """All IDE contexts have execution_source='ide'.""" + assert _ide_extract_ctx().execution_source == "ide" + assert _ide_index_ctx().execution_source == "ide" + assert _ide_answer_prompt_ctx().execution_source == "ide" + assert _ide_single_pass_ctx().execution_source == "ide" diff --git a/workers/tests/test_usage.py b/workers/tests/test_usage.py new file mode 100644 index 0000000000..720f5388cc --- /dev/null +++ b/workers/tests/test_usage.py @@ -0,0 +1,309 @@ +"""Phase 2G — Usage tracking tests. + +Verifies: +1. UsageHelper.push_usage_data wraps Audit correctly +2. Invalid kwargs returns False +3. Invalid platform_api_key returns False +4. Audit exceptions are caught and return False +5. format_float_positional formats correctly +6. SDK1 adapters already push usage (integration check) +7. answer_prompt handler returns metrics in ExecutionResult +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.usage import UsageHelper + + +# --------------------------------------------------------------------------- +# 1. push_usage_data success +# --------------------------------------------------------------------------- + + +class TestPushUsageData: + @patch("unstract.sdk1.audit.Audit") + def test_push_success(self, mock_audit_cls): + """Successful push returns True and calls Audit.""" + mock_audit = MagicMock() + mock_audit_cls.return_value = mock_audit + + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "run-001", "execution_id": "exec-001"}, + platform_api_key="test-key", + token_counter=MagicMock(), + model_name="gpt-4", + ) + + assert result is True + mock_audit.push_usage_data.assert_called_once() + call_kwargs = mock_audit.push_usage_data.call_args + assert call_kwargs.kwargs["platform_api_key"] == "test-key" + assert call_kwargs.kwargs["model_name"] == "gpt-4" + assert call_kwargs.kwargs["event_type"] == "llm" + + @patch("unstract.sdk1.audit.Audit") + def test_push_passes_token_counter(self, mock_audit_cls): + """Token counter is passed through to Audit.""" + mock_audit = MagicMock() + mock_audit_cls.return_value = mock_audit + mock_counter = MagicMock() + + UsageHelper.push_usage_data( + event_type="embedding", + kwargs={"run_id": "run-002"}, + platform_api_key="key-2", + token_counter=mock_counter, + ) + + call_kwargs = mock_audit.push_usage_data.call_args + assert call_kwargs.kwargs["token_counter"] is mock_counter + + +# --------------------------------------------------------------------------- +# 2. Invalid kwargs +# --------------------------------------------------------------------------- + + +class TestPushValidation: + def test_none_kwargs_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs=None, + platform_api_key="key", + ) + assert result is False + + def test_empty_kwargs_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={}, + platform_api_key="key", + ) + assert result is False + + def test_non_dict_kwargs_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs="not a dict", + platform_api_key="key", + ) + assert result is False + + +# --------------------------------------------------------------------------- +# 3. Invalid platform_api_key +# --------------------------------------------------------------------------- + + +class TestPushApiKeyValidation: + def test_none_key_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "r1"}, + platform_api_key=None, + ) + assert result is False + + def test_empty_key_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "r1"}, + platform_api_key="", + ) + assert result is False + + def test_non_string_key_returns_false(self): + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "r1"}, + platform_api_key=12345, + ) + assert result is False + + +# --------------------------------------------------------------------------- +# 4. Audit exceptions are caught +# --------------------------------------------------------------------------- + + +class TestPushErrorHandling: + @patch("unstract.sdk1.audit.Audit") + def test_audit_exception_returns_false(self, mock_audit_cls): + """Audit errors are caught and return False.""" + mock_audit = MagicMock() + mock_audit.push_usage_data.side_effect = Exception("Network error") + mock_audit_cls.return_value = mock_audit + + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "r1"}, + platform_api_key="key", + token_counter=MagicMock(), + ) + + assert result is False + + @patch("unstract.sdk1.audit.Audit") + def test_import_error_returns_false(self, mock_audit_cls): + """Import errors are caught gracefully.""" + mock_audit_cls.side_effect = ImportError("no module") + + result = UsageHelper.push_usage_data( + event_type="llm", + kwargs={"run_id": "r1"}, + platform_api_key="key", + ) + + assert result is False + + +# --------------------------------------------------------------------------- +# 5. format_float_positional +# --------------------------------------------------------------------------- + + +class TestFormatFloat: + def test_normal_float(self): + assert UsageHelper.format_float_positional(0.0001234) == "0.0001234" + + def test_trailing_zeros_removed(self): + assert UsageHelper.format_float_positional(1.50) == "1.5" + + def test_integer_value(self): + assert UsageHelper.format_float_positional(42.0) == "42" + + def test_zero(self): + assert UsageHelper.format_float_positional(0.0) == "0" + + def test_small_value(self): + result = UsageHelper.format_float_positional(0.00000001) + assert "0.00000001" == result + + def test_custom_precision(self): + result = UsageHelper.format_float_positional(1.123456789, precision=3) + assert result == "1.123" + + +# --------------------------------------------------------------------------- +# 6. SDK1 adapters already push usage +# --------------------------------------------------------------------------- + + +class TestAdapterUsageTracking: + def test_llm_calls_audit_push(self): + """Verify the LLM adapter imports and calls Audit.push_usage_data. + + This is a static analysis check — we verify the SDK1 LLM module + references Audit.push_usage_data, confirming adapters handle + usage tracking internally. + """ + import inspect + + from unstract.sdk1.llm import LLM + + source = inspect.getsource(LLM) + assert "push_usage_data" in source + assert "Audit" in source + + +# --------------------------------------------------------------------------- +# 7. answer_prompt handler returns metrics +# --------------------------------------------------------------------------- + + +class TestMetricsInResult: + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_answer_prompt_returns_metrics( + self, mock_shim_cls, mock_get_deps + ): + """answer_prompt result includes metrics dict.""" + from unstract.sdk1.execution.context import ExecutionContext + from unstract.sdk1.execution.registry import ExecutorRegistry + + ExecutorRegistry.clear() + from executor.executors.legacy_executor import LegacyExecutor + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + + executor = ExecutorRegistry.get("legacy") + + # Mock all dependencies + mock_llm = MagicMock() + mock_llm.get_metrics.return_value = {"total_tokens": 100} + mock_llm.get_usage_reason.return_value = "extraction" + mock_llm.complete.return_value = { + "response": MagicMock(text="test answer"), + "highlight_data": [], + "confidence_data": None, + "word_confidence_data": None, + "line_numbers": [], + "whisper_hash": "", + } + + mock_llm_cls = MagicMock(return_value=mock_llm) + mock_index = MagicMock() + mock_index.return_value.generate_index_key.return_value = "doc-123" + + mock_get_deps.return_value = ( + MagicMock(), # AnswerPromptService — use real for construct + MagicMock(), # RetrievalService + MagicMock(), # VariableReplacementService + mock_index, # Index + mock_llm_cls, # LLM + MagicMock(), # EmbeddingCompat + MagicMock(), # VectorDB + ) + + # Patch AnswerPromptService methods at their real location + with patch( + "executor.executors.answer_prompt.AnswerPromptService.extract_variable", + return_value="test prompt", + ), patch( + "executor.executors.answer_prompt.AnswerPromptService.construct_and_run_prompt", + return_value="test answer", + ): + ctx = ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id="run-metrics-001", + execution_source="tool", + organization_id="org-test", + request_id="req-metrics-001", + executor_params={ + "tool_settings": {}, + "outputs": [ + { + "name": "field1", + "prompt": "What is X?", + "chunk-size": 512, + "chunk-overlap": 64, + "vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + "type": "text", + "retrieval-strategy": "simple", + "similarity-top-k": 5, + }, + ], + "tool_id": "tool-1", + "file_hash": "hash123", + "file_path": "/tmp/test.txt", + "file_name": "test.txt", + "PLATFORM_SERVICE_API_KEY": "test-key", + }, + ) + result = executor.execute(ctx) + + assert result.success is True + assert "metrics" in result.data + assert "field1" in result.data["metrics"] + + ExecutorRegistry.clear() From 41eeef8a30238bf8061930dded1fd31c63675c44 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Fri, 20 Feb 2026 00:37:57 +0530 Subject: [PATCH 02/64] async flow --- backend/backend/celery_config.py | 8 + .../prompt_studio_core_v2/tasks.py | 175 +++++++ .../prompt_studio_core_v2/test_tasks.py | 461 ++++++++++++++++++ .../prompt_studio_core_v2/urls.py | 7 + .../prompt_studio_core_v2/views.py | 120 +++-- docker/docker-compose.yaml | 2 +- .../src/unstract/sdk1/execution/dispatcher.py | 9 +- 7 files changed, 749 insertions(+), 33 deletions(-) create mode 100644 backend/prompt_studio/prompt_studio_core_v2/tasks.py create mode 100644 backend/prompt_studio/prompt_studio_core_v2/test_tasks.py diff --git a/backend/backend/celery_config.py b/backend/backend/celery_config.py index f8833556e7..9ddd8a342a 100644 --- a/backend/backend/celery_config.py +++ b/backend/backend/celery_config.py @@ -31,3 +31,11 @@ class CeleryConfig: beat_scheduler = "django_celery_beat.schedulers:DatabaseScheduler" task_acks_late = True + + # Route long-running Prompt Studio IDE tasks to a dedicated queue + # so they don't compete with beat/logging/API-deployment tasks. + task_routes = { + "prompt_studio_index_document": {"queue": "celery_prompt_studio"}, + "prompt_studio_fetch_response": {"queue": "celery_prompt_studio"}, + "prompt_studio_single_pass": {"queue": "celery_prompt_studio"}, + } diff --git a/backend/prompt_studio/prompt_studio_core_v2/tasks.py b/backend/prompt_studio/prompt_studio_core_v2/tasks.py new file mode 100644 index 0000000000..a52f6441be --- /dev/null +++ b/backend/prompt_studio/prompt_studio_core_v2/tasks.py @@ -0,0 +1,175 @@ +import logging +from typing import Any + +from celery import shared_task + +from account_v2.constants import Common +from utils.local_context import StateStore +from utils.log_events import _emit_websocket_event + +logger = logging.getLogger(__name__) + +PROMPT_STUDIO_RESULT_EVENT = "prompt_studio_result" + + +def _setup_state_store(log_events_id: str, request_id: str) -> None: + """Restore thread-local context that was captured in the Django view.""" + StateStore.set(Common.LOG_EVENTS_ID, log_events_id) + StateStore.set(Common.REQUEST_ID, request_id) + + +def _clear_state_store() -> None: + """Clean up thread-local context to prevent leaking between tasks.""" + StateStore.clear(Common.LOG_EVENTS_ID) + StateStore.clear(Common.REQUEST_ID) + + +def _emit_result( + log_events_id: str, + task_id: str, + operation: str, + result: dict[str, Any], +) -> None: + """Push a success event to the frontend via Socket.IO.""" + _emit_websocket_event( + room=log_events_id, + event=PROMPT_STUDIO_RESULT_EVENT, + data={ + "task_id": task_id, + "status": "completed", + "operation": operation, + "result": result, + }, + ) + + +def _emit_error( + log_events_id: str, + task_id: str, + operation: str, + error: str, +) -> None: + """Push a failure event to the frontend via Socket.IO.""" + _emit_websocket_event( + room=log_events_id, + event=PROMPT_STUDIO_RESULT_EVENT, + data={ + "task_id": task_id, + "status": "failed", + "operation": operation, + "error": error, + }, + ) + + +@shared_task(name="prompt_studio_index_document", bind=True) +def run_index_document( + self, + tool_id: str, + file_name: str, + org_id: str, + user_id: str, + document_id: str, + run_id: str, + log_events_id: str, + request_id: str, +) -> dict[str, Any]: + from prompt_studio.prompt_studio_core_v2.prompt_studio_helper import ( + PromptStudioHelper, + ) + + try: + _setup_state_store(log_events_id, request_id) + PromptStudioHelper.index_document( + tool_id=tool_id, + file_name=file_name, + org_id=org_id, + user_id=user_id, + document_id=document_id, + run_id=run_id, + ) + result: dict[str, Any] = {"message": "Document indexed successfully."} + _emit_result(log_events_id, self.request.id, "index_document", result) + return result + except Exception as e: + logger.exception("run_index_document failed") + _emit_error(log_events_id, self.request.id, "index_document", str(e)) + raise + finally: + _clear_state_store() + + +@shared_task(name="prompt_studio_fetch_response", bind=True) +def run_fetch_response( + self, + tool_id: str, + org_id: str, + user_id: str, + document_id: str, + run_id: str, + log_events_id: str, + request_id: str, + id: str | None = None, + profile_manager_id: str | None = None, +) -> dict[str, Any]: + from prompt_studio.prompt_studio_core_v2.prompt_studio_helper import ( + PromptStudioHelper, + ) + + try: + _setup_state_store(log_events_id, request_id) + response: dict[str, Any] = PromptStudioHelper.prompt_responder( + id=id, + tool_id=tool_id, + org_id=org_id, + user_id=user_id, + document_id=document_id, + run_id=run_id, + profile_manager_id=profile_manager_id, + ) + _emit_result(log_events_id, self.request.id, "fetch_response", response) + return response + except Exception as e: + logger.exception("run_fetch_response failed") + _emit_error(log_events_id, self.request.id, "fetch_response", str(e)) + raise + finally: + _clear_state_store() + + +@shared_task(name="prompt_studio_single_pass", bind=True) +def run_single_pass_extraction( + self, + tool_id: str, + org_id: str, + user_id: str, + document_id: str, + run_id: str, + log_events_id: str, + request_id: str, +) -> dict[str, Any]: + from prompt_studio.prompt_studio_core_v2.prompt_studio_helper import ( + PromptStudioHelper, + ) + + try: + _setup_state_store(log_events_id, request_id) + response: dict[str, Any] = PromptStudioHelper.prompt_responder( + tool_id=tool_id, + org_id=org_id, + user_id=user_id, + document_id=document_id, + run_id=run_id, + ) + _emit_result( + log_events_id, self.request.id, "single_pass_extraction", response + ) + return response + except Exception as e: + logger.exception("run_single_pass_extraction failed") + _emit_error( + log_events_id, self.request.id, "single_pass_extraction", str(e) + ) + raise + finally: + _clear_state_store() diff --git a/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py b/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py new file mode 100644 index 0000000000..d8e2731144 --- /dev/null +++ b/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py @@ -0,0 +1,461 @@ +"""Phase 7-9 sanity tests for Prompt Studio IDE async backend. + +Tests the Celery task definitions (Phase 7), view dispatch (Phase 8), +and polling endpoint (Phase 9). + +Requires Django to be configured (source .env before running): + set -a && source .env && set +a + uv run pytest prompt_studio/prompt_studio_core_v2/test_tasks.py -v +""" + +import os +from unittest.mock import MagicMock, patch + +import django + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend.settings.dev") +django.setup() + +import pytest # noqa: E402 +from celery import Celery # noqa: E402 + +from account_v2.constants import Common # noqa: E402 +from prompt_studio.prompt_studio_core_v2.tasks import ( # noqa: E402 + PROMPT_STUDIO_RESULT_EVENT, + run_fetch_response, + run_index_document, + run_single_pass_extraction, +) +from utils.local_context import StateStore # noqa: E402 + +# --------------------------------------------------------------------------- +# Celery eager-mode app for testing +# --------------------------------------------------------------------------- +test_app = Celery("test") +test_app.conf.update( + task_always_eager=True, + task_eager_propagates=True, + result_backend="cache+memory://", +) +run_index_document.bind(test_app) +run_fetch_response.bind(test_app) +run_single_pass_extraction.bind(test_app) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- +COMMON_KWARGS = { + "tool_id": "tool-123", + "org_id": "org-456", + "user_id": "user-789", + "document_id": "doc-abc", + "run_id": "run-def", + "log_events_id": "session-room-xyz", + "request_id": "req-001", +} + + +# =================================================================== +# Phase 7: Task definition tests +# =================================================================== +class TestTaskNames: + def test_index_document_task_name(self): + assert run_index_document.name == "prompt_studio_index_document" + + def test_fetch_response_task_name(self): + assert run_fetch_response.name == "prompt_studio_fetch_response" + + def test_single_pass_task_name(self): + assert run_single_pass_extraction.name == "prompt_studio_single_pass" + + +class TestRunIndexDocument: + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_success_returns_result(self, mock_helper, mock_emit): + mock_helper.index_document.return_value = "unique-id-123" + result = run_index_document.apply( + kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} + ).get() + + assert result == {"message": "Document indexed successfully."} + mock_helper.index_document.assert_called_once_with( + tool_id="tool-123", + file_name="test.pdf", + org_id="org-456", + user_id="user-789", + document_id="doc-abc", + run_id="run-def", + ) + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_success_emits_completed_event(self, mock_helper, mock_emit): + mock_helper.index_document.return_value = "unique-id-123" + run_index_document.apply( + kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} + ).get() + + mock_emit.assert_called_once() + kwargs = mock_emit.call_args.kwargs + assert kwargs["room"] == "session-room-xyz" + assert kwargs["event"] == PROMPT_STUDIO_RESULT_EVENT + assert kwargs["data"]["status"] == "completed" + assert kwargs["data"]["operation"] == "index_document" + assert kwargs["data"]["result"] == { + "message": "Document indexed successfully." + } + assert "task_id" in kwargs["data"] + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_failure_emits_error_and_reraises(self, mock_helper, mock_emit): + mock_helper.index_document.side_effect = RuntimeError("index boom") + + with pytest.raises(RuntimeError, match="index boom"): + run_index_document.apply( + kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} + ).get() + + mock_emit.assert_called_once() + assert mock_emit.call_args.kwargs["data"]["status"] == "failed" + assert "index boom" in mock_emit.call_args.kwargs["data"]["error"] + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_state_store_cleared_on_success(self, mock_helper, mock_emit): + mock_helper.index_document.return_value = "ok" + run_index_document.apply( + kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} + ).get() + + assert StateStore.get(Common.LOG_EVENTS_ID) is None + assert StateStore.get(Common.REQUEST_ID) is None + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_state_store_cleared_on_failure(self, mock_helper, mock_emit): + mock_helper.index_document.side_effect = RuntimeError("fail") + with pytest.raises(RuntimeError): + run_index_document.apply( + kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} + ).get() + + assert StateStore.get(Common.LOG_EVENTS_ID) is None + assert StateStore.get(Common.REQUEST_ID) is None + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_state_store_set_during_execution(self, mock_helper, mock_emit): + """Verify StateStore has the right values while the helper runs.""" + captured = {} + + def capture_state(**kwargs): + captured["log_events_id"] = StateStore.get(Common.LOG_EVENTS_ID) + captured["request_id"] = StateStore.get(Common.REQUEST_ID) + return "ok" + + mock_helper.index_document.side_effect = capture_state + run_index_document.apply( + kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} + ).get() + + assert captured["log_events_id"] == "session-room-xyz" + assert captured["request_id"] == "req-001" + # And cleared after + assert StateStore.get(Common.LOG_EVENTS_ID) is None + + +class TestRunFetchResponse: + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_success_returns_response(self, mock_helper, mock_emit): + expected = {"output": {"field": "value"}, "metadata": {"tokens": 42}} + mock_helper.prompt_responder.return_value = expected + + result = run_fetch_response.apply( + kwargs={ + **COMMON_KWARGS, + "id": "prompt-1", + "profile_manager_id": "pm-1", + } + ).get() + + assert result == expected + mock_helper.prompt_responder.assert_called_once_with( + id="prompt-1", + tool_id="tool-123", + org_id="org-456", + user_id="user-789", + document_id="doc-abc", + run_id="run-def", + profile_manager_id="pm-1", + ) + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_success_emits_fetch_response_event(self, mock_helper, mock_emit): + mock_helper.prompt_responder.return_value = {"output": "data"} + run_fetch_response.apply( + kwargs={**COMMON_KWARGS, "id": "p1", "profile_manager_id": None} + ).get() + + data = mock_emit.call_args.kwargs["data"] + assert data["status"] == "completed" + assert data["operation"] == "fetch_response" + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_failure_emits_error(self, mock_helper, mock_emit): + mock_helper.prompt_responder.side_effect = ValueError("prompt fail") + + with pytest.raises(ValueError, match="prompt fail"): + run_fetch_response.apply(kwargs=COMMON_KWARGS).get() + + data = mock_emit.call_args.kwargs["data"] + assert data["status"] == "failed" + assert "prompt fail" in data["error"] + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_optional_params_default_none(self, mock_helper, mock_emit): + mock_helper.prompt_responder.return_value = {} + run_fetch_response.apply(kwargs=COMMON_KWARGS).get() + + mock_helper.prompt_responder.assert_called_once_with( + id=None, + tool_id="tool-123", + org_id="org-456", + user_id="user-789", + document_id="doc-abc", + run_id="run-def", + profile_manager_id=None, + ) + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_state_store_cleared(self, mock_helper, mock_emit): + mock_helper.prompt_responder.return_value = {} + run_fetch_response.apply(kwargs=COMMON_KWARGS).get() + assert StateStore.get(Common.LOG_EVENTS_ID) is None + + +class TestRunSinglePassExtraction: + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_success_returns_response(self, mock_helper, mock_emit): + expected = {"output": {"key": "val"}} + mock_helper.prompt_responder.return_value = expected + + result = run_single_pass_extraction.apply(kwargs=COMMON_KWARGS).get() + + assert result == expected + mock_helper.prompt_responder.assert_called_once_with( + tool_id="tool-123", + org_id="org-456", + user_id="user-789", + document_id="doc-abc", + run_id="run-def", + ) + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_success_emits_single_pass_event(self, mock_helper, mock_emit): + mock_helper.prompt_responder.return_value = {"data": "ok"} + run_single_pass_extraction.apply(kwargs=COMMON_KWARGS).get() + + data = mock_emit.call_args.kwargs["data"] + assert data["status"] == "completed" + assert data["operation"] == "single_pass_extraction" + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_failure_emits_error(self, mock_helper, mock_emit): + mock_helper.prompt_responder.side_effect = TypeError("single pass fail") + + with pytest.raises(TypeError, match="single pass fail"): + run_single_pass_extraction.apply(kwargs=COMMON_KWARGS).get() + + data = mock_emit.call_args.kwargs["data"] + assert data["status"] == "failed" + + @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") + @patch( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" + ) + def test_state_store_cleared(self, mock_helper, mock_emit): + mock_helper.prompt_responder.return_value = {} + run_single_pass_extraction.apply(kwargs=COMMON_KWARGS).get() + assert StateStore.get(Common.LOG_EVENTS_ID) is None + + +# =================================================================== +# Phase 8: View dispatch tests +# =================================================================== +class TestViewsDispatchTasks: + """Verify the three views no longer call helpers directly.""" + + def test_index_document_view_has_no_blocking_call(self): + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = inspect.getsource(PromptStudioCoreView.index_document) + assert "run_index_document.apply_async" in source + assert "PromptStudioHelper.index_document(" not in source + assert "HTTP_202_ACCEPTED" in source + + def test_fetch_response_view_has_no_blocking_call(self): + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = inspect.getsource(PromptStudioCoreView.fetch_response) + assert "run_fetch_response.apply_async" in source + assert "PromptStudioHelper.prompt_responder(" not in source + assert "HTTP_202_ACCEPTED" in source + + def test_single_pass_view_has_no_blocking_call(self): + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = inspect.getsource(PromptStudioCoreView.single_pass_extraction) + assert "run_single_pass_extraction.apply_async" in source + assert "PromptStudioHelper.prompt_responder(" not in source + assert "HTTP_202_ACCEPTED" in source + + def test_views_capture_state_store_context(self): + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + for method_name in [ + "index_document", + "fetch_response", + "single_pass_extraction", + ]: + source = inspect.getsource(getattr(PromptStudioCoreView, method_name)) + assert "StateStore.get(Common.LOG_EVENTS_ID)" in source, ( + f"{method_name} missing LOG_EVENTS_ID capture" + ) + assert "StateStore.get(Common.REQUEST_ID)" in source, ( + f"{method_name} missing REQUEST_ID capture" + ) + + +# =================================================================== +# Phase 9: Polling endpoint tests +# =================================================================== +class TestTaskStatusAction: + def test_task_status_method_exists(self): + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + assert hasattr(PromptStudioCoreView, "task_status") + assert callable(getattr(PromptStudioCoreView, "task_status")) + + def test_task_status_url_registered(self): + from prompt_studio.prompt_studio_core_v2.urls import urlpatterns + + task_status_urls = [ + p + for p in urlpatterns + if hasattr(p, "name") and p.name == "prompt-studio-task-status" + ] + assert len(task_status_urls) >= 1 + url = task_status_urls[0] + assert "" in str(url.pattern) + assert "" in str(url.pattern) + + @patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) + def test_task_status_processing(self, MockAsyncResult): + """Verify processing response for unfinished task.""" + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = inspect.getsource(PromptStudioCoreView.task_status) + assert "not result.ready()" in source + assert '"processing"' in source + + @patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) + def test_task_status_completed(self, MockAsyncResult): + """Verify completed response structure.""" + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = inspect.getsource(PromptStudioCoreView.task_status) + assert "result.successful()" in source + assert '"completed"' in source + assert "result.result" in source + + @patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) + def test_task_status_failed(self, MockAsyncResult): + """Verify failed response structure.""" + import inspect + + from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView + + source = inspect.getsource(PromptStudioCoreView.task_status) + assert '"failed"' in source + assert "HTTP_500_INTERNAL_SERVER_ERROR" in source + + +# =================================================================== +# Phase 6: Config tests +# =================================================================== +class TestCeleryConfig: + def test_task_routes_defined(self): + from backend.celery_config import CeleryConfig + + assert hasattr(CeleryConfig, "task_routes") + + def test_all_three_tasks_routed(self): + from backend.celery_config import CeleryConfig + + routes = CeleryConfig.task_routes + assert routes["prompt_studio_index_document"] == { + "queue": "celery_prompt_studio" + } + assert routes["prompt_studio_fetch_response"] == { + "queue": "celery_prompt_studio" + } + assert routes["prompt_studio_single_pass"] == { + "queue": "celery_prompt_studio" + } + + def test_celery_app_loads_routes(self): + from backend.celery_service import app + + assert app.conf.task_routes is not None + assert "prompt_studio_index_document" in app.conf.task_routes diff --git a/backend/prompt_studio/prompt_studio_core_v2/urls.py b/backend/prompt_studio/prompt_studio_core_v2/urls.py index 228368544a..f0fcb63513 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/urls.py +++ b/backend/prompt_studio/prompt_studio_core_v2/urls.py @@ -59,6 +59,8 @@ {"get": "check_deployment_usage"} ) +prompt_studio_task_status = PromptStudioCoreView.as_view({"get": "task_status"}) + urlpatterns = format_suffix_patterns( [ @@ -143,5 +145,10 @@ prompt_studio_deployment_usage, name="prompt_studio_deployment_usage", ), + path( + "prompt-studio//task-status/", + prompt_studio_task_status, + name="prompt-studio-task-status", + ), ] ) diff --git a/backend/prompt_studio/prompt_studio_core_v2/views.py b/backend/prompt_studio/prompt_studio_core_v2/views.py index 5e1f0d2a3f..3c515401ff 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/views.py +++ b/backend/prompt_studio/prompt_studio_core_v2/views.py @@ -47,7 +47,14 @@ ToolDeleteError, ) from prompt_studio.prompt_studio_core_v2.migration_utils import SummarizeMigrationUtils +from account_v2.constants import Common from prompt_studio.prompt_studio_core_v2.prompt_studio_helper import PromptStudioHelper +from prompt_studio.prompt_studio_core_v2.tasks import ( + run_fetch_response, + run_index_document, + run_single_pass_extraction, +) +from utils.local_context import StateStore from prompt_studio.prompt_studio_core_v2.retrieval_strategies import ( get_retrieval_strategy_metadata, ) @@ -369,22 +376,25 @@ def index_document(self, request: HttpRequest, pk: Any = None) -> Response: # Generate a run_id run_id = CommonUtils.generate_uuid() - unique_id = PromptStudioHelper.index_document( - tool_id=str(tool.tool_id), - file_name=file_name, - org_id=UserSessionUtils.get_organization_id(request), - user_id=tool.created_by.user_id, - document_id=document_id, - run_id=run_id, + log_events_id = StateStore.get(Common.LOG_EVENTS_ID) + request_id = StateStore.get(Common.REQUEST_ID) + + task = run_index_document.apply_async( + kwargs={ + "tool_id": str(tool.tool_id), + "file_name": file_name, + "org_id": UserSessionUtils.get_organization_id(request), + "user_id": tool.created_by.user_id, + "document_id": document_id, + "run_id": run_id, + "log_events_id": log_events_id, + "request_id": request_id, + } + ) + return Response( + {"task_id": task.id, "run_id": run_id, "status": "accepted"}, + status=status.HTTP_202_ACCEPTED, ) - if unique_id: - return Response( - {"message": "Document indexed successfully."}, - status=status.HTTP_200_OK, - ) - else: - logger.error("Error occured while indexing. Unique ID is not valid.") - raise IndexingAPIError() @action(detail=True, methods=["post"]) def fetch_response(self, request: HttpRequest, pk: Any = None) -> Response: @@ -408,16 +418,26 @@ def fetch_response(self, request: HttpRequest, pk: Any = None) -> Response: if not run_id: # Generate a run_id run_id = CommonUtils.generate_uuid() - response: dict[str, Any] = PromptStudioHelper.prompt_responder( - id=id, - tool_id=tool_id, - org_id=UserSessionUtils.get_organization_id(request), - user_id=custom_tool.created_by.user_id, - document_id=document_id, - run_id=run_id, - profile_manager_id=profile_manager, + log_events_id = StateStore.get(Common.LOG_EVENTS_ID) + request_id = StateStore.get(Common.REQUEST_ID) + + task = run_fetch_response.apply_async( + kwargs={ + "tool_id": tool_id, + "org_id": UserSessionUtils.get_organization_id(request), + "user_id": custom_tool.created_by.user_id, + "document_id": document_id, + "run_id": run_id, + "id": id, + "profile_manager_id": profile_manager, + "log_events_id": log_events_id, + "request_id": request_id, + } + ) + return Response( + {"task_id": task.id, "run_id": run_id, "status": "accepted"}, + status=status.HTTP_202_ACCEPTED, ) - return Response(response, status=status.HTTP_200_OK) @action(detail=True, methods=["post"]) def single_pass_extraction(self, request: HttpRequest, pk: uuid) -> Response: @@ -439,14 +459,52 @@ def single_pass_extraction(self, request: HttpRequest, pk: uuid) -> Response: if not run_id: # Generate a run_id run_id = CommonUtils.generate_uuid() - response: dict[str, Any] = PromptStudioHelper.prompt_responder( - tool_id=tool_id, - org_id=UserSessionUtils.get_organization_id(request), - user_id=custom_tool.created_by.user_id, - document_id=document_id, - run_id=run_id, + log_events_id = StateStore.get(Common.LOG_EVENTS_ID) + request_id = StateStore.get(Common.REQUEST_ID) + + task = run_single_pass_extraction.apply_async( + kwargs={ + "tool_id": tool_id, + "org_id": UserSessionUtils.get_organization_id(request), + "user_id": custom_tool.created_by.user_id, + "document_id": document_id, + "run_id": run_id, + "log_events_id": log_events_id, + "request_id": request_id, + } + ) + return Response( + {"task_id": task.id, "run_id": run_id, "status": "accepted"}, + status=status.HTTP_202_ACCEPTED, + ) + + @action(detail=True, methods=["get"]) + def task_status(self, request: HttpRequest, pk: Any = None, task_id: str = None) -> Response: + """Poll the status of an async Prompt Studio task. + + Args: + request (HttpRequest) + pk: Primary key of the CustomTool (for permission check) + task_id: Celery task ID returned by the 202 response + + Returns: + Response with {task_id, status} and optionally result or error + """ + from celery.result import AsyncResult + + from backend.celery_service import app as celery_app + + result = AsyncResult(task_id, app=celery_app) + if not result.ready(): + return Response({"task_id": task_id, "status": "processing"}) + if result.successful(): + return Response( + {"task_id": task_id, "status": "completed", "result": result.result} + ) + return Response( + {"task_id": task_id, "status": "failed", "error": str(result.result)}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, ) - return Response(response, status=status.HTTP_200_OK) @action(detail=True, methods=["get"]) def list_of_shared_users(self, request: HttpRequest, pk: Any = None) -> Response: diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 075b3f2895..07611650b4 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -44,7 +44,7 @@ services: container_name: unstract-worker restart: unless-stopped entrypoint: .venv/bin/celery - command: "-A backend worker --loglevel=info -Q celery,celery_api_deployments --autoscale=${WORKER_AUTOSCALE}" + command: "-A backend worker --loglevel=info -Q celery,celery_api_deployments,celery_prompt_studio --autoscale=${WORKER_AUTOSCALE}" env_file: - ../backend/.env depends_on: diff --git a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py index 8d55a5d58c..d3214ad4c2 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py @@ -111,7 +111,14 @@ def dispatch( ) try: - result_dict = async_result.get(timeout=timeout) + # disable_sync_subtasks=False: safe because the executor task + # runs on a *different* broker (RabbitMQ) and worker pool + # (worker-v2) — no deadlock risk even when dispatch() is + # called from inside a Django Celery task (Redis broker). + result_dict = async_result.get( + timeout=timeout, + disable_sync_subtasks=False, + ) except Exception as exc: logger.error( "Dispatch failed: executor=%s operation=%s " From f66dfb2c24580ce5a8b790c37005b3c8522b187f Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 24 Feb 2026 18:26:56 +0530 Subject: [PATCH 03/64] Streaming progress to FE --- backend/backend/worker_celery.py | 18 +- .../prompt_studio_helper.py | 8 +- .../prompt_studio_core_v2/tasks.py | 68 ++- .../prompt-card/DisplayPromptResult.jsx | 16 +- .../custom-tools/prompt-card/PromptCard.css | 14 + .../custom-tools/prompt-card/PromptCard.jsx | 3 +- .../prompt-card/PromptCardItems.jsx | 1 + .../custom-tools/prompt-card/PromptOutput.jsx | 5 + .../custom-tools/prompt-card/PromptRun.jsx | 5 +- .../custom-tools/tool-ide/ToolIde.jsx | 31 +- .../helpers/socket-messages/SocketMessages.js | 6 +- frontend/src/helpers/SocketContext.js | 16 +- frontend/src/hooks/usePromptRun.js | 46 +- frontend/src/hooks/usePromptStudioSocket.js | 119 +++++ frontend/src/setupProxy.js | 1 + .../core/src/unstract/core/pubsub_helper.py | 31 +- .../src/unstract/sdk1/execution/context.py | 6 + .../src/unstract/sdk1/execution/dispatcher.py | 10 +- unstract/sdk1/tests/test_execution.py | 8 +- workers/executor/executor_tool_shim.py | 49 +- workers/executor/executors/legacy_executor.py | 187 ++++++- workers/executor/tasks.py | 18 +- workers/tests/test_answer_prompt.py | 21 + workers/tests/test_phase1_log_streaming.py | 483 ++++++++++++++++++ workers/tests/test_sanity_phase2.py | 15 +- workers/tests/test_sanity_phase4.py | 33 +- workers/tests/test_usage.py | 6 +- 27 files changed, 1098 insertions(+), 126 deletions(-) create mode 100644 frontend/src/hooks/usePromptStudioSocket.js create mode 100644 workers/tests/test_phase1_log_streaming.py diff --git a/backend/backend/worker_celery.py b/backend/backend/worker_celery.py index 6b980f6f26..c22cb9e6da 100644 --- a/backend/backend/worker_celery.py +++ b/backend/backend/worker_celery.py @@ -2,9 +2,10 @@ The Django backend uses Redis as its Celery broker for internal tasks (beat, periodic tasks, etc.). The worker-v2 workers (executor, -file_processing, etc.) use RabbitMQ as their broker. +file_processing, etc.) use a separate broker configured via +``WORKER_CELERY_BROKER_URL``. -This module provides a Celery app connected to RabbitMQ specifically +This module provides a Celery app connected to that worker-v2 broker for dispatching tasks (via ExecutionDispatcher) to worker-v2 workers. Problem: Celery reads the ``CELERY_BROKER_URL`` environment variable @@ -13,7 +14,7 @@ every Celery app created in this process inherits Redis as broker. Solution: Subclass Celery and override ``connection_for_write`` / -``connection_for_read`` so they always use our explicit RabbitMQ URL, +``connection_for_read`` so they always use our explicit broker URL, bypassing the config resolution chain entirely. """ @@ -57,7 +58,7 @@ def get_worker_celery_app() -> Celery: """Get or create a Celery app for dispatching to worker-v2 workers. The app uses: - - RabbitMQ as broker (WORKER_CELERY_BROKER_URL env var) + - Worker-v2 broker (WORKER_CELERY_BROKER_URL env var) - Same PostgreSQL result backend as the Django Celery app Returns: @@ -74,8 +75,8 @@ def get_worker_celery_app() -> Celery: if not broker_url: raise ValueError( "WORKER_CELERY_BROKER_URL is not set. " - "This should point to the RabbitMQ broker used by worker-v2 " - "workers (e.g., amqp://admin:password@rabbitmq:5672//)." + "This should point to the broker used by worker-v2 " + "workers (e.g., redis://unstract-redis:6379)." ) # Reuse the same PostgreSQL result backend as Django's Celery app @@ -106,7 +107,10 @@ def get_worker_celery_app() -> Celery: _worker_app = app # Log broker host only (mask credentials) safe_broker = broker_url.split("@")[-1] if "@" in broker_url else broker_url + safe_backend = result_backend.split("@")[-1] if "@" in result_backend else result_backend logger.info( - "Created worker dispatch Celery app (broker=%s)", safe_broker + "Created worker dispatch Celery app (broker=%s, result_backend=%s)", + safe_broker, + safe_backend, ) return _worker_app diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py index 2bd2dd3b05..d797f5e35d 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py +++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py @@ -267,8 +267,8 @@ def _publish_log( component: dict[str, str], level: str, state: str, message: str ) -> None: LogPublisher.publish( - StateStore.get(Common.LOG_EVENTS_ID), - LogPublisher.log_prompt(component, level, state, message), + channel_id=StateStore.get(Common.LOG_EVENTS_ID), + payload=LogPublisher.log_progress(component, level, state, message), ) @staticmethod @@ -1034,6 +1034,7 @@ def _fetch_response( organization_id=org_id, executor_params=payload, request_id=StateStore.get(Common.REQUEST_ID), + log_events_id=StateStore.get(Common.LOG_EVENTS_ID), ) result = dispatcher.dispatch(context) if not result.success: @@ -1183,6 +1184,7 @@ def dynamic_indexer( organization_id=org_id, executor_params=payload, request_id=StateStore.get(Common.REQUEST_ID), + log_events_id=StateStore.get(Common.LOG_EVENTS_ID), ) result = dispatcher.dispatch(index_context) if not result.success: @@ -1333,6 +1335,7 @@ def _fetch_single_pass_response( organization_id=org_id, executor_params=payload, request_id=StateStore.get(Common.REQUEST_ID), + log_events_id=StateStore.get(Common.LOG_EVENTS_ID), ) result = dispatcher.dispatch(context) if not result.success: @@ -1418,6 +1421,7 @@ def dynamic_extractor( organization_id=org_id, executor_params=payload, request_id=StateStore.get(Common.REQUEST_ID), + log_events_id=StateStore.get(Common.LOG_EVENTS_ID), ) result = dispatcher.dispatch(extract_context) if not result.success: diff --git a/backend/prompt_studio/prompt_studio_core_v2/tasks.py b/backend/prompt_studio/prompt_studio_core_v2/tasks.py index a52f6441be..60b1e71b76 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/tasks.py +++ b/backend/prompt_studio/prompt_studio_core_v2/tasks.py @@ -1,9 +1,12 @@ +import json import logging +import uuid from typing import Any from celery import shared_task from account_v2.constants import Common +from utils.constants import Account from utils.local_context import StateStore from utils.log_events import _emit_websocket_event @@ -12,16 +15,40 @@ PROMPT_STUDIO_RESULT_EVENT = "prompt_studio_result" -def _setup_state_store(log_events_id: str, request_id: str) -> None: +class _UUIDEncoder(json.JSONEncoder): + """JSON encoder that converts uuid.UUID objects to strings.""" + + def default(self, obj: Any) -> Any: + if isinstance(obj, uuid.UUID): + return str(obj) + return super().default(obj) + + +def _json_safe(data: Any) -> Any: + """Round-trip through JSON to convert non-serializable types (UUID → str). + + DRF serializers return uuid.UUID objects for PrimaryKeyRelatedField + and UUIDField. Socket.IO's pubsub uses stdlib json.dumps which + cannot handle them, so we sanitize here before emitting. + """ + return json.loads(json.dumps(data, cls=_UUIDEncoder)) + + +def _setup_state_store( + log_events_id: str, request_id: str, org_id: str = "" +) -> None: """Restore thread-local context that was captured in the Django view.""" StateStore.set(Common.LOG_EVENTS_ID, log_events_id) StateStore.set(Common.REQUEST_ID, request_id) + if org_id: + StateStore.set(Account.ORGANIZATION_ID, org_id) def _clear_state_store() -> None: """Clean up thread-local context to prevent leaking between tasks.""" StateStore.clear(Common.LOG_EVENTS_ID) StateStore.clear(Common.REQUEST_ID) + StateStore.clear(Account.ORGANIZATION_ID) def _emit_result( @@ -34,12 +61,12 @@ def _emit_result( _emit_websocket_event( room=log_events_id, event=PROMPT_STUDIO_RESULT_EVENT, - data={ + data=_json_safe({ "task_id": task_id, "status": "completed", "operation": operation, "result": result, - }, + }), ) @@ -48,17 +75,21 @@ def _emit_error( task_id: str, operation: str, error: str, + extra: dict[str, Any] | None = None, ) -> None: """Push a failure event to the frontend via Socket.IO.""" + data: dict[str, Any] = { + "task_id": task_id, + "status": "failed", + "operation": operation, + "error": error, + } + if extra: + data.update(extra) _emit_websocket_event( room=log_events_id, event=PROMPT_STUDIO_RESULT_EVENT, - data={ - "task_id": task_id, - "status": "failed", - "operation": operation, - "error": error, - }, + data=data, ) @@ -79,7 +110,7 @@ def run_index_document( ) try: - _setup_state_store(log_events_id, request_id) + _setup_state_store(log_events_id, request_id, org_id) PromptStudioHelper.index_document( tool_id=tool_id, file_name=file_name, @@ -88,12 +119,21 @@ def run_index_document( document_id=document_id, run_id=run_id, ) - result: dict[str, Any] = {"message": "Document indexed successfully."} + result: dict[str, Any] = { + "message": "Document indexed successfully.", + "document_id": document_id, + } _emit_result(log_events_id, self.request.id, "index_document", result) return result except Exception as e: logger.exception("run_index_document failed") - _emit_error(log_events_id, self.request.id, "index_document", str(e)) + _emit_error( + log_events_id, + self.request.id, + "index_document", + str(e), + extra={"document_id": document_id}, + ) raise finally: _clear_state_store() @@ -117,7 +157,7 @@ def run_fetch_response( ) try: - _setup_state_store(log_events_id, request_id) + _setup_state_store(log_events_id, request_id, org_id) response: dict[str, Any] = PromptStudioHelper.prompt_responder( id=id, tool_id=tool_id, @@ -153,7 +193,7 @@ def run_single_pass_extraction( ) try: - _setup_state_store(log_events_id, request_id) + _setup_state_store(log_events_id, request_id, org_id) response: dict[str, Any] = PromptStudioHelper.prompt_responder( tool_id=tool_id, org_id=org_id, diff --git a/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx b/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx index 51344888fa..b24a2dc919 100644 --- a/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx +++ b/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx @@ -25,6 +25,7 @@ function DisplayPromptResult({ wordConfidenceData, isTable = false, setOpenExpandModal = () => {}, + progressMsg, }) { const [isLoading, setIsLoading] = useState(false); const [parsedOutput, setParsedOutput] = useState(null); @@ -66,7 +67,19 @@ function DisplayPromptResult({ ]); if (isLoading) { - return } />; + return ( +
+ } /> + {progressMsg?.message && ( + + {progressMsg.message} + + )} +
+ ); } if (output === undefined) { @@ -427,6 +440,7 @@ DisplayPromptResult.propTypes = { wordConfidenceData: PropTypes.object, isTable: PropTypes.bool, setOpenExpandModal: PropTypes.func, + progressMsg: PropTypes.object, }; export { DisplayPromptResult }; diff --git a/frontend/src/components/custom-tools/prompt-card/PromptCard.css b/frontend/src/components/custom-tools/prompt-card/PromptCard.css index 9b58a9b7ea..509e05bc0b 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptCard.css +++ b/frontend/src/components/custom-tools/prompt-card/PromptCard.css @@ -325,3 +325,17 @@ .prompt-output-result{ font-size: 12px; } + +.prompt-loading-container { + display: flex; + align-items: center; + gap: 8px; +} + +.prompt-progress-msg { + font-size: 12px; + max-width: 300px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} diff --git a/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx b/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx index bf844778a3..1fc70be363 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx @@ -74,7 +74,8 @@ const PromptCard = memo( .find( (item) => (item?.component?.prompt_id === promptDetailsState?.prompt_id || - item?.component?.prompt_key === promptKey) && + item?.component?.prompt_key === promptKey || + item?.component?.tool_id === details?.tool_id) && (item?.level === "INFO" || item?.level === "ERROR") ); diff --git a/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx b/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx index 594ae62f51..63dc1b05f4 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx @@ -316,6 +316,7 @@ function PromptCardItems({ promptRunStatus={promptRunStatus} isChallenge={isChallenge} handleSelectHighlight={handleSelectHighlight} + progressMsg={progressMsg} /> diff --git a/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx b/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx index 3286db0799..e82d98caf9 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx @@ -65,6 +65,7 @@ function PromptOutput({ promptRunStatus, isChallenge, handleSelectHighlight, + progressMsg, }) { const [openExpandModal, setOpenExpandModal] = useState(false); const { width: windowWidth } = useWindowDimensions(); @@ -110,6 +111,7 @@ function PromptOutput({ promptDetails={promptDetails} isTable={true} setOpenExpandModal={setOpenExpandModal} + progressMsg={progressMsg} />
state.setPromptRunQueue ); const { runPrompt, syncPromptRunApisAndStatus } = usePromptRun(); + usePromptStudioSocket(); const promptRunStatus = usePromptRunStatusStore( (state) => state.promptRunStatus ); diff --git a/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx b/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx index f24fda5d92..017b748b87 100644 --- a/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx +++ b/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx @@ -257,29 +257,14 @@ function ToolIde() { }; pushIndexDoc(docId); - return axiosPrivate(requestOptions) - .then(() => { - setAlertDetails({ - type: "success", - content: `${doc?.document_name} - Indexed successfully`, - }); - - try { - setPostHogCustomEvent("intent_success_ps_indexed_file", { - info: "Indexing completed", - }); - } catch (err) { - // If an error occurs while setting custom posthog event, ignore it and continue - } - }) - .catch((err) => { - setAlertDetails( - handleException(err, `${doc?.document_name} - Failed to index`) - ); - }) - .finally(() => { - deleteIndexDoc(docId); - }); + return axiosPrivate(requestOptions).catch((err) => { + // Only clear spinner on POST network failure (not 2xx). + // On success the spinner stays until a socket event arrives. + deleteIndexDoc(docId); + setAlertDetails( + handleException(err, `${doc?.document_name} - Failed to index`) + ); + }); }; const handleUpdateTool = async (body) => { diff --git a/frontend/src/components/helpers/socket-messages/SocketMessages.js b/frontend/src/components/helpers/socket-messages/SocketMessages.js index aa8224aa52..4ec9aa1533 100644 --- a/frontend/src/components/helpers/socket-messages/SocketMessages.js +++ b/frontend/src/components/helpers/socket-messages/SocketMessages.js @@ -11,6 +11,7 @@ import throttle from "lodash/throttle"; import { SocketContext } from "../../../helpers/SocketContext"; import { useExceptionHandler } from "../../../hooks/useExceptionHandler"; import { useAlertStore } from "../../../store/alert-store"; +import { useSocketCustomToolStore } from "../../../store/socket-custom-tool"; import { useSocketLogsStore } from "../../../store/socket-logs-store"; import { useSocketMessagesStore } from "../../../store/socket-messages-store"; import { useSessionStore } from "../../../store/session-store"; @@ -28,6 +29,7 @@ function SocketMessages() { setPointer, } = useSocketMessagesStore(); const { pushLogMessages } = useSocketLogsStore(); + const { updateCusToolMessages } = useSocketCustomToolStore(); const { sessionDetails } = useSessionStore(); const socket = useContext(SocketContext); const { setAlertDetails } = useAlertStore(); @@ -89,6 +91,8 @@ function SocketMessages() { pushStagedMessage(msg); } else if (msg?.type === "LOG" && msg?.service === "prompt") { handleLogMessages(msg); + } else if (msg?.type === "PROGRESS") { + updateCusToolMessages([msg]); } if (msg?.type === "LOG" && msg?.service === "usage") { @@ -102,7 +106,7 @@ function SocketMessages() { ); } }, - [handleLogMessages, pushStagedMessage] + [handleLogMessages, pushStagedMessage, updateCusToolMessages] ); // Subscribe/unsubscribe to the socket channel diff --git a/frontend/src/helpers/SocketContext.js b/frontend/src/helpers/SocketContext.js index fa8d32e176..6e6ace9a63 100644 --- a/frontend/src/helpers/SocketContext.js +++ b/frontend/src/helpers/SocketContext.js @@ -10,17 +10,15 @@ const SocketProvider = ({ children }) => { const [socket, setSocket] = useState(null); useEffect(() => { - let baseUrl = ""; - const body = { + // Always connect to the same origin as the page. + // - Dev: CRA proxy (ws: true in setupProxy.js) forwards to the backend. + // - Prod: Traefik routes /api/v1/socket to the backend. + // This ensures session cookies are sent (same-origin) and avoids + // cross-origin WebSocket issues. + const newSocket = io(getBaseUrl(), { transports: ["websocket"], path: "/api/v1/socket", - }; - if (!process.env.NODE_ENV || process.env.NODE_ENV === "development") { - baseUrl = process.env.REACT_APP_BACKEND_URL; - } else { - baseUrl = getBaseUrl(); - } - const newSocket = io(baseUrl, body); + }); setSocket(newSocket); // Clean up the socket connection on browser unload window.onbeforeunload = () => { diff --git a/frontend/src/hooks/usePromptRun.js b/frontend/src/hooks/usePromptRun.js index 421ab957f7..86bf966b71 100644 --- a/frontend/src/hooks/usePromptRun.js +++ b/frontend/src/hooks/usePromptRun.js @@ -1,7 +1,6 @@ import { generateApiRunStatusId, generateUUID, - pollForCompletion, PROMPT_RUN_API_STATUSES, PROMPT_RUN_TYPES, } from "../helpers/GetStaticData"; @@ -16,11 +15,9 @@ import usePromptOutput from "./usePromptOutput"; const usePromptRun = () => { const { pushPromptRunApi, freeActiveApi } = usePromptRunQueueStore(); - const { generatePromptOutputKey, updatePromptOutputState } = - usePromptOutput(); + const { generatePromptOutputKey } = usePromptOutput(); const { addPromptStatus, removePromptStatus } = usePromptRunStatusStore(); - const { details, llmProfiles, listOfDocs, selectedDoc } = - useCustomToolStore(); + const { details, llmProfiles, listOfDocs } = useCustomToolStore(); const { sessionDetails } = useSessionStore(); const axiosPrivate = useAxiosPrivate(); const { setAlertDetails } = useAlertStore(); @@ -28,6 +25,8 @@ const usePromptRun = () => { const makeApiRequest = (requestOptions) => axiosPrivate(requestOptions); + const SOCKET_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes + const runPromptApi = (api) => { const [promptId, docId, profileId] = api.split("__"); const runId = generateUUID(); @@ -49,32 +48,33 @@ const usePromptRun = () => { data: body, }; - const startTime = Date.now(); - const maxWaitTime = 30 * 1000; // 30 seconds - const pollingInterval = 5000; // 5 seconds - - pollForCompletion( - startTime, - requestOptions, - maxWaitTime, - pollingInterval, - makeApiRequest - ) - .then((res) => { - if (docId !== selectedDoc?.document_id) return; - const data = res?.data || []; - const timeTakenInSeconds = Math.floor((Date.now() - startTime) / 1000); - updatePromptOutputState(data, false, timeTakenInSeconds); + // Fire-and-forget: POST dispatches the Celery task, socket delivers result. + makeApiRequest(requestOptions) + .then(() => { + // Timeout safety net: clear stale status if socket event never arrives. + setTimeout(() => { + const statusKey = generateApiRunStatusId(docId, profileId); + const current = usePromptRunStatusStore.getState().promptRunStatus; + if ( + current?.[promptId]?.[statusKey] === PROMPT_RUN_API_STATUSES.RUNNING + ) { + removePromptStatus(promptId, statusKey); + setAlertDetails({ + type: "warning", + content: "Prompt execution timed out. Please try again.", + }); + } + }, SOCKET_TIMEOUT_MS); }) .catch((err) => { setAlertDetails( handleException(err, "Failed to generate prompt output") ); + const statusKey = generateApiRunStatusId(docId, profileId); + removePromptStatus(promptId, statusKey); }) .finally(() => { freeActiveApi(); - const statusKey = generateApiRunStatusId(docId, profileId); - removePromptStatus(promptId, statusKey); }); }; diff --git a/frontend/src/hooks/usePromptStudioSocket.js b/frontend/src/hooks/usePromptStudioSocket.js new file mode 100644 index 0000000000..142f0c750e --- /dev/null +++ b/frontend/src/hooks/usePromptStudioSocket.js @@ -0,0 +1,119 @@ +import { useContext, useEffect, useCallback } from "react"; + +import { SocketContext } from "../helpers/SocketContext"; +import { generateApiRunStatusId } from "../helpers/GetStaticData"; +import { useAlertStore } from "../store/alert-store"; +import { useCustomToolStore } from "../store/custom-tool-store"; +import { usePromptRunStatusStore } from "../store/prompt-run-status-store"; +import { useExceptionHandler } from "./useExceptionHandler"; +import usePromptOutput from "./usePromptOutput"; + +const PROMPT_STUDIO_RESULT_EVENT = "prompt_studio_result"; + +/** + * Hook that listens for `prompt_studio_result` Socket.IO events emitted by + * backend Celery tasks (fetch_response, single_pass_extraction, index_document). + * + * On completion it feeds the result into the prompt-output store and clears + * the corresponding run-status entries so the UI stops showing spinners. + */ +const usePromptStudioSocket = () => { + const socket = useContext(SocketContext); + const { removePromptStatus } = usePromptRunStatusStore(); + const { updateCustomTool, deleteIndexDoc } = useCustomToolStore(); + const { setAlertDetails } = useAlertStore(); + const handleException = useExceptionHandler(); + const { updatePromptOutputState } = usePromptOutput(); + + const clearResultStatuses = useCallback( + (data) => { + if (!Array.isArray(data)) return; + data.forEach((item) => { + const promptId = item?.prompt_id; + const docId = item?.document_manager; + const profileId = item?.profile_manager; + if (promptId && docId && profileId) { + const statusKey = generateApiRunStatusId(docId, profileId); + removePromptStatus(promptId, statusKey); + } + }); + }, + [removePromptStatus] + ); + + const handleCompleted = useCallback( + (operation, result) => { + if (operation === "fetch_response") { + const data = Array.isArray(result) ? result : []; + updatePromptOutputState(data, false); + clearResultStatuses(data); + } else if (operation === "single_pass_extraction") { + const data = Array.isArray(result) ? result : []; + updatePromptOutputState(data, false); + updateCustomTool({ isSinglePassExtractLoading: false }); + clearResultStatuses(data); + } else if (operation === "index_document") { + const docId = result?.document_id; + if (docId) deleteIndexDoc(docId); + setAlertDetails({ + type: "success", + content: result?.message || "Document indexed successfully.", + }); + } + }, + [ + updatePromptOutputState, + clearResultStatuses, + updateCustomTool, + setAlertDetails, + deleteIndexDoc, + ] + ); + + const handleFailed = useCallback( + (operation, error, extra) => { + setAlertDetails({ + type: "error", + content: error || `${operation} failed`, + }); + if (operation === "single_pass_extraction") { + updateCustomTool({ isSinglePassExtractLoading: false }); + } else if (operation === "index_document") { + const docId = extra?.document_id; + if (docId) deleteIndexDoc(docId); + } + }, + [setAlertDetails, updateCustomTool, deleteIndexDoc] + ); + + const onResult = useCallback( + (payload) => { + try { + const msg = payload?.data || payload; + const { status, operation, result, error, ...extra } = msg; + + if (status === "completed") { + handleCompleted(operation, result); + } else if (status === "failed") { + handleFailed(operation, error, extra); + } + } catch (err) { + setAlertDetails( + handleException(err, "Failed to process prompt studio result") + ); + } + }, + [handleCompleted, handleFailed, setAlertDetails, handleException] + ); + + useEffect(() => { + if (!socket) return; + + socket.on(PROMPT_STUDIO_RESULT_EVENT, onResult); + return () => { + socket.off(PROMPT_STUDIO_RESULT_EVENT, onResult); + }; + }, [socket, onResult]); +}; + +export default usePromptStudioSocket; diff --git a/frontend/src/setupProxy.js b/frontend/src/setupProxy.js index f36f375919..cb7a97dbb6 100644 --- a/frontend/src/setupProxy.js +++ b/frontend/src/setupProxy.js @@ -11,6 +11,7 @@ module.exports = (app) => { createProxyMiddleware({ target: process.env.REACT_APP_BACKEND_URL, changeOrigin: true, + ws: true, }) ); } diff --git a/unstract/core/src/unstract/core/pubsub_helper.py b/unstract/core/src/unstract/core/pubsub_helper.py index 6f96d9f7c6..d45b1dfd30 100644 --- a/unstract/core/src/unstract/core/pubsub_helper.py +++ b/unstract/core/src/unstract/core/pubsub_helper.py @@ -16,16 +16,16 @@ class LogPublisher: broker_url = str( httpx.URL(os.getenv("CELERY_BROKER_BASE_URL", "amqp://")).copy_with( - username=os.getenv("CELERY_BROKER_USER"), - password=os.getenv("CELERY_BROKER_PASS"), + username=os.getenv("CELERY_BROKER_USER") or None, + password=os.getenv("CELERY_BROKER_PASS") or None, ) ) kombu_conn = Connection(broker_url) r = redis.Redis( host=os.environ.get("REDIS_HOST"), port=os.environ.get("REDIS_PORT", 6379), - username=os.environ.get("REDIS_USER"), - password=os.environ.get("REDIS_PASSWORD"), + username=os.environ.get("REDIS_USER") or None, + password=os.environ.get("REDIS_PASSWORD") or None, ) @staticmethod @@ -91,6 +91,29 @@ def log_workflow_update( "message": message, } + @staticmethod + def log_progress( + component: dict[str, str], + level: str, + state: str, + message: str, + ) -> dict[str, str]: + """Build a progress log message for streaming to the frontend. + + Same structure as ``log_prompt()`` but uses ``type: "PROGRESS"`` + so the frontend can distinguish executor progress from regular + log messages. + """ + return { + "timestamp": datetime.now(UTC).timestamp(), + "type": "PROGRESS", + "service": "prompt", + "component": component, + "level": level, + "state": state, + "message": message, + } + @staticmethod def log_prompt( component: dict[str, str], diff --git a/unstract/sdk1/src/unstract/sdk1/execution/context.py b/unstract/sdk1/src/unstract/sdk1/execution/context.py index ce5810066d..f149c4d4e7 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/context.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/context.py @@ -52,6 +52,9 @@ class ExecutionContext: executor_params: Opaque, operation-specific payload passed through to the executor. Must be JSON-serializable. request_id: Correlation ID for tracing across services. + log_events_id: Socket.IO channel ID for streaming progress + logs to the frontend. ``None`` when not in an IDE + session (no logs published). """ executor_name: str @@ -61,6 +64,7 @@ class ExecutionContext: organization_id: str | None = None executor_params: dict[str, Any] = field(default_factory=dict) request_id: str | None = None + log_events_id: str | None = None def __post_init__(self) -> None: """Validate required fields after initialization.""" @@ -93,6 +97,7 @@ def to_dict(self) -> dict[str, Any]: "organization_id": self.organization_id, "executor_params": self.executor_params, "request_id": self.request_id, + "log_events_id": self.log_events_id, } @classmethod @@ -106,4 +111,5 @@ def from_dict(cls, data: dict[str, Any]) -> "ExecutionContext": organization_id=data.get("organization_id"), executor_params=data.get("executor_params", {}), request_id=data.get("request_id"), + log_events_id=data.get("log_events_id"), ) diff --git a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py index d3214ad4c2..c8e2674ad1 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py @@ -109,12 +109,16 @@ def dispatch( args=[context.to_dict()], queue=_QUEUE_NAME, ) + logger.info( + "Task sent: celery_task_id=%s, waiting for result...", + async_result.id, + ) try: # disable_sync_subtasks=False: safe because the executor task - # runs on a *different* broker (RabbitMQ) and worker pool - # (worker-v2) — no deadlock risk even when dispatch() is - # called from inside a Django Celery task (Redis broker). + # runs on a separate worker pool (worker-v2) — no deadlock + # risk even when dispatch() is called from inside a Django + # Celery task. result_dict = async_result.get( timeout=timeout, disable_sync_subtasks=False, diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index 8200ec1530..7749fa3b12 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -665,7 +665,7 @@ def test_dispatch_sends_task_and_returns_result( queue="executor", ) mock_app.send_task.return_value.get.assert_called_once_with( - timeout=60 + timeout=60, disable_sync_subtasks=False ) def test_dispatch_uses_default_timeout(self: Self) -> None: @@ -677,7 +677,7 @@ def test_dispatch_uses_default_timeout(self: Self) -> None: dispatcher.dispatch(ctx) mock_app.send_task.return_value.get.assert_called_once_with( - timeout=3600 + timeout=3600, disable_sync_subtasks=False ) def test_dispatch_timeout_from_env( @@ -692,7 +692,7 @@ def test_dispatch_timeout_from_env( dispatcher.dispatch(ctx) mock_app.send_task.return_value.get.assert_called_once_with( - timeout=120 + timeout=120, disable_sync_subtasks=False ) def test_dispatch_explicit_timeout_overrides_env( @@ -707,7 +707,7 @@ def test_dispatch_explicit_timeout_overrides_env( dispatcher.dispatch(ctx, timeout=30) mock_app.send_task.return_value.get.assert_called_once_with( - timeout=30 + timeout=30, disable_sync_subtasks=False ) def test_dispatch_timeout_returns_failure( diff --git a/workers/executor/executor_tool_shim.py b/workers/executor/executor_tool_shim.py index 8baee47194..22f44c1ffc 100644 --- a/workers/executor/executor_tool_shim.py +++ b/workers/executor/executor_tool_shim.py @@ -13,12 +13,22 @@ import os from typing import Any +from unstract.core.pubsub_helper import LogPublisher from unstract.sdk1.constants import LogLevel, ToolEnv from unstract.sdk1.exceptions import SdkError from unstract.sdk1.tool.stream import StreamMixin logger = logging.getLogger(__name__) +# Map SDK log levels to the string levels used by LogPublisher. +_SDK_TO_WF_LEVEL: dict[LogLevel, str] = { + LogLevel.DEBUG: "INFO", # DEBUG not surfaced to frontend + LogLevel.INFO: "INFO", + LogLevel.WARN: "WARN", + LogLevel.ERROR: "ERROR", + LogLevel.FATAL: "ERROR", +} + # Mapping from SDK LogLevel enum to Python logging levels. _LEVEL_MAP = { LogLevel.DEBUG: logging.DEBUG, @@ -45,15 +55,27 @@ class ExecutorToolShim(StreamMixin): adapter = SomeAdapter(tool=shim) # adapter calls shim.get_env_or_die() """ - def __init__(self, platform_api_key: str = "") -> None: + def __init__( + self, + platform_api_key: str = "", + log_events_id: str = "", + component: dict[str, str] | None = None, + ) -> None: """Initialize the shim. Args: platform_api_key: The platform service API key for this execution. Returned by ``get_env_or_die()`` when the caller asks for ``PLATFORM_SERVICE_API_KEY``. + log_events_id: Socket.IO channel ID for streaming progress + logs. Empty string disables publishing. + component: Structured identifier dict for log correlation + (``tool_id``, ``run_id``, ``doc_name``, optionally + ``prompt_key``). """ self.platform_api_key = platform_api_key + self.log_events_id = log_events_id + self.component = component or {} # Initialize StreamMixin. EXECUTION_BY_TOOL is not set in # the worker environment, so _exec_by_tool will be False. super().__init__(log_level=LogLevel.INFO) @@ -95,12 +117,15 @@ def stream_log( stage: str = "TOOL_RUN", **kwargs: dict[str, Any], ) -> None: - """Route log messages to Python logging. + """Route log messages to Python logging and publish progress. In the executor worker context, logs go through the standard Python logging framework (captured by Celery) rather than the Unstract stdout JSON protocol used by tools. + Progress messages are published via ``LogPublisher.publish()`` + to the Redis broker (shared with worker-logging). + Args: log: The log message. level: SDK log level. @@ -110,6 +135,26 @@ def stream_log( py_level = _LEVEL_MAP.get(level, logging.INFO) logger.log(py_level, log) + # Publish progress to frontend via the log consumer queue. + if self.log_events_id: + try: + wf_level = _SDK_TO_WF_LEVEL.get(level, "INFO") + payload = LogPublisher.log_progress( + component=self.component, + level=wf_level, + state=stage, + message=log, + ) + LogPublisher.publish( + channel_id=self.log_events_id, + payload=payload, + ) + except Exception: + logger.debug( + "Failed to publish progress log (non-fatal)", + exc_info=True, + ) + def stream_error_and_exit( self, message: str, err: Exception | None = None ) -> None: diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 64f2d7c137..1aefa2444c 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -8,6 +8,7 @@ """ import logging +import time from pathlib import Path from typing import Any @@ -54,6 +55,10 @@ class LegacyExecutor(BaseExecutor): Operation.AGENTIC_EXTRACTION.value: "_handle_agentic_extraction", } + # Defaults for log streaming (overridden by execute()). + _log_events_id: str = "" + _log_component: dict[str, str] = {} + @property def name(self) -> str: return "legacy" @@ -69,6 +74,12 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: Raises: NotImplementedError: From stub handlers (until 2D–2H). """ + # Extract log streaming info (set by tasks.py for IDE sessions). + self._log_events_id: str = context.log_events_id or "" + self._log_component: dict[str, str] = getattr( + context, "_log_component", {} + ) + handler_name = self._OPERATION_MAP.get(context.operation) if handler_name is None: return ExecutionResult.failure( @@ -81,18 +92,32 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: handler = getattr(self, handler_name) logger.info( "LegacyExecutor routing operation=%s to %s " - "(run_id=%s request_id=%s)", + "(run_id=%s request_id=%s execution_source=%s)", context.operation, handler_name, context.run_id, context.request_id, + context.execution_source, ) + start = time.monotonic() try: - return handler(context) + result = handler(context) + elapsed = time.monotonic() - start + logger.info( + "Handler %s completed in %.2fs " + "(run_id=%s success=%s)", + handler_name, + elapsed, + context.run_id, + result.success, + ) + return result except LegacyExecutorError as exc: + elapsed = time.monotonic() - start logger.warning( - "Handler %s raised %s: %s", + "Handler %s failed after %.2fs: %s: %s", handler_name, + elapsed, type(exc).__name__, exc.message, ) @@ -140,7 +165,11 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: execution_data_dir: str | None = params.get(IKeys.EXECUTION_DATA_DIR) # Build adapter shim and X2Text - shim = ExecutorToolShim(platform_api_key=platform_api_key) + shim = ExecutorToolShim( + platform_api_key=platform_api_key, + log_events_id=self._log_events_id, + component=self._log_component, + ) x2text = X2Text( tool=shim, adapter_instance_id=x2text_instance_id, @@ -148,7 +177,17 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: ) fs = FileUtils.get_fs_instance(execution_source=execution_source) + logger.info( + "Starting text extraction: x2text_adapter=%s file=%s " + "run_id=%s", + x2text_instance_id, + Path(file_path).name, + context.run_id, + ) + shim.stream_log("Initializing text extractor...") + try: + shim.stream_log("Extracting text from document...") if enable_highlight and isinstance( x2text.x2text_instance, (LLMWhisperer, LLMWhispererV2) ): @@ -174,12 +213,24 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: fs=fs, ) + logger.info( + "Text extraction completed: file=%s run_id=%s", + Path(file_path).name, + context.run_id, + ) + shim.stream_log("Text extraction completed") return ExecutionResult( success=True, data={IKeys.EXTRACTED_TEXT: process_response.extracted_text}, ) except AdapterError as e: name = x2text.x2text_instance.get_name() + logger.error( + "Text extraction failed: adapter=%s file=%s error=%s", + name, + Path(file_path).name, + str(e), + ) msg = f"Error from text extractor '{name}'. {e}" raise ExtractionError(message=msg) from e @@ -287,11 +338,26 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: usage_kwargs=usage_kwargs, ) - shim = ExecutorToolShim(platform_api_key=platform_api_key) + shim = ExecutorToolShim( + platform_api_key=platform_api_key, + log_events_id=self._log_events_id, + component=self._log_component, + ) fs_instance = FileUtils.get_fs_instance( execution_source=execution_source ) + logger.info( + "Starting indexing: chunk_size=%d chunk_overlap=%d " + "reindex=%s file=%s run_id=%s", + chunk_size, + chunk_overlap, + reindex, + Path(file_path).name, + context.run_id, + ) + shim.stream_log("Initializing indexing pipeline...") + # Skip indexing when chunk_size is 0 — no vector operations needed. # ChunkingConfig raises ValueError for 0, so handle before DTO. if chunk_size == 0: @@ -332,6 +398,8 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: doc_id = index.generate_index_key( file_info=file_info, fs=fs_instance ) + logger.debug("Generated index key: doc_id=%s", doc_id) + shim.stream_log("Checking document index status...") embedding = EmbeddingCompat( adapter_instance_id=embedding_instance_id, @@ -347,16 +415,34 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: doc_id_found = index.is_document_indexed( doc_id=doc_id, embedding=embedding, vector_db=vector_db ) + logger.info( + "Index status: doc_id=%s found=%s reindex=%s", + doc_id, + doc_id_found, + reindex, + ) + shim.stream_log("Indexing document into vector store...") index.perform_indexing( vector_db=vector_db, doc_id=doc_id, extracted_text=extracted_text, doc_id_found=doc_id_found, ) + logger.info( + "Indexing completed: doc_id=%s file=%s", + doc_id, + Path(file_path).name, + ) + shim.stream_log("Document indexing completed") return ExecutionResult( success=True, data={IKeys.DOC_ID: doc_id} ) except Exception as e: + logger.error( + "Indexing failed: file=%s error=%s", + Path(file_path).name, + str(e), + ) status_code = getattr(e, "status_code", 500) raise LegacyExecutorError( message=f"Error while indexing: {e}", code=status_code @@ -465,6 +551,15 @@ def _handle_answer_prompt( variable_names: list[str] = [] context_retrieval_metrics: dict[str, Any] = {} + logger.info( + "Starting answer_prompt: tool_id=%s prompt_count=%d " + "file=%s run_id=%s", + tool_id, + len(prompts), + doc_name, + run_id, + ) + # Lazy imports ( AnswerPromptService, @@ -489,9 +584,24 @@ def _handle_answer_prompt( prompt_text = output[PSKeys.PROMPT] chunk_size = output[PSKeys.CHUNK_SIZE] - logger.info("[%s] chunk size: %s", tool_id, chunk_size) + logger.debug( + "Prompt config: name=%s chunk_size=%d type=%s", + prompt_name, + chunk_size, + output.get(PSKeys.TYPE, "TEXT"), + ) - shim = ExecutorToolShim(platform_api_key=platform_api_key) + # Enrich component with current prompt_key for log correlation. + prompt_component = { + **self._log_component, + "prompt_key": prompt_name, + } + shim = ExecutorToolShim( + platform_api_key=platform_api_key, + log_events_id=self._log_events_id, + component=prompt_component, + ) + shim.stream_log(f"Processing prompt: {prompt_name}") # {{variable}} template replacement if VariableReplacementService.is_variables_present( @@ -511,7 +621,12 @@ def _handle_answer_prompt( ) ) - logger.info("[%s] Executing prompt: '%s'", tool_id, prompt_name) + logger.info( + "Executing prompt: tool_id=%s name=%s run_id=%s", + tool_id, + prompt_name, + run_id, + ) # %variable% replacement output[PSKeys.PROMPTX] = AnswerPromptService.extract_variable( @@ -592,10 +707,15 @@ def _handle_answer_prompt( valid_strategies = {s.value for s in RetrievalStrategy} if retrieval_strategy in valid_strategies: + shim.stream_log( + f"Retrieving context for: {prompt_name}" + ) logger.info( - "[%s] Performing retrieval for: %s", - tool_id, - file_path, + "Performing retrieval: prompt=%s strategy=%s " + "chunk_size=%d", + prompt_name, + retrieval_strategy, + chunk_size, ) if chunk_size == 0: context_list = ( @@ -616,8 +736,16 @@ def _handle_answer_prompt( context_retrieval_metrics=context_retrieval_metrics, ) metadata[PSKeys.CONTEXT][prompt_name] = context_list + logger.debug( + "Retrieved %d context chunks for prompt: %s", + len(context_list), + prompt_name, + ) # Run prompt with retrieved context + shim.stream_log( + f"Running LLM completion for: {prompt_name}" + ) answer = AnswerPromptService.construct_and_run_prompt( tool_settings=tool_settings, output=output, @@ -629,8 +757,11 @@ def _handle_answer_prompt( file_path=file_path, ) else: - logger.info( - "Invalid retrieval strategy: %s", retrieval_strategy + logger.warning( + "Skipping retrieval: invalid strategy=%s " + "for prompt=%s", + retrieval_strategy, + prompt_name, ) # ---- Type-specific post-processing ------------------------- @@ -648,6 +779,8 @@ def _handle_answer_prompt( doc_name=doc_name, ) + shim.stream_log(f"Completed prompt: {prompt_name}") + # Strip trailing newline val = structured_output.get(prompt_name) if isinstance(val, str): @@ -666,6 +799,13 @@ def _handle_answer_prompt( if vector_db: vector_db.close() + logger.info( + "All prompts processed: tool_id=%s prompt_count=%d file=%s", + tool_id, + len(prompts), + doc_name, + ) + # ---- Sanitize null values ------------------------------------------ structured_output = self._sanitize_null_values(structured_output) @@ -865,6 +1005,12 @@ def _handle_summarize( error="Missing required param: context" ) + logger.info( + "Starting summarization: prompt_keys=%s run_id=%s", + prompt_keys, + context.run_id, + ) + # Build the summarize prompt prompt = f"{summarize_prompt}\n\n" if prompt_keys: @@ -876,11 +1022,16 @@ def _handle_summarize( f"-----------------\n\nSummary:" ) - shim = ExecutorToolShim(platform_api_key=platform_api_key) + shim = ExecutorToolShim( + platform_api_key=platform_api_key, + log_events_id=self._log_events_id, + component=self._log_component, + ) usage_kwargs = {"run_id": context.run_id} _, _, _, _, LLM, _, _ = self._get_prompt_deps() + shim.stream_log("Initializing LLM for summarization...") try: llm = LLM( adapter_instance_id=llm_adapter_id, @@ -889,14 +1040,22 @@ def _handle_summarize( ) from executor.executors.answer_prompt import AnswerPromptService + shim.stream_log("Running document summarization...") summary = AnswerPromptService.run_completion( llm=llm, prompt=prompt ) + logger.info( + "Summarization completed: run_id=%s", context.run_id + ) + shim.stream_log("Summarization completed") return ExecutionResult( success=True, data={"data": summary}, ) except Exception as e: + logger.error( + "Summarization failed: error=%s", str(e) + ) status_code = getattr(e, "status_code", None) or 500 raise LegacyExecutorError( message=f"Error during summarization: {e}", diff --git a/workers/executor/tasks.py b/workers/executor/tasks.py index 75bf52b3fb..e4db70aaee 100644 --- a/workers/executor/tasks.py +++ b/workers/executor/tasks.py @@ -46,11 +46,14 @@ def execute_extraction( request_id = execution_context_dict.get("request_id", "") logger.info( "Received execute_extraction task: " - "celery_task_id=%s request_id=%s executor=%s operation=%s", + "celery_task_id=%s request_id=%s executor=%s " + "operation=%s execution_source=%s run_id=%s", self.request.id, request_id, execution_context_dict.get("executor_name"), execution_context_dict.get("operation"), + execution_context_dict.get("execution_source"), + execution_context_dict.get("run_id"), ) try: @@ -63,6 +66,19 @@ def execute_extraction( error=f"Invalid execution context: {exc}" ).to_dict() + # Build component dict for log correlation when streaming to + # the frontend. Attached as a transient attribute (not serialized). + if context.log_events_id: + params = context.executor_params + context._log_component = { + "tool_id": params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": context.operation, + } + else: + context._log_component = {} + orchestrator = ExecutionOrchestrator() result = orchestrator.execute(context) diff --git a/workers/tests/test_answer_prompt.py b/workers/tests/test_answer_prompt.py index 97a6bb44ae..04934aea1a 100644 --- a/workers/tests/test_answer_prompt.py +++ b/workers/tests/test_answer_prompt.py @@ -144,6 +144,27 @@ def _mock_deps(llm=None): ) +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +_PATCH_INDEX_UTILS = ( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key" +) + + +@pytest.fixture(autouse=True) +def _mock_indexing_utils(): + """Mock IndexingUtils.generate_index_key for all answer_prompt tests. + + _handle_answer_prompt calls IndexingUtils.generate_index_key(tool=shim) + which delegates to PlatformHelper.get_adapter_config() — a real HTTP + call. Since tests use a mock shim, the platform URL is invalid. + """ + with patch(_PATCH_INDEX_UTILS, return_value="doc-id-test"): + yield + + # --------------------------------------------------------------------------- # Tests — _handle_answer_prompt # --------------------------------------------------------------------------- diff --git a/workers/tests/test_phase1_log_streaming.py b/workers/tests/test_phase1_log_streaming.py new file mode 100644 index 0000000000..37d1e7d327 --- /dev/null +++ b/workers/tests/test_phase1_log_streaming.py @@ -0,0 +1,483 @@ +"""Phase 1 — Executor log streaming to frontend via Socket.IO. + +Tests cover: +- ExecutionContext round-trips log_events_id through to_dict/from_dict +- LogPublisher.log_progress() returns type: "PROGRESS" (not "LOG") +- LogPublisher.log_prompt() still returns type: "LOG" (unchanged) +- ExecutorToolShim with log_events_id: stream_log() publishes progress +- ExecutorToolShim without log_events_id: no publishing, no exceptions +- ExecutorToolShim with failing LogPublisher: no exception raised +- execute_extraction builds component dict when log_events_id present +- execute_extraction skips component dict when log_events_id absent +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from unstract.sdk1.constants import LogLevel +from unstract.sdk1.execution.context import ExecutionContext + + +# --------------------------------------------------------------------------- +# 1A — ExecutionContext.log_events_id round-trip +# --------------------------------------------------------------------------- + + +class TestExecutionContextLogEventsId: + """Verify log_events_id serialization in ExecutionContext.""" + + def test_log_events_id_default_is_none(self): + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="ide", + ) + assert ctx.log_events_id is None + + def test_log_events_id_round_trips(self): + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="ide", + log_events_id="session-abc", + ) + d = ctx.to_dict() + assert d["log_events_id"] == "session-abc" + + restored = ExecutionContext.from_dict(d) + assert restored.log_events_id == "session-abc" + + def test_log_events_id_none_round_trips(self): + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="ide", + ) + d = ctx.to_dict() + assert d["log_events_id"] is None + + restored = ExecutionContext.from_dict(d) + assert restored.log_events_id is None + + def test_backward_compat_missing_key(self): + """from_dict with old payload lacking log_events_id.""" + old_payload = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "r1", + "execution_source": "ide", + } + ctx = ExecutionContext.from_dict(old_payload) + assert ctx.log_events_id is None + + +# --------------------------------------------------------------------------- +# 1B-i — LogPublisher.log_progress() vs log_prompt() +# --------------------------------------------------------------------------- + + +class TestLogPublisherLogProgress: + """Verify log_progress returns type PROGRESS, log_prompt returns LOG.""" + + def test_log_progress_type(self): + from unstract.core.pubsub_helper import LogPublisher + + result = LogPublisher.log_progress( + component={"tool_id": "t1"}, + level="INFO", + state="TOOL_RUN", + message="Extracting text...", + ) + assert result["type"] == "PROGRESS" + assert result["service"] == "prompt" + assert result["message"] == "Extracting text..." + assert result["component"] == {"tool_id": "t1"} + assert "timestamp" in result + + def test_log_prompt_type_unchanged(self): + from unstract.core.pubsub_helper import LogPublisher + + result = LogPublisher.log_prompt( + component={"tool_id": "t1"}, + level="INFO", + state="RUNNING", + message="test", + ) + assert result["type"] == "LOG" + assert result["service"] == "prompt" + + def test_log_progress_has_all_fields(self): + from unstract.core.pubsub_helper import LogPublisher + + result = LogPublisher.log_progress( + component={"tool_id": "t1", "prompt_key": "pk"}, + level="ERROR", + state="FAILED", + message="boom", + ) + assert result["level"] == "ERROR" + assert result["state"] == "FAILED" + assert result["component"]["prompt_key"] == "pk" + + +# --------------------------------------------------------------------------- +# 1B-ii — ExecutorToolShim progress publishing +# --------------------------------------------------------------------------- + + +class TestExecutorToolShimProgress: + """Verify ExecutorToolShim publishes progress via LogPublisher.""" + + @patch("executor.executor_tool_shim.LogPublisher") + def test_stream_log_publishes_when_log_events_id_set(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + component = {"tool_id": "t1", "run_id": "r1"} + shim = ExecutorToolShim( + platform_api_key="sk-test", + log_events_id="session-xyz", + component=component, + ) + shim.stream_log("Extracting...", level=LogLevel.INFO) + + mock_lp.log_progress.assert_called_once_with( + component=component, + level="INFO", + state="TOOL_RUN", + message="Extracting...", + ) + mock_lp.publish.assert_called_once_with( + channel_id="session-xyz", + payload=mock_lp.log_progress.return_value, + ) + + @patch("executor.executor_tool_shim.LogPublisher") + def test_stream_log_no_publish_without_log_events_id(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim(platform_api_key="sk-test") + shim.stream_log("Hello", level=LogLevel.INFO) + + mock_lp.log_progress.assert_not_called() + mock_lp.publish.assert_not_called() + + @patch("executor.executor_tool_shim.LogPublisher") + def test_stream_log_empty_log_events_id_no_publish(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim( + platform_api_key="sk-test", log_events_id="" + ) + shim.stream_log("Hello", level=LogLevel.INFO) + + mock_lp.log_progress.assert_not_called() + + @patch("executor.executor_tool_shim.LogPublisher") + def test_stream_log_swallows_publish_error(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + mock_lp.publish.side_effect = ConnectionError("AMQP down") + shim = ExecutorToolShim( + platform_api_key="sk-test", + log_events_id="session-xyz", + component={"tool_id": "t1"}, + ) + # Should NOT raise + shim.stream_log("test", level=LogLevel.INFO) + + @patch("executor.executor_tool_shim.LogPublisher") + def test_level_mapping(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim( + platform_api_key="sk-test", + log_events_id="s1", + component={}, + ) + + cases = [ + (LogLevel.DEBUG, "INFO"), + (LogLevel.INFO, "INFO"), + (LogLevel.WARN, "WARN"), + (LogLevel.ERROR, "ERROR"), + (LogLevel.FATAL, "ERROR"), + ] + for sdk_level, expected_wf_level in cases: + mock_lp.reset_mock() + shim.stream_log("msg", level=sdk_level) + call_kwargs = mock_lp.log_progress.call_args + assert call_kwargs.kwargs["level"] == expected_wf_level, ( + f"SDK {sdk_level} should map to {expected_wf_level}" + ) + + @patch("executor.executor_tool_shim.LogPublisher") + def test_custom_stage_passed_through(self, mock_lp): + from executor.executor_tool_shim import ExecutorToolShim + + shim = ExecutorToolShim( + platform_api_key="sk-test", + log_events_id="s1", + component={}, + ) + shim.stream_log("msg", level=LogLevel.INFO, stage="INDEXING") + call_kwargs = mock_lp.log_progress.call_args + assert call_kwargs.kwargs["state"] == "INDEXING" + + +# --------------------------------------------------------------------------- +# 1C — Component dict building in execute_extraction +# --------------------------------------------------------------------------- + + +class TestExecuteExtractionComponentDict: + """Verify component dict is built from executor_params.""" + + @patch("executor.tasks.ExecutionOrchestrator") + def test_component_dict_built_when_log_events_id_present( + self, mock_orch_cls + ): + mock_orch = MagicMock() + mock_orch.execute.return_value = MagicMock( + success=True, to_dict=lambda: {"success": True} + ) + mock_orch_cls.return_value = mock_orch + + from executor.tasks import execute_extraction + + payload = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "r1", + "execution_source": "ide", + "log_events_id": "session-abc", + "executor_params": { + "tool_id": "tool-123", + "file_name": "invoice.pdf", + }, + } + execute_extraction(payload) + + # Verify the context passed to orchestrator has _log_component + ctx = mock_orch.execute.call_args[0][0] + assert ctx._log_component == { + "tool_id": "tool-123", + "run_id": "r1", + "doc_name": "invoice.pdf", + "operation": "extract", + } + + @patch("executor.tasks.ExecutionOrchestrator") + def test_component_dict_empty_when_no_log_events_id( + self, mock_orch_cls + ): + mock_orch = MagicMock() + mock_orch.execute.return_value = MagicMock( + success=True, to_dict=lambda: {"success": True} + ) + mock_orch_cls.return_value = mock_orch + + from executor.tasks import execute_extraction + + payload = { + "executor_name": "legacy", + "operation": "extract", + "run_id": "r1", + "execution_source": "ide", + "executor_params": {}, + } + execute_extraction(payload) + + ctx = mock_orch.execute.call_args[0][0] + assert ctx._log_component == {} + + +# --------------------------------------------------------------------------- +# 1D — LegacyExecutor passes log info to shim +# --------------------------------------------------------------------------- + + +class TestLegacyExecutorLogPassthrough: + """Verify LegacyExecutor passes log_events_id and component to shim.""" + + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_extract_passes_log_info_to_shim( + self, mock_shim_cls, mock_x2text, mock_fs + ): + from executor.executors.legacy_executor import LegacyExecutor + from unstract.sdk1.execution.registry import ExecutorRegistry + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + + mock_shim = MagicMock() + mock_shim_cls.return_value = mock_shim + mock_x2t = MagicMock() + mock_x2t.process.return_value = MagicMock( + extracted_text="hello" + ) + mock_x2text.return_value = mock_x2t + + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="ide", + log_events_id="session-abc", + executor_params={ + "x2text_instance_id": "x2t-1", + "file_path": "/tmp/test.pdf", + "platform_api_key": "sk-test", + }, + ) + ctx._log_component = {"tool_id": "t1", "run_id": "r1", "doc_name": "test.pdf"} + + executor = LegacyExecutor() + result = executor.execute(ctx) + + assert result.success + mock_shim_cls.assert_called_once_with( + platform_api_key="sk-test", + log_events_id="session-abc", + component={"tool_id": "t1", "run_id": "r1", "doc_name": "test.pdf"}, + ) + + @patch("executor.executors.legacy_executor.FileUtils.get_fs_instance") + @patch("executor.executors.legacy_executor.X2Text") + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_extract_no_log_info_when_absent( + self, mock_shim_cls, mock_x2text, mock_fs + ): + from executor.executors.legacy_executor import LegacyExecutor + from unstract.sdk1.execution.registry import ExecutorRegistry + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + + mock_shim = MagicMock() + mock_shim_cls.return_value = mock_shim + mock_x2t = MagicMock() + mock_x2t.process.return_value = MagicMock( + extracted_text="hello" + ) + mock_x2text.return_value = mock_x2t + + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="tool", + executor_params={ + "x2text_instance_id": "x2t-1", + "file_path": "/tmp/test.pdf", + "platform_api_key": "sk-test", + }, + ) + + executor = LegacyExecutor() + result = executor.execute(ctx) + + assert result.success + mock_shim_cls.assert_called_once_with( + platform_api_key="sk-test", + log_events_id="", + component={}, + ) + + @patch( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" + ) + @patch("executor.executors.legacy_executor.ExecutorToolShim") + def test_answer_prompt_enriches_component_with_prompt_key( + self, mock_shim_cls, mock_prompt_deps + ): + """Verify per-prompt shim includes prompt_key in component.""" + from executor.executors.legacy_executor import LegacyExecutor + from unstract.sdk1.execution.registry import ExecutorRegistry + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + + mock_shim = MagicMock() + mock_shim_cls.return_value = mock_shim + + # Mock prompt deps + MockAnswerPromptService = MagicMock() + MockAnswerPromptService.extract_variable.return_value = "prompt text" + MockRetrievalService = MagicMock() + MockVariableReplacementService = MagicMock() + MockVariableReplacementService.is_variables_present.return_value = ( + False + ) + MockIndex = MagicMock() + MockLLM = MagicMock() + MockEmbeddingCompat = MagicMock() + MockVectorDB = MagicMock() + + mock_prompt_deps.return_value = ( + MockAnswerPromptService, + MockRetrievalService, + MockVariableReplacementService, + MockIndex, + MockLLM, + MockEmbeddingCompat, + MockVectorDB, + ) + + ctx = ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id="r1", + execution_source="ide", + log_events_id="session-abc", + executor_params={ + "tool_id": "t1", + "outputs": [ + { + "name": "invoice_number", + "prompt": "What is the invoice number?", + "chunk-size": 0, + "type": "text", + "retrieval-strategy": "simple", + "vector-db": "vdb1", + "embedding": "emb1", + "x2text_adapter": "x2t1", + "chunk-overlap": 0, + "llm": "llm1", + }, + ], + "tool_settings": {}, + "PLATFORM_SERVICE_API_KEY": "sk-test", + }, + ) + ctx._log_component = { + "tool_id": "t1", + "run_id": "r1", + "doc_name": "test.pdf", + } + + # Mock IndexingUtils + with patch( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1", + ): + executor = LegacyExecutor() + # The handler will try retrieval which we need to mock + MockRetrievalService.retrieve_complete_context.return_value = [ + "context" + ] + MockAnswerPromptService.construct_and_run_prompt.return_value = ( + "INV-001" + ) + + executor.execute(ctx) + + # Check that shim was created with prompt_key in component + shim_call = mock_shim_cls.call_args + assert shim_call.kwargs["component"]["prompt_key"] == "invoice_number" + assert shim_call.kwargs["log_events_id"] == "session-abc" diff --git a/workers/tests/test_sanity_phase2.py b/workers/tests/test_sanity_phase2.py index 6961f5cc0e..c104382dd2 100644 --- a/workers/tests/test_sanity_phase2.py +++ b/workers/tests/test_sanity_phase2.py @@ -423,10 +423,11 @@ def test_index_error_full_chain(self, mock_deps, mock_get_fs, eager_app): class TestSanityAnswerPrompt: """Full-chain answer_prompt tests through Celery eager mode.""" + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sanity") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_answer_prompt_text_full_chain( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """TEXT prompt → result.data has output, metadata, metrics.""" llm = _mock_llm("sanity answer") @@ -443,10 +444,11 @@ def test_answer_prompt_text_full_chain( assert PSKeys.METRICS in result.data assert result.data[PSKeys.OUTPUT]["field_a"] == "sanity answer" + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sanity") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_answer_prompt_multi_prompt_full_chain( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """Two prompts → both field names in output and metrics.""" llm = _mock_llm("multi answer") @@ -467,10 +469,11 @@ def test_answer_prompt_multi_prompt_full_chain( assert "revenue" in result.data[PSKeys.METRICS] assert "date_signed" in result.data[PSKeys.METRICS] + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sanity") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_answer_prompt_table_fails_full_chain( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """TABLE type → failure mentioning TABLE.""" llm = _mock_llm() @@ -490,10 +493,11 @@ def test_answer_prompt_table_fails_full_chain( class TestSanitySinglePass: """Full-chain single_pass_extraction test.""" + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sanity") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_single_pass_delegates_full_chain( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """Same mocks as answer_prompt → same response shape.""" llm = _mock_llm("single pass answer") @@ -643,10 +647,11 @@ def test_index_contract(self, mock_deps, mock_get_fs, eager_app): assert result.success is True assert isinstance(result.data[IKeys.DOC_ID], str) + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sanity") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_answer_prompt_contract( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): llm = _mock_llm("contract answer") mock_deps.return_value = _mock_prompt_deps(llm) diff --git a/workers/tests/test_sanity_phase4.py b/workers/tests/test_sanity_phase4.py index 0c8615884f..bce773dec0 100644 --- a/workers/tests/test_sanity_phase4.py +++ b/workers/tests/test_sanity_phase4.py @@ -46,6 +46,9 @@ _PATCH_RUN_COMPLETION = ( "executor.executors.answer_prompt.AnswerPromptService.run_completion" ) +_PATCH_INDEX_UTILS = ( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key" +) # --------------------------------------------------------------------------- # Fixtures @@ -496,9 +499,10 @@ def test_ide_index_failure(self, mock_deps, mock_get_fs, eager_app): class TestIDEAnswerPrompt: """IDE answer_prompt payload → executor → {output, metadata, metrics}.""" + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) - def test_ide_answer_prompt_text(self, mock_shim_cls, mock_deps, eager_app): + def test_ide_answer_prompt_text(self, mock_shim_cls, mock_deps, _mock_idx, eager_app): """IDE text prompt → output dict with prompt_key → answer.""" llm = _mock_llm("INV-2024-001") mock_deps.return_value = _mock_prompt_deps(llm) @@ -515,10 +519,11 @@ def test_ide_answer_prompt_text(self, mock_shim_cls, mock_deps, eager_app): assert "metrics" in result.data assert result.data["output"]["invoice_number"] == "INV-2024-001" + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_answer_prompt_metadata_has_run_id( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """IDE response metadata contains run_id and file_name.""" llm = _mock_llm("answer") @@ -533,10 +538,11 @@ def test_ide_answer_prompt_metadata_has_run_id( assert metadata["run_id"] == "run-ide-ap" assert metadata["file_name"] == "invoice.pdf" + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_answer_prompt_with_eval_settings( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """Prompt with eval_settings passes through to executor cleanly.""" llm = _mock_llm("answer") @@ -556,10 +562,11 @@ def test_ide_answer_prompt_with_eval_settings( assert result.success is True + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_answer_prompt_platform_key_reaches_shim( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """PLATFORM_SERVICE_API_KEY in payload reaches ExecutorToolShim.""" llm = _mock_llm("answer") @@ -574,10 +581,11 @@ def test_ide_answer_prompt_platform_key_reaches_shim( call_kwargs = mock_shim_cls.call_args assert call_kwargs.kwargs.get("platform_api_key") == "pk-ide-test" + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_answer_prompt_webhook_settings( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """Prompt with webhook settings passes through cleanly.""" llm = _mock_llm("answer") @@ -598,10 +606,11 @@ def test_ide_answer_prompt_webhook_settings( class TestIDESinglePass: """IDE single_pass_extraction → executor → same shape as answer_prompt.""" + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_single_pass_multi_prompt( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """Single pass with multiple prompts → all fields in output.""" llm = _mock_llm("single pass value") @@ -617,10 +626,11 @@ def test_ide_single_pass_multi_prompt( assert "revenue" in result.data["output"] assert "date" in result.data["output"] + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_single_pass_has_metadata( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """Single pass returns metadata with run_id.""" llm = _mock_llm("value") @@ -680,10 +690,11 @@ def test_dispatcher_extract_round_trip( assert result.success is True assert result.data["extracted_text"] == "dispatcher extracted" + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_dispatcher_answer_prompt_round_trip( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """ExecutionDispatcher.dispatch() → answer_prompt → ExecutionResult.""" llm = _mock_llm("dispatcher answer") @@ -702,10 +713,11 @@ def test_dispatcher_answer_prompt_round_trip( assert result.data["output"]["invoice_number"] == "dispatcher answer" assert "metadata" in result.data + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_dispatcher_single_pass_round_trip( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """ExecutionDispatcher.dispatch() → single_pass → ExecutionResult.""" llm = _mock_llm("sp dispatch") @@ -779,10 +791,11 @@ def test_ide_source_reaches_extract_handler( # This is verified by the fact that no dump_json was called # on the fs mock. In IDE mode, whisper_hash metadata is skipped. + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_source_in_answer_prompt_enables_variable_replacement( - self, mock_shim_cls, mock_deps, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, eager_app ): """execution_source='ide' in payload sets is_ide=True for variable replacement.""" llm = _mock_llm("var answer") diff --git a/workers/tests/test_usage.py b/workers/tests/test_usage.py index 720f5388cc..7aaa553e1b 100644 --- a/workers/tests/test_usage.py +++ b/workers/tests/test_usage.py @@ -215,12 +215,16 @@ def test_llm_calls_audit_push(self): class TestMetricsInResult: + @patch( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-test", + ) @patch( "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" ) @patch("executor.executors.legacy_executor.ExecutorToolShim") def test_answer_prompt_returns_metrics( - self, mock_shim_cls, mock_get_deps + self, mock_shim_cls, mock_get_deps, _mock_idx ): """answer_prompt result includes metrics dict.""" from unstract.sdk1.execution.context import ExecutionContext From 95c65924f557a3cf24c21d9d0bae1188e81677ce Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 25 Feb 2026 18:56:16 +0530 Subject: [PATCH 04/64] Removing multi hop in Prompt studio ide and structure tool --- backend/backend/celery_config.py | 6 + .../prompt_studio_helper.py | 582 +++++++++++ .../prompt_studio_core_v2/tasks.py | 260 +++++ .../prompt_studio_core_v2/views.py | 220 +++-- .../src/unstract/sdk1/execution/context.py | 2 + .../src/unstract/sdk1/execution/dispatcher.py | 93 +- unstract/sdk1/tests/test_execution.py | 147 +++ workers/executor/executors/legacy_executor.py | 456 +++++++++ workers/executor/tasks.py | 34 +- .../file_processing/structure_tool_task.py | 423 ++------ workers/tests/test_phase5d.py | 902 ++++++++++++++++++ workers/tests/test_sanity_phase3.py | 413 +++----- workers/tests/test_sanity_phase5.py | 853 +++++++++++++++++ 13 files changed, 3732 insertions(+), 659 deletions(-) create mode 100644 workers/tests/test_phase5d.py create mode 100644 workers/tests/test_sanity_phase5.py diff --git a/backend/backend/celery_config.py b/backend/backend/celery_config.py index 9ddd8a342a..9ffe71464f 100644 --- a/backend/backend/celery_config.py +++ b/backend/backend/celery_config.py @@ -38,4 +38,10 @@ class CeleryConfig: "prompt_studio_index_document": {"queue": "celery_prompt_studio"}, "prompt_studio_fetch_response": {"queue": "celery_prompt_studio"}, "prompt_studio_single_pass": {"queue": "celery_prompt_studio"}, + # Phase 5B: Fire-and-forget callback tasks (sub-second, run on + # same queue as the old blocking tasks they replace). + "ide_index_complete": {"queue": "celery_prompt_studio"}, + "ide_index_error": {"queue": "celery_prompt_studio"}, + "ide_prompt_complete": {"queue": "celery_prompt_studio"}, + "ide_prompt_error": {"queue": "celery_prompt_studio"}, } diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py index d797f5e35d..fd09a4b99c 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py +++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py @@ -292,6 +292,588 @@ def _get_platform_api_key(org_id: str) -> str: platform_key = PlatformAuthenticationService.get_active_platform_key(org_id) return str(platform_key.key) + # ------------------------------------------------------------------ + # Phase 5B — Payload builders for fire-and-forget dispatch + # ------------------------------------------------------------------ + + @staticmethod + def build_index_payload( + tool_id: str, + file_name: str, + org_id: str, + user_id: str, + document_id: str, + run_id: str, + ) -> tuple[ExecutionContext, dict[str, Any]]: + """Build ide_index ExecutionContext for fire-and-forget dispatch. + + Does ORM validation and summarization synchronously, then returns + the execution context so the caller can dispatch with callbacks. + """ + tool: CustomTool = CustomTool.objects.get(pk=tool_id) + file_path = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( + org_id, is_create=False, user_id=user_id, tool_id=tool_id, + ) + file_path = str(Path(file_path) / file_name) + + default_profile = ProfileManager.get_default_llm_profile(tool) + if not tool: + raise ToolNotValid() + + PromptStudioHelper.validate_adapter_status(default_profile) + PromptStudioHelper.validate_profile_manager_owner_access(default_profile) + + # Handle summarization synchronously (uses Django plugin) + if tool.summarize_context: + SummarizeMigrationUtils.migrate_tool_to_adapter_based(tool) + summary_profile = default_profile + if not tool.summarize_llm_adapter: + try: + sp = ProfileManager.objects.get( + prompt_studio_tool=tool, is_summarize_llm=True + ) + sp.chunk_size = 0 + summary_profile = sp + except ProfileManager.DoesNotExist: + pass + + if summary_profile != default_profile: + PromptStudioHelper.validate_adapter_status(summary_profile) + PromptStudioHelper.validate_profile_manager_owner_access( + summary_profile + ) + + summarize_file_path = PromptStudioHelper.summarize( + file_name, org_id, run_id, tool + ) + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) + summarize_doc_id = IndexingUtils.generate_index_key( + vector_db=str(summary_profile.vector_store.id), + embedding=str(summary_profile.embedding_model.id), + x2text=str(summary_profile.x2text.id), + chunk_size="0", + chunk_overlap=str(summary_profile.chunk_overlap), + file_path=summarize_file_path, + fs=fs_instance, + tool=util, + ) + PromptStudioIndexHelper.handle_index_manager( + document_id=document_id, + is_summary=True, + profile_manager=summary_profile, + doc_id=summarize_doc_id, + ) + + # Generate doc_id for indexing tracking + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) + doc_id_key = IndexingUtils.generate_index_key( + vector_db=str(default_profile.vector_store.id), + embedding=str(default_profile.embedding_model.id), + x2text=str(default_profile.x2text.id), + chunk_size=str(default_profile.chunk_size), + chunk_overlap=str(default_profile.chunk_overlap), + file_path=file_path, + file_hash=None, + fs=fs_instance, + tool=util, + ) + + # Mark as indexing in progress + DocumentIndexingService.set_document_indexing( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key + ) + + # Build extract params + directory, filename = os.path.split(file_path) + extract_file_path = os.path.join( + directory, "extract", os.path.splitext(filename)[0] + ".txt" + ) + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + usage_kwargs = {"run_id": run_id, "file_name": filename} + + from prompt_studio.prompt_studio_core_v2.constants import ( + IndexingConstants as IKeys, + ) + + extract_params = { + IKeys.X2TEXT_INSTANCE_ID: str(default_profile.x2text.id), + IKeys.FILE_PATH: file_path, + IKeys.ENABLE_HIGHLIGHT: tool.enable_highlight, + IKeys.OUTPUT_FILE_PATH: extract_file_path, + "platform_api_key": platform_api_key, + IKeys.USAGE_KWARGS: usage_kwargs, + } + + index_params = { + IKeys.TOOL_ID: tool_id, + IKeys.EMBEDDING_INSTANCE_ID: str(default_profile.embedding_model.id), + IKeys.VECTOR_DB_INSTANCE_ID: str(default_profile.vector_store.id), + IKeys.X2TEXT_INSTANCE_ID: str(default_profile.x2text.id), + IKeys.FILE_PATH: extract_file_path, + IKeys.FILE_HASH: None, + IKeys.CHUNK_OVERLAP: default_profile.chunk_overlap, + IKeys.CHUNK_SIZE: default_profile.chunk_size, + IKeys.REINDEX: True, + IKeys.ENABLE_HIGHLIGHT: tool.enable_highlight, + IKeys.USAGE_KWARGS: usage_kwargs, + IKeys.RUN_ID: run_id, + TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value, + "platform_api_key": platform_api_key, + } + + log_events_id = StateStore.get(Common.LOG_EVENTS_ID) or "" + request_id = StateStore.get(Common.REQUEST_ID) or "" + + context = ExecutionContext( + executor_name="legacy", + operation="ide_index", + run_id=run_id or str(uuid.uuid4()), + execution_source="ide", + organization_id=org_id, + executor_params={ + "extract_params": extract_params, + "index_params": index_params, + }, + request_id=request_id, + log_events_id=log_events_id, + ) + + # x2text config hash for extraction status tracking in callback + x2text_metadata = default_profile.x2text.metadata or {} + x2text_config_hash = ToolUtils.hash_str( + json.dumps(x2text_metadata, sort_keys=True) + ) + + cb_kwargs = { + "log_events_id": log_events_id, + "request_id": request_id, + "org_id": org_id, + "user_id": user_id, + "document_id": document_id, + "doc_id_key": doc_id_key, + "profile_manager_id": str(default_profile.profile_id), + "tool_id": tool_id, + "run_id": run_id, + "file_name": file_name, + "x2text_config_hash": x2text_config_hash, + "enable_highlight": tool.enable_highlight, + } + + return context, cb_kwargs + + @staticmethod + def build_fetch_response_payload( + tool: CustomTool, + doc_path: str, + doc_name: str, + prompt: ToolStudioPrompt, + org_id: str, + user_id: str, + document_id: str, + run_id: str, + profile_manager_id: str | None = None, + ) -> tuple[ExecutionContext | None, dict[str, Any]]: + """Build answer_prompt ExecutionContext for fire-and-forget dispatch. + + Does ORM work, extraction, and indexing synchronously. Only the + LLM answer_prompt call is dispatched asynchronously. + + Returns: + (context, cb_kwargs) or (None, pending_response_dict) + """ + profile_manager = prompt.profile_manager + if profile_manager_id: + profile_manager = ProfileManagerHelper.get_profile_manager( + profile_manager_id=profile_manager_id + ) + + monitor_llm_instance: AdapterInstance | None = tool.monitor_llm + monitor_llm: str | None = None + challenge_llm_instance: AdapterInstance | None = tool.challenge_llm + challenge_llm: str | None = None + if monitor_llm_instance: + monitor_llm = str(monitor_llm_instance.id) + else: + dp = ProfileManager.get_default_llm_profile(tool) + monitor_llm = str(dp.llm.id) + + if challenge_llm_instance: + challenge_llm = str(challenge_llm_instance.id) + else: + dp = ProfileManager.get_default_llm_profile(tool) + challenge_llm = str(dp.llm.id) + + PromptStudioHelper.validate_adapter_status(profile_manager) + PromptStudioHelper.validate_profile_manager_owner_access(profile_manager) + + if not profile_manager: + raise DefaultProfileError() + + vector_db = str(profile_manager.vector_store.id) + embedding_model = str(profile_manager.embedding_model.id) + llm = str(profile_manager.llm.id) + x2text = str(profile_manager.x2text.id) + + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) + file_path = doc_path + directory, filename = os.path.split(doc_path) + extract_path = os.path.join( + directory, "extract", os.path.splitext(filename)[0] + ".txt" + ) + + doc_id = IndexingUtils.generate_index_key( + vector_db=vector_db, + embedding=embedding_model, + x2text=x2text, + chunk_size=str(profile_manager.chunk_size), + chunk_overlap=str(profile_manager.chunk_overlap), + file_path=file_path, + file_hash=None, + fs=fs_instance, + tool=util, + ) + + if DocumentIndexingService.is_document_indexing( + org_id=org_id, user_id=user_id, doc_id_key=doc_id + ): + return None, { + "status": IndexingStatus.PENDING_STATUS.value, + "message": IndexingStatus.DOCUMENT_BEING_INDEXED.value, + } + + # Extract (blocking, usually cached) + extracted_text = PromptStudioHelper.dynamic_extractor( + profile_manager=profile_manager, + file_path=file_path, + org_id=org_id, + document_id=document_id, + run_id=run_id, + enable_highlight=tool.enable_highlight, + ) + + is_summary = tool.summarize_as_source + if is_summary: + profile_manager.chunk_size = 0 + p = Path(extract_path) + extract_path = str(p.parent.parent / "summarize" / (p.stem + ".txt")) + + # Index (blocking, usually cached) + index_result = PromptStudioHelper.dynamic_indexer( + profile_manager=profile_manager, + tool_id=str(tool.tool_id), + file_path=file_path, + org_id=org_id, + document_id=document_id, + run_id=run_id, + user_id=user_id, + enable_highlight=tool.enable_highlight, + extracted_text=extracted_text, + doc_id_key=doc_id, + ) + + if index_result.get("status") == IndexingStatus.PENDING_STATUS.value: + return None, { + "status": IndexingStatus.PENDING_STATUS.value, + "message": IndexingStatus.DOCUMENT_BEING_INDEXED.value, + } + + # Build outputs + tool_id = str(tool.tool_id) + output: dict[str, Any] = {} + outputs: list[dict[str, Any]] = [] + grammar_list: list[dict[str, Any]] = [] + prompt_grammer = tool.prompt_grammer + if prompt_grammer: + for word, synonyms in prompt_grammer.items(): + grammar_list.append( + {TSPKeys.WORD: word, TSPKeys.SYNONYMS: synonyms} + ) + + output[TSPKeys.PROMPT] = prompt.prompt + output[TSPKeys.ACTIVE] = prompt.active + output[TSPKeys.REQUIRED] = prompt.required + output[TSPKeys.CHUNK_SIZE] = profile_manager.chunk_size + output[TSPKeys.VECTOR_DB] = vector_db + output[TSPKeys.EMBEDDING] = embedding_model + output[TSPKeys.CHUNK_OVERLAP] = profile_manager.chunk_overlap + output[TSPKeys.LLM] = llm + output[TSPKeys.TYPE] = prompt.enforce_type + output[TSPKeys.NAME] = prompt.prompt_key + output[TSPKeys.RETRIEVAL_STRATEGY] = profile_manager.retrieval_strategy + output[TSPKeys.SIMILARITY_TOP_K] = profile_manager.similarity_top_k + output[TSPKeys.SECTION] = profile_manager.section + output[TSPKeys.X2TEXT_ADAPTER] = x2text + + webhook_enabled = bool(prompt.enable_postprocessing_webhook) + webhook_url = (prompt.postprocessing_webhook_url or "").strip() + if webhook_enabled and not webhook_url: + webhook_enabled = False + output[TSPKeys.ENABLE_POSTPROCESSING_WEBHOOK] = webhook_enabled + if webhook_enabled: + output[TSPKeys.POSTPROCESSING_WEBHOOK_URL] = webhook_url + + output[TSPKeys.EVAL_SETTINGS] = {} + output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EVALUATE] = ( + prompt.evaluate + ) + output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_MONITOR_LLM] = [ + monitor_llm + ] + output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EXCLUDE_FAILED] = ( + tool.exclude_failed + ) + for attr in dir(prompt): + if attr.startswith(TSPKeys.EVAL_METRIC_PREFIX): + output[TSPKeys.EVAL_SETTINGS][attr] = getattr(prompt, attr) + + output = PromptStudioHelper.fetch_table_settings_if_enabled( + doc_name, prompt, org_id, user_id, tool_id, output + ) + variable_map = ( + PromptStudioVariableService.frame_variable_replacement_map( + doc_id=document_id, prompt_object=prompt + ) + ) + if variable_map: + output[TSPKeys.VARIABLE_MAP] = variable_map + outputs.append(output) + + tool_settings: dict[str, Any] = {} + tool_settings[TSPKeys.ENABLE_CHALLENGE] = tool.enable_challenge + tool_settings[TSPKeys.CHALLENGE_LLM] = challenge_llm + tool_settings[TSPKeys.SINGLE_PASS_EXTRACTION_MODE] = ( + tool.single_pass_extraction_mode + ) + tool_settings[TSPKeys.SUMMARIZE_AS_SOURCE] = tool.summarize_as_source + tool_settings[TSPKeys.PREAMBLE] = tool.preamble + tool_settings[TSPKeys.POSTAMBLE] = tool.postamble + tool_settings[TSPKeys.GRAMMAR] = grammar_list + tool_settings[TSPKeys.ENABLE_HIGHLIGHT] = tool.enable_highlight + tool_settings[TSPKeys.ENABLE_WORD_CONFIDENCE] = ( + tool.enable_word_confidence + ) + tool_settings[TSPKeys.PLATFORM_POSTAMBLE] = getattr( + settings, TSPKeys.PLATFORM_POSTAMBLE.upper(), "" + ) + tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr( + settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), "" + ) + + file_hash = fs_instance.get_hash_from_file(path=extract_path) + + payload: dict[str, Any] = { + TSPKeys.TOOL_SETTINGS: tool_settings, + TSPKeys.OUTPUTS: outputs, + TSPKeys.TOOL_ID: tool_id, + TSPKeys.RUN_ID: run_id, + TSPKeys.FILE_NAME: doc_name, + TSPKeys.FILE_HASH: file_hash, + TSPKeys.FILE_PATH: extract_path, + Common.LOG_EVENTS_ID: StateStore.get(Common.LOG_EVENTS_ID), + TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value, + TSPKeys.CUSTOM_DATA: tool.custom_data, + } + + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload[ToolStudioKeys.PLATFORM_SERVICE_API_KEY] = platform_api_key + payload[TSPKeys.INCLUDE_METADATA] = True + + log_events_id = StateStore.get(Common.LOG_EVENTS_ID) or "" + request_id = StateStore.get(Common.REQUEST_ID) or "" + + context = ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id=run_id, + execution_source="ide", + organization_id=org_id, + executor_params=payload, + request_id=request_id, + log_events_id=log_events_id, + ) + + cb_kwargs = { + "log_events_id": log_events_id, + "request_id": request_id, + "org_id": org_id, + "operation": "fetch_response", + "run_id": run_id, + "document_id": document_id, + "tool_id": tool_id, + "prompt_ids": [str(prompt.prompt_id)], + "profile_manager_id": profile_manager_id, + "is_single_pass": False, + } + + return context, cb_kwargs + + @staticmethod + def build_single_pass_payload( + tool: CustomTool, + doc_path: str, + doc_name: str, + prompts: list[ToolStudioPrompt], + org_id: str, + document_id: str, + run_id: str, + ) -> tuple[ExecutionContext, dict[str, Any]]: + """Build single_pass_extraction ExecutionContext. + + Does ORM work and extraction synchronously. Only the LLM + single-pass call is dispatched asynchronously. + """ + tool_id = str(tool.tool_id) + outputs: list[dict[str, Any]] = [] + grammar: list[dict[str, Any]] = [] + prompt_grammar = tool.prompt_grammer + default_profile = ProfileManager.get_default_llm_profile(tool) + + challenge_llm_instance: AdapterInstance | None = tool.challenge_llm + challenge_llm: str | None = None + if challenge_llm_instance: + challenge_llm = str(challenge_llm_instance.id) + else: + challenge_llm = str(default_profile.llm.id) + + PromptStudioHelper.validate_adapter_status(default_profile) + PromptStudioHelper.validate_profile_manager_owner_access(default_profile) + default_profile.chunk_size = 0 + + if not default_profile: + raise DefaultProfileError() + + if prompt_grammar: + for word, synonyms in prompt_grammar.items(): + grammar.append( + {TSPKeys.WORD: word, TSPKeys.SYNONYMS: synonyms} + ) + + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + directory, filename = os.path.split(doc_path) + file_path = os.path.join( + directory, "extract", os.path.splitext(filename)[0] + ".txt" + ) + + # Extract (blocking, usually cached) + PromptStudioHelper.dynamic_extractor( + profile_manager=default_profile, + file_path=doc_path, + org_id=org_id, + document_id=document_id, + run_id=run_id, + enable_highlight=tool.enable_highlight, + ) + + vector_db = str(default_profile.vector_store.id) + embedding_model = str(default_profile.embedding_model.id) + llm = str(default_profile.llm.id) + x2text = str(default_profile.x2text.id) + + tool_settings: dict[str, Any] = { + TSPKeys.PREAMBLE: tool.preamble, + TSPKeys.POSTAMBLE: tool.postamble, + TSPKeys.GRAMMAR: grammar, + TSPKeys.LLM: llm, + TSPKeys.X2TEXT_ADAPTER: x2text, + TSPKeys.VECTOR_DB: vector_db, + TSPKeys.EMBEDDING: embedding_model, + TSPKeys.CHUNK_SIZE: default_profile.chunk_size, + TSPKeys.CHUNK_OVERLAP: default_profile.chunk_overlap, + TSPKeys.ENABLE_CHALLENGE: tool.enable_challenge, + TSPKeys.ENABLE_HIGHLIGHT: tool.enable_highlight, + TSPKeys.ENABLE_WORD_CONFIDENCE: tool.enable_word_confidence, + TSPKeys.CHALLENGE_LLM: challenge_llm, + TSPKeys.PLATFORM_POSTAMBLE: getattr( + settings, TSPKeys.PLATFORM_POSTAMBLE.upper(), "" + ), + TSPKeys.WORD_CONFIDENCE_POSTAMBLE: getattr( + settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), "" + ), + TSPKeys.SUMMARIZE_AS_SOURCE: tool.summarize_as_source, + } + + for p in prompts: + if not p.prompt: + raise EmptyPromptError() + outputs.append( + { + TSPKeys.PROMPT: p.prompt, + TSPKeys.ACTIVE: p.active, + TSPKeys.TYPE: p.enforce_type, + TSPKeys.NAME: p.prompt_key, + } + ) + + if tool.summarize_as_source: + path_obj = Path(file_path) + file_path = str( + path_obj.parent.parent + / TSPKeys.SUMMARIZE + / (path_obj.stem + ".txt") + ) + + file_hash = fs_instance.get_hash_from_file(path=file_path) + + payload: dict[str, Any] = { + TSPKeys.TOOL_SETTINGS: tool_settings, + TSPKeys.OUTPUTS: outputs, + TSPKeys.TOOL_ID: tool_id, + TSPKeys.RUN_ID: run_id, + TSPKeys.FILE_HASH: file_hash, + TSPKeys.FILE_NAME: doc_name, + TSPKeys.FILE_PATH: file_path, + Common.LOG_EVENTS_ID: StateStore.get(Common.LOG_EVENTS_ID), + TSPKeys.EXECUTION_SOURCE: ExecutionSource.IDE.value, + TSPKeys.CUSTOM_DATA: tool.custom_data, + } + + platform_api_key = PromptStudioHelper._get_platform_api_key(org_id) + payload[ToolStudioKeys.PLATFORM_SERVICE_API_KEY] = platform_api_key + payload[TSPKeys.INCLUDE_METADATA] = True + + log_events_id = StateStore.get(Common.LOG_EVENTS_ID) or "" + request_id = StateStore.get(Common.REQUEST_ID) or "" + + context = ExecutionContext( + executor_name="legacy", + operation="single_pass_extraction", + run_id=run_id or str(uuid.uuid4()), + execution_source="ide", + organization_id=org_id, + executor_params=payload, + request_id=request_id, + log_events_id=log_events_id, + ) + + cb_kwargs = { + "log_events_id": log_events_id, + "request_id": request_id, + "org_id": org_id, + "operation": "single_pass_extraction", + "run_id": run_id, + "document_id": document_id, + "tool_id": tool_id, + "prompt_ids": [str(p.prompt_id) for p in prompts], + "is_single_pass": True, + } + + return context, cb_kwargs + @staticmethod def get_select_fields() -> dict[str, Any]: """Method to fetch dropdown field values for frontend. diff --git a/backend/prompt_studio/prompt_studio_core_v2/tasks.py b/backend/prompt_studio/prompt_studio_core_v2/tasks.py index 60b1e71b76..30d13e3b54 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/tasks.py +++ b/backend/prompt_studio/prompt_studio_core_v2/tasks.py @@ -93,6 +93,266 @@ def _emit_error( ) +# ------------------------------------------------------------------ +# Phase 5B — Fire-and-forget callback tasks +# +# These are lightweight callbacks invoked by Celery `link` / `link_error` +# after the executor worker finishes. They run on the backend +# (celery_prompt_studio queue) and do only post-ORM writes + socket +# emission — no heavy computation. +# ------------------------------------------------------------------ + + +@shared_task(name="ide_index_complete") +def ide_index_complete( + result_dict: dict[str, Any], + callback_kwargs: dict[str, Any] | None = None, +) -> dict[str, Any]: + """Celery ``link`` callback after a successful ``ide_index`` execution. + + Performs post-indexing ORM bookkeeping and pushes a socket event to + the frontend. + """ + from prompt_studio.prompt_studio_core_v2.document_indexing_service import ( + DocumentIndexingService, + ) + from prompt_studio.prompt_studio_index_manager_v2.prompt_studio_index_helper import ( + PromptStudioIndexHelper, + ) + from prompt_studio.prompt_profile_manager_v2.models import ProfileManager + + cb = callback_kwargs or {} + log_events_id = cb.get("log_events_id", "") + request_id = cb.get("request_id", "") + org_id = cb.get("org_id", "") + user_id = cb.get("user_id", "") + document_id = cb.get("document_id", "") + doc_id_key = cb.get("doc_id_key", "") + profile_manager_id = cb.get("profile_manager_id") + executor_task_id = cb.get("executor_task_id", "") + + try: + _setup_state_store(log_events_id, request_id, org_id) + + # Check executor-level failure + if not result_dict.get("success", False): + error_msg = result_dict.get("error", "Unknown executor error") + logger.error("ide_index executor reported failure: %s", error_msg) + DocumentIndexingService.remove_document_indexing( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key + ) + _emit_error( + log_events_id, + executor_task_id, + "index_document", + error_msg, + extra={"document_id": document_id}, + ) + return {"status": "failed", "error": error_msg} + + doc_id = result_dict.get("data", {}).get("doc_id", doc_id_key) + + # ORM writes + DocumentIndexingService.mark_document_indexed( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key, doc_id=doc_id + ) + if profile_manager_id: + profile_manager = ProfileManager.objects.get(pk=profile_manager_id) + PromptStudioIndexHelper.handle_index_manager( + document_id=document_id, + profile_manager=profile_manager, + doc_id=doc_id, + ) + + result: dict[str, Any] = { + "message": "Document indexed successfully.", + "document_id": document_id, + } + _emit_result(log_events_id, executor_task_id, "index_document", result) + return result + except Exception as e: + logger.exception("ide_index_complete callback failed") + _emit_error( + log_events_id, + executor_task_id, + "index_document", + str(e), + extra={"document_id": document_id}, + ) + raise + finally: + _clear_state_store() + + +@shared_task(name="ide_index_error") +def ide_index_error( + failed_task_id: str, + callback_kwargs: dict[str, Any] | None = None, +) -> None: + """Celery ``link_error`` callback when an ``ide_index`` task fails. + + Cleans up the indexing-in-progress flag and pushes an error socket + event to the frontend. + """ + from celery.result import AsyncResult + + from prompt_studio.prompt_studio_core_v2.document_indexing_service import ( + DocumentIndexingService, + ) + + cb = callback_kwargs or {} + log_events_id = cb.get("log_events_id", "") + request_id = cb.get("request_id", "") + org_id = cb.get("org_id", "") + user_id = cb.get("user_id", "") + document_id = cb.get("document_id", "") + doc_id_key = cb.get("doc_id_key", "") + executor_task_id = cb.get("executor_task_id", "") + + try: + _setup_state_store(log_events_id, request_id, org_id) + + # Attempt to retrieve the actual exception from the result backend + error_msg = "Indexing failed" + try: + from backend.worker_celery import get_worker_celery_app + + res = AsyncResult(failed_task_id, app=get_worker_celery_app()) + if res.result: + error_msg = str(res.result) + except Exception: + pass + + # Clean up the indexing-in-progress flag + if doc_id_key: + DocumentIndexingService.remove_document_indexing( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key + ) + + _emit_error( + log_events_id, + executor_task_id, + "index_document", + error_msg, + extra={"document_id": document_id}, + ) + except Exception: + logger.exception("ide_index_error callback failed") + finally: + _clear_state_store() + + +@shared_task(name="ide_prompt_complete") +def ide_prompt_complete( + result_dict: dict[str, Any], + callback_kwargs: dict[str, Any] | None = None, +) -> dict[str, Any]: + """Celery ``link`` callback after a successful answer_prompt / single_pass + execution. + + Persists prompt outputs via OutputManagerHelper and pushes a socket + event. + """ + from prompt_studio.prompt_studio_output_manager_v2.output_manager_helper import ( + OutputManagerHelper, + ) + from prompt_studio.prompt_studio_v2.models import ToolStudioPrompt + + cb = callback_kwargs or {} + log_events_id = cb.get("log_events_id", "") + request_id = cb.get("request_id", "") + org_id = cb.get("org_id", "") + operation = cb.get("operation", "fetch_response") + run_id = cb.get("run_id", "") + document_id = cb.get("document_id", "") + prompt_ids = cb.get("prompt_ids", []) + profile_manager_id = cb.get("profile_manager_id") + is_single_pass = cb.get("is_single_pass", False) + executor_task_id = cb.get("executor_task_id", "") + + try: + _setup_state_store(log_events_id, request_id, org_id) + + # Check executor-level failure + if not result_dict.get("success", False): + error_msg = result_dict.get("error", "Unknown executor error") + logger.error("ide_prompt executor reported failure: %s", error_msg) + _emit_error(log_events_id, executor_task_id, operation, error_msg) + return {"status": "failed", "error": error_msg} + + data = result_dict.get("data", {}) + + # Re-fetch prompt ORM objects for OutputManagerHelper + prompts = list( + ToolStudioPrompt.objects.filter(prompt_id__in=prompt_ids).order_by( + "sequence_number" + ) + ) + + response = OutputManagerHelper.handle_prompt_output_update( + run_id=run_id, + prompts=prompts, + outputs=data.get("output", []), + document_id=document_id, + is_single_pass_extract=is_single_pass, + profile_manager_id=profile_manager_id, + metadata=data.get("metadata", {}), + ) + + _emit_result(log_events_id, executor_task_id, operation, response) + return response + except Exception as e: + logger.exception("ide_prompt_complete callback failed") + _emit_error(log_events_id, executor_task_id, operation, str(e)) + raise + finally: + _clear_state_store() + + +@shared_task(name="ide_prompt_error") +def ide_prompt_error( + failed_task_id: str, + callback_kwargs: dict[str, Any] | None = None, +) -> None: + """Celery ``link_error`` callback when an answer_prompt / single_pass + task fails. + + Pushes an error socket event to the frontend. + """ + from celery.result import AsyncResult + + cb = callback_kwargs or {} + log_events_id = cb.get("log_events_id", "") + request_id = cb.get("request_id", "") + org_id = cb.get("org_id", "") + operation = cb.get("operation", "fetch_response") + executor_task_id = cb.get("executor_task_id", "") + + try: + _setup_state_store(log_events_id, request_id, org_id) + + error_msg = "Prompt execution failed" + try: + from backend.worker_celery import get_worker_celery_app + + res = AsyncResult(failed_task_id, app=get_worker_celery_app()) + if res.result: + error_msg = str(res.result) + except Exception: + pass + + _emit_error(log_events_id, executor_task_id, operation, error_msg) + except Exception: + logger.exception("ide_prompt_error callback failed") + finally: + _clear_state_store() + + +# ------------------------------------------------------------------ +# Legacy tasks (kept for backward compatibility during rollout) +# ------------------------------------------------------------------ + + @shared_task(name="prompt_studio_index_document", bind=True) def run_index_document( self, diff --git a/backend/prompt_studio/prompt_studio_core_v2/views.py b/backend/prompt_studio/prompt_studio_core_v2/views.py index 3c515401ff..6742b7f92a 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/views.py +++ b/backend/prompt_studio/prompt_studio_core_v2/views.py @@ -2,6 +2,7 @@ import logging import uuid from datetime import datetime +from pathlib import Path from typing import Any from account_v2.custom_exceptions import DuplicateData @@ -48,12 +49,9 @@ ) from prompt_studio.prompt_studio_core_v2.migration_utils import SummarizeMigrationUtils from account_v2.constants import Common +from celery import signature + from prompt_studio.prompt_studio_core_v2.prompt_studio_helper import PromptStudioHelper -from prompt_studio.prompt_studio_core_v2.tasks import ( - run_fetch_response, - run_index_document, - run_single_pass_extraction, -) from utils.local_context import StateStore from prompt_studio.prompt_studio_core_v2.retrieval_strategies import ( get_retrieval_strategy_metadata, @@ -357,6 +355,10 @@ def make_profile_default(self, request: HttpRequest, pk: Any = None) -> Response def index_document(self, request: HttpRequest, pk: Any = None) -> Response: """API Entry point method to index input file. + Builds the full execution payload (ORM work), then fires a + single executor task with Celery link/link_error callbacks. + The backend worker slot is freed immediately. + Args: request (HttpRequest) @@ -373,23 +375,38 @@ def index_document(self, request: HttpRequest, pk: Any = None) -> Response: document_id: str = serializer.validated_data.get(ToolStudioPromptKeys.DOCUMENT_ID) document: DocumentManager = DocumentManager.objects.get(pk=document_id) file_name: str = document.document_name - # Generate a run_id run_id = CommonUtils.generate_uuid() - log_events_id = StateStore.get(Common.LOG_EVENTS_ID) - request_id = StateStore.get(Common.REQUEST_ID) - - task = run_index_document.apply_async( - kwargs={ - "tool_id": str(tool.tool_id), - "file_name": file_name, - "org_id": UserSessionUtils.get_organization_id(request), - "user_id": tool.created_by.user_id, - "document_id": document_id, - "run_id": run_id, - "log_events_id": log_events_id, - "request_id": request_id, - } + context, cb_kwargs = PromptStudioHelper.build_index_payload( + tool_id=str(tool.tool_id), + file_name=file_name, + org_id=UserSessionUtils.get_organization_id(request), + user_id=tool.created_by.user_id, + document_id=document_id, + run_id=run_id, + ) + + dispatcher = PromptStudioHelper._get_dispatcher() + + # Pre-generate task ID so callbacks can reference it + import uuid as _uuid + + executor_task_id = str(_uuid.uuid4()) + cb_kwargs["executor_task_id"] = executor_task_id + + task = dispatcher.dispatch_with_callback( + context, + on_success=signature( + "ide_index_complete", + kwargs={"callback_kwargs": cb_kwargs}, + queue="celery_prompt_studio", + ), + on_error=signature( + "ide_index_error", + kwargs={"callback_kwargs": cb_kwargs}, + queue="celery_prompt_studio", + ), + task_id=executor_task_id, ) return Response( {"task_id": task.id, "run_id": run_id, "status": "accepted"}, @@ -400,39 +417,73 @@ def index_document(self, request: HttpRequest, pk: Any = None) -> Response: def fetch_response(self, request: HttpRequest, pk: Any = None) -> Response: """API Entry point method to fetch response to prompt. - Args: - request (HttpRequest): _description_ + Builds the full execution payload (ORM work), then fires a + single executor task with Celery link/link_error callbacks. - Raises: - FilenameMissingError: _description_ + Args: + request (HttpRequest) Returns: Response """ custom_tool = self.get_object() - tool_id: str = str(custom_tool.tool_id) document_id: str = request.data.get(ToolStudioPromptKeys.DOCUMENT_ID) - id: str = request.data.get(ToolStudioPromptKeys.ID) + prompt_id: str = request.data.get(ToolStudioPromptKeys.ID) run_id: str = request.data.get(ToolStudioPromptKeys.RUN_ID) - profile_manager: str = request.data.get(ToolStudioPromptKeys.PROFILE_MANAGER_ID) + profile_manager_id: str = request.data.get(ToolStudioPromptKeys.PROFILE_MANAGER_ID) if not run_id: - # Generate a run_id run_id = CommonUtils.generate_uuid() - log_events_id = StateStore.get(Common.LOG_EVENTS_ID) - request_id = StateStore.get(Common.REQUEST_ID) - - task = run_fetch_response.apply_async( - kwargs={ - "tool_id": tool_id, - "org_id": UserSessionUtils.get_organization_id(request), - "user_id": custom_tool.created_by.user_id, - "document_id": document_id, - "run_id": run_id, - "id": id, - "profile_manager_id": profile_manager, - "log_events_id": log_events_id, - "request_id": request_id, - } + + org_id = UserSessionUtils.get_organization_id(request) + user_id = custom_tool.created_by.user_id + + # Resolve prompt + prompt = ToolStudioPrompt.objects.get(pk=prompt_id) + + # Build file path + doc_path = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( + org_id, is_create=False, user_id=user_id, + tool_id=str(custom_tool.tool_id), + ) + document: DocumentManager = DocumentManager.objects.get(pk=document_id) + doc_path = str(Path(doc_path) / document.document_name) + + context, cb_kwargs = PromptStudioHelper.build_fetch_response_payload( + tool=custom_tool, + doc_path=doc_path, + doc_name=document.document_name, + prompt=prompt, + org_id=org_id, + user_id=user_id, + document_id=document_id, + run_id=run_id, + profile_manager_id=profile_manager_id, + ) + + # If document is being indexed, return pending status + if context is None: + return Response(cb_kwargs, status=status.HTTP_200_OK) + + dispatcher = PromptStudioHelper._get_dispatcher() + + import uuid as _uuid + + executor_task_id = str(_uuid.uuid4()) + cb_kwargs["executor_task_id"] = executor_task_id + + task = dispatcher.dispatch_with_callback( + context, + on_success=signature( + "ide_prompt_complete", + kwargs={"callback_kwargs": cb_kwargs}, + queue="celery_prompt_studio", + ), + on_error=signature( + "ide_prompt_error", + kwargs={"callback_kwargs": cb_kwargs}, + queue="celery_prompt_studio", + ), + task_id=executor_task_id, ) return Response( {"task_id": task.id, "run_id": run_id, "status": "accepted"}, @@ -441,37 +492,72 @@ def fetch_response(self, request: HttpRequest, pk: Any = None) -> Response: @action(detail=True, methods=["post"]) def single_pass_extraction(self, request: HttpRequest, pk: uuid) -> Response: - """API Entry point method to fetch response to prompt. + """API Entry point method for single pass extraction. + + Builds the full execution payload (ORM work), then fires a + single executor task with Celery link/link_error callbacks. Args: - request (HttpRequest): _description_ - pk (Any): Primary key of the CustomTool + request (HttpRequest) + pk: Primary key of the CustomTool Returns: Response """ - # TODO: Handle fetch_response and single_pass_ - # extraction using common function custom_tool = self.get_object() - tool_id: str = str(custom_tool.tool_id) document_id: str = request.data.get(ToolStudioPromptKeys.DOCUMENT_ID) run_id: str = request.data.get(ToolStudioPromptKeys.RUN_ID) if not run_id: - # Generate a run_id run_id = CommonUtils.generate_uuid() - log_events_id = StateStore.get(Common.LOG_EVENTS_ID) - request_id = StateStore.get(Common.REQUEST_ID) - - task = run_single_pass_extraction.apply_async( - kwargs={ - "tool_id": tool_id, - "org_id": UserSessionUtils.get_organization_id(request), - "user_id": custom_tool.created_by.user_id, - "document_id": document_id, - "run_id": run_id, - "log_events_id": log_events_id, - "request_id": request_id, - } + + org_id = UserSessionUtils.get_organization_id(request) + user_id = custom_tool.created_by.user_id + + # Build file path + doc_path = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( + org_id, is_create=False, user_id=user_id, + tool_id=str(custom_tool.tool_id), + ) + document: DocumentManager = DocumentManager.objects.get(pk=document_id) + doc_path = str(Path(doc_path) / document.document_name) + + # Fetch all active prompts + prompts = list( + ToolStudioPrompt.objects.filter( + tool_id=custom_tool.tool_id + ).order_by("sequence_number") + ) + + context, cb_kwargs = PromptStudioHelper.build_single_pass_payload( + tool=custom_tool, + doc_path=doc_path, + doc_name=document.document_name, + prompts=prompts, + org_id=org_id, + document_id=document_id, + run_id=run_id, + ) + + dispatcher = PromptStudioHelper._get_dispatcher() + + import uuid as _uuid + + executor_task_id = str(_uuid.uuid4()) + cb_kwargs["executor_task_id"] = executor_task_id + + task = dispatcher.dispatch_with_callback( + context, + on_success=signature( + "ide_prompt_complete", + kwargs={"callback_kwargs": cb_kwargs}, + queue="celery_prompt_studio", + ), + on_error=signature( + "ide_prompt_error", + kwargs={"callback_kwargs": cb_kwargs}, + queue="celery_prompt_studio", + ), + task_id=executor_task_id, ) return Response( {"task_id": task.id, "run_id": run_id, "status": "accepted"}, @@ -482,6 +568,10 @@ def single_pass_extraction(self, request: HttpRequest, pk: uuid) -> Response: def task_status(self, request: HttpRequest, pk: Any = None, task_id: str = None) -> Response: """Poll the status of an async Prompt Studio task. + Task IDs now point to executor worker tasks dispatched via the + worker-v2 Celery app. Both apps share the same PostgreSQL + result backend, so we use the worker app to look up results. + Args: request (HttpRequest) pk: Primary key of the CustomTool (for permission check) @@ -492,9 +582,9 @@ def task_status(self, request: HttpRequest, pk: Any = None, task_id: str = None) """ from celery.result import AsyncResult - from backend.celery_service import app as celery_app + from backend.worker_celery import get_worker_celery_app - result = AsyncResult(task_id, app=celery_app) + result = AsyncResult(task_id, app=get_worker_celery_app()) if not result.ready(): return Response({"task_id": task_id, "status": "processing"}) if result.successful(): diff --git a/unstract/sdk1/src/unstract/sdk1/execution/context.py b/unstract/sdk1/src/unstract/sdk1/execution/context.py index f149c4d4e7..4eb3e22bb6 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/context.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/context.py @@ -30,6 +30,8 @@ class Operation(str, Enum): SINGLE_PASS_EXTRACTION = "single_pass_extraction" SUMMARIZE = "summarize" AGENTIC_EXTRACTION = "agentic_extraction" + IDE_INDEX = "ide_index" + STRUCTURE_PIPELINE = "structure_pipeline" @dataclass diff --git a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py index c8e2674ad1..949a174597 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py @@ -4,8 +4,13 @@ - Structure tool Celery task (workflow path) - PromptStudioHelper (IDE path) -It sends ``execute_extraction`` tasks to the ``executor`` queue -and waits for results via ``AsyncResult.get()``. +It sends ``execute_extraction`` tasks to the ``executor`` queue. +Three dispatch modes are available: + +- ``dispatch()``: Send and block until result (synchronous). +- ``dispatch_async()``: Fire-and-forget, returns task_id for polling. +- ``dispatch_with_callback()``: Fire-and-forget with Celery ``link`` + / ``link_error`` callbacks for post-processing. """ import logging @@ -48,9 +53,18 @@ class ExecutionDispatcher: dispatcher = ExecutionDispatcher(celery_app=app) result = dispatcher.dispatch(context, timeout=120) - Or fire-and-forget:: + Fire-and-forget:: task_id = dispatcher.dispatch_async(context) + + Fire-and-forget with callbacks:: + + from celery import signature + task = dispatcher.dispatch_with_callback( + context, + on_success=signature("my_success_task", args=[...], queue="q"), + on_error=signature("my_error_task", args=[...], queue="q"), + ) """ def __init__(self, celery_app: Any = None) -> None: @@ -173,3 +187,76 @@ def dispatch_async( queue=_QUEUE_NAME, ) return async_result.id + + def dispatch_with_callback( + self, + context: ExecutionContext, + on_success: Any = None, + on_error: Any = None, + task_id: str | None = None, + ) -> Any: + """Fire-and-forget dispatch with Celery link callbacks. + + Sends the task to the executor queue and returns immediately. + When the executor task completes, Celery invokes the + ``on_success`` callback (via ``link``). If the executor task + raises an exception, Celery invokes ``on_error`` (via + ``link_error``). + + Args: + context: ExecutionContext to dispatch. + on_success: A Celery ``Signature`` invoked on success. + Receives ``(result_dict,)`` as first positional arg + followed by the signature's own args. + on_error: A Celery ``Signature`` invoked on failure. + Receives ``(failed_task_uuid,)`` as first positional + arg followed by the signature's own args. + task_id: Optional pre-generated Celery task ID. Useful + when the caller needs to know the task ID before + dispatch (e.g. to include it in callback kwargs). + + Returns: + The ``AsyncResult`` from ``send_task``. Callers can + use ``.id`` for task tracking but should NOT call + ``.get()`` (that would block, defeating the purpose). + + Raises: + ValueError: If no Celery app is configured. + """ + if self._app is None: + raise ValueError( + "No Celery app configured on ExecutionDispatcher" + ) + + logger.info( + "Dispatching with callback: executor=%s " + "operation=%s run_id=%s request_id=%s " + "on_success=%s on_error=%s", + context.executor_name, + context.operation, + context.run_id, + context.request_id, + on_success, + on_error, + ) + + send_kwargs: dict[str, Any] = { + "args": [context.to_dict()], + "queue": _QUEUE_NAME, + } + if on_success is not None: + send_kwargs["link"] = on_success + if on_error is not None: + send_kwargs["link_error"] = on_error + if task_id is not None: + send_kwargs["task_id"] = task_id + + async_result = self._app.send_task( + _TASK_NAME, + **send_kwargs, + ) + logger.info( + "Task sent with callbacks: celery_task_id=%s", + async_result.id, + ) + return async_result diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index 7749fa3b12..d128d9736a 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -820,6 +820,153 @@ def test_dispatch_context_serialized_correctly( "schema": {"name": "str"} } + # ---- Phase 5A: dispatch_with_callback ---- + + def test_dispatch_with_callback_sends_link_and_link_error( + self: Self, + ) -> None: + """dispatch_with_callback() passes on_success as link, on_error as link_error.""" + mock_app = self._make_mock_app(task_id="cb-task-001") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + on_success = MagicMock(name="on_success_sig") + on_error = MagicMock(name="on_error_sig") + + result = dispatcher.dispatch_with_callback( + ctx, on_success=on_success, on_error=on_error + ) + + assert result.id == "cb-task-001" + mock_app.send_task.assert_called_once_with( + "execute_extraction", + args=[ctx.to_dict()], + queue="executor", + link=on_success, + link_error=on_error, + ) + + def test_dispatch_with_callback_success_only( + self: Self, + ) -> None: + """dispatch_with_callback() with only on_success omits link_error.""" + mock_app = self._make_mock_app(task_id="cb-task-002") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + on_success = MagicMock(name="on_success_sig") + + dispatcher.dispatch_with_callback(ctx, on_success=on_success) + + call_kwargs = mock_app.send_task.call_args + assert call_kwargs[1]["link"] is on_success + assert "link_error" not in call_kwargs[1] + + def test_dispatch_with_callback_error_only( + self: Self, + ) -> None: + """dispatch_with_callback() with only on_error omits link.""" + mock_app = self._make_mock_app(task_id="cb-task-003") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + on_error = MagicMock(name="on_error_sig") + + dispatcher.dispatch_with_callback(ctx, on_error=on_error) + + call_kwargs = mock_app.send_task.call_args + assert "link" not in call_kwargs[1] + assert call_kwargs[1]["link_error"] is on_error + + def test_dispatch_with_callback_no_callbacks( + self: Self, + ) -> None: + """dispatch_with_callback() with no callbacks sends plain task.""" + mock_app = self._make_mock_app(task_id="cb-task-004") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch_with_callback(ctx) + + assert result.id == "cb-task-004" + call_kwargs = mock_app.send_task.call_args + assert "link" not in call_kwargs[1] + assert "link_error" not in call_kwargs[1] + + def test_dispatch_with_callback_returns_async_result( + self: Self, + ) -> None: + """dispatch_with_callback() returns the AsyncResult object (not just task_id).""" + mock_app = self._make_mock_app(task_id="cb-task-005") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch_with_callback(ctx) + + # Returns the full AsyncResult, not just the id string + assert result is mock_app.send_task.return_value + assert result.id == "cb-task-005" + + def test_dispatch_with_callback_no_app_raises_value_error( + self: Self, + ) -> None: + """dispatch_with_callback() without celery_app raises ValueError.""" + dispatcher = ExecutionDispatcher(celery_app=None) + ctx = self._make_context() + + with pytest.raises(ValueError, match="No Celery app"): + dispatcher.dispatch_with_callback(ctx) + + def test_dispatch_with_callback_context_serialized( + self: Self, + ) -> None: + """dispatch_with_callback() serializes context correctly.""" + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context( + operation="answer_prompt", + executor_params={"prompt_key": "p1"}, + ) + + dispatcher.dispatch_with_callback( + ctx, on_success=MagicMock() + ) + + sent_args = mock_app.send_task.call_args + context_dict = sent_args[1]["args"][0] + assert context_dict["operation"] == "answer_prompt" + assert context_dict["executor_params"] == { + "prompt_key": "p1" + } + + def test_dispatch_with_callback_custom_task_id( + self: Self, + ) -> None: + """dispatch_with_callback() passes custom task_id to send_task.""" + mock_app = self._make_mock_app(task_id="pre-gen-id-123") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + result = dispatcher.dispatch_with_callback( + ctx, task_id="pre-gen-id-123" + ) + + call_kwargs = mock_app.send_task.call_args + assert call_kwargs[1]["task_id"] == "pre-gen-id-123" + + def test_dispatch_with_callback_no_task_id_omits_kwarg( + self: Self, + ) -> None: + """dispatch_with_callback() without task_id doesn't pass task_id.""" + mock_app = self._make_mock_app() + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = self._make_context() + + dispatcher.dispatch_with_callback(ctx) + + call_kwargs = mock_app.send_task.call_args + assert "task_id" not in call_kwargs[1] + # ---- Phase 1G: ExecutorToolShim ---- # Note: ExecutorToolShim lives in workers/executor/ but the tests diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 1aefa2444c..9c98eb353d 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -53,6 +53,8 @@ class LegacyExecutor(BaseExecutor): Operation.SINGLE_PASS_EXTRACTION.value: "_handle_single_pass_extraction", Operation.SUMMARIZE.value: "_handle_summarize", Operation.AGENTIC_EXTRACTION.value: "_handle_agentic_extraction", + Operation.IDE_INDEX.value: "_handle_ide_index", + Operation.STRUCTURE_PIPELINE.value: "_handle_structure_pipeline", } # Defaults for log streaming (overridden by execute()). @@ -271,6 +273,460 @@ def _get_indexing_deps(): return Index, EmbeddingCompat, VectorDB + # ------------------------------------------------------------------ + # Phase 5C — Compound IDE index handler (extract + index) + # ------------------------------------------------------------------ + + def _handle_ide_index( + self, context: ExecutionContext + ) -> ExecutionResult: + """Handle ``Operation.IDE_INDEX`` — compound extract then index. + + This compound operation combines ``_handle_extract`` and + ``_handle_index`` in a single executor invocation, eliminating + the need for the backend Celery worker to block between steps. + + The ``executor_params`` must contain: + - ``extract_params``: Parameters for ``_handle_extract``. + - ``index_params``: Parameters for ``_handle_index``. The + executor injects ``extracted_text`` from the extract step + before calling index. + + Returns: + ExecutionResult with ``data`` containing ``doc_id`` from + the index step. + """ + params = context.executor_params + extract_params = params.get("extract_params") + index_params = params.get("index_params") + + if not extract_params or not index_params: + missing = [] + if not extract_params: + missing.append("extract_params") + if not index_params: + missing.append("index_params") + return ExecutionResult.failure( + error=f"ide_index missing required params: " + f"{', '.join(missing)}" + ) + + # Step 1: Extract + extract_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=Operation.EXTRACT.value, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + executor_params=extract_params, + request_id=context.request_id, + log_events_id=context.log_events_id, + ) + extract_result = self._handle_extract(extract_ctx) + if not extract_result.success: + return extract_result + + # Step 2: Index — inject extracted text + extracted_text = extract_result.data.get(IKeys.EXTRACTED_TEXT, "") + index_params[IKeys.EXTRACTED_TEXT] = extracted_text + + index_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=Operation.INDEX.value, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + executor_params=index_params, + request_id=context.request_id, + log_events_id=context.log_events_id, + ) + index_result = self._handle_index(index_ctx) + if not index_result.success: + return index_result + + return ExecutionResult( + success=True, + data={ + IKeys.DOC_ID: index_result.data.get(IKeys.DOC_ID, ""), + }, + ) + + # ------------------------------------------------------------------ + # Phase 5D — Compound structure pipeline handler + # ------------------------------------------------------------------ + + def _handle_structure_pipeline( + self, context: ExecutionContext + ) -> ExecutionResult: + """Handle ``Operation.STRUCTURE_PIPELINE``. + + Runs the full structure-tool pipeline in a single executor + invocation: extract → summarize → index → answer_prompt. + + This eliminates three sequential ``dispatcher.dispatch()`` calls + that would otherwise block a file_processing worker slot. + + Expected ``executor_params`` keys: + + ``extract_params`` + Parameters for ``_handle_extract``. + ``index_template`` + Common indexing params (``tool_id``, ``file_hash``, + ``is_highlight_enabled``, ``platform_api_key``, + ``extracted_file_path``). + ``answer_params`` + Full payload for ``_handle_answer_prompt`` / + ``_handle_single_pass_extraction``. + ``pipeline_options`` + Control flags: ``skip_extraction_and_indexing``, + ``is_summarization_enabled``, ``is_single_pass_enabled``, + ``input_file_path``, ``source_file_name``. + ``summarize_params`` + (Optional) Parameters for ``_handle_summarize`` plus + filesystem paths for caching. + + Returns: + ExecutionResult with ``data`` containing the structured + output dict (``output``, ``metadata``, ``metrics``). + """ + params = context.executor_params + extract_params = params.get("extract_params", {}) + index_template = params.get("index_template", {}) + answer_params = params.get("answer_params", {}) + pipeline_options = params.get("pipeline_options", {}) + summarize_params = params.get("summarize_params") + + skip_extraction = pipeline_options.get( + "skip_extraction_and_indexing", False + ) + is_summarization = pipeline_options.get( + "is_summarization_enabled", False + ) + is_single_pass = pipeline_options.get( + "is_single_pass_enabled", False + ) + input_file_path = pipeline_options.get("input_file_path", "") + source_file_name = pipeline_options.get("source_file_name", "") + + extracted_text = "" + index_metrics: dict = {} + + # ---- Step 1: Extract ---- + if not skip_extraction: + extract_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=Operation.EXTRACT.value, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + executor_params=extract_params, + request_id=context.request_id, + log_events_id=context.log_events_id, + ) + extract_result = self._handle_extract(extract_ctx) + if not extract_result.success: + return extract_result + extracted_text = extract_result.data.get( + IKeys.EXTRACTED_TEXT, "" + ) + + # ---- Step 2: Summarize (if enabled) ---- + if is_summarization: + summarize_result = self._run_pipeline_summarize( + context=context, + summarize_params=summarize_params or {}, + answer_params=answer_params, + ) + if not summarize_result.success: + return summarize_result + # answer_params file_path/hash updated in-place by helper + elif skip_extraction: + # Smart table: use original source file + answer_params["file_path"] = input_file_path + elif not is_single_pass: + # ---- Step 3: Index per output with dedup ---- + index_metrics = self._run_pipeline_index( + context=context, + index_template=index_template, + answer_params=answer_params, + extracted_text=extracted_text, + ) + + # ---- Step 4: Table settings injection ---- + if not is_single_pass: + outputs = answer_params.get("outputs", []) + extracted_file_path = index_template.get( + "extracted_file_path", "" + ) + for output in outputs: + if "table_settings" in output: + table_settings = output["table_settings"] + is_dir = table_settings.get("is_directory_mode", False) + if skip_extraction: + table_settings["input_file"] = input_file_path + answer_params["file_path"] = input_file_path + else: + table_settings["input_file"] = extracted_file_path + table_settings["is_directory_mode"] = is_dir + output["table_settings"] = table_settings + + # ---- Step 5: Answer prompt / Single pass ---- + operation = ( + Operation.SINGLE_PASS_EXTRACTION.value + if is_single_pass + else Operation.ANSWER_PROMPT.value + ) + answer_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=operation, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + executor_params=answer_params, + request_id=context.request_id, + log_events_id=context.log_events_id, + ) + answer_result = self._handle_answer_prompt(answer_ctx) + if not answer_result.success: + return answer_result + + # ---- Step 6: Merge results ---- + structured_output = answer_result.data + + # Ensure metadata section + if "metadata" not in structured_output: + structured_output["metadata"] = {} + structured_output["metadata"]["file_name"] = source_file_name + + # Add extracted text for HITL raw view + if extracted_text: + structured_output["metadata"]["extracted_text"] = ( + extracted_text + ) + + # Merge index metrics + if index_metrics: + existing_metrics = structured_output.get("metrics", {}) + merged = self._merge_pipeline_metrics( + existing_metrics, index_metrics + ) + structured_output["metrics"] = merged + + return ExecutionResult(success=True, data=structured_output) + + def _run_pipeline_summarize( + self, + context: ExecutionContext, + summarize_params: dict, + answer_params: dict, + ) -> ExecutionResult: + """Run the summarize step of the structure pipeline. + + Handles filesystem caching: if a cached summary exists, uses it. + Otherwise calls ``_handle_summarize`` and writes the result. + Updates ``answer_params`` in-place with new file_path and + file_hash. + """ + extract_file_path = summarize_params.get("extract_file_path", "") + summarize_file_path = summarize_params.get( + "summarize_file_path", "" + ) + platform_api_key = summarize_params.get("platform_api_key", "") + llm_adapter_id = summarize_params.get( + "llm_adapter_instance_id", "" + ) + summarize_prompt = summarize_params.get("summarize_prompt", "") + prompt_keys = summarize_params.get("prompt_keys", []) + outputs = answer_params.get("outputs", []) + + fs = FileUtils.get_fs_instance( + execution_source=context.execution_source + ) + + # Set chunk_size=0 for all outputs when summarizing + embedding = answer_params.get("tool_settings", {}).get( + "embedding", "" + ) + vector_db = answer_params.get("tool_settings", {}).get( + "vector-db", "" + ) + x2text = answer_params.get("tool_settings", {}).get( + "x2text_adapter", "" + ) + for output in outputs: + output["embedding"] = embedding + output["vector-db"] = vector_db + output["x2text_adapter"] = x2text + output["chunk-size"] = 0 + output["chunk-overlap"] = 0 + + # Check cache + summarized_context = "" + if fs.exists(summarize_file_path): + summarized_context = fs.read( + path=summarize_file_path, mode="r" + ) + + if not summarized_context: + # Read extracted text + doc_context = fs.read(path=extract_file_path, mode="r") + if not doc_context: + return ExecutionResult.failure( + error="No extracted text found for summarization" + ) + + summarize_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=Operation.SUMMARIZE.value, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + request_id=context.request_id, + log_events_id=context.log_events_id, + executor_params={ + "llm_adapter_instance_id": llm_adapter_id, + "summarize_prompt": summarize_prompt, + "context": doc_context, + "prompt_keys": prompt_keys, + "PLATFORM_SERVICE_API_KEY": platform_api_key, + }, + ) + summarize_result = self._handle_summarize(summarize_ctx) + if not summarize_result.success: + return summarize_result + + summarized_context = summarize_result.data.get("data", "") + fs.write( + path=summarize_file_path, + mode="w", + data=summarized_context, + ) + + # Update answer_params + summarize_file_hash = fs.get_hash_from_file( + path=summarize_file_path + ) + answer_params["file_hash"] = summarize_file_hash + answer_params["file_path"] = str(summarize_file_path) + + return ExecutionResult(success=True, data={}) + + def _run_pipeline_index( + self, + context: ExecutionContext, + index_template: dict, + answer_params: dict, + extracted_text: str, + ) -> dict: + """Run per-output indexing with dedup for the structure pipeline. + + Returns: + Dict of index metrics keyed by output name. + """ + import datetime + + tool_settings = answer_params.get("tool_settings", {}) + outputs = answer_params.get("outputs", []) + tool_id = index_template.get("tool_id", "") + file_hash = index_template.get("file_hash", "") + is_highlight = index_template.get("is_highlight_enabled", False) + platform_api_key = index_template.get("platform_api_key", "") + extracted_file_path = index_template.get( + "extracted_file_path", "" + ) + + index_metrics: dict = {} + seen_params: set = set() + + for output in outputs: + chunk_size = output.get("chunk-size", 0) + chunk_overlap = output.get("chunk-overlap", 0) + vector_db = tool_settings.get("vector-db", "") + embedding = tool_settings.get("embedding", "") + x2text = tool_settings.get("x2text_adapter", "") + + param_key = ( + f"chunk_size={chunk_size}_" + f"chunk_overlap={chunk_overlap}_" + f"vector_db={vector_db}_" + f"embedding={embedding}_" + f"x2text={x2text}" + ) + + if chunk_size != 0 and param_key not in seen_params: + seen_params.add(param_key) + + indexing_start = datetime.datetime.now() + logger.info( + "Pipeline indexing: chunk_size=%s " + "chunk_overlap=%s vector_db=%s", + chunk_size, + chunk_overlap, + vector_db, + ) + + index_ctx = ExecutionContext( + executor_name=context.executor_name, + operation=Operation.INDEX.value, + run_id=context.run_id, + execution_source=context.execution_source, + organization_id=context.organization_id, + request_id=context.request_id, + log_events_id=context.log_events_id, + executor_params={ + "embedding_instance_id": embedding, + "vector_db_instance_id": vector_db, + "x2text_instance_id": x2text, + "chunk_size": chunk_size, + "chunk_overlap": chunk_overlap, + "file_path": extracted_file_path, + "reindex": True, + "tool_id": tool_id, + "file_hash": file_hash, + "enable_highlight": is_highlight, + "extracted_text": extracted_text, + "platform_api_key": platform_api_key, + }, + ) + index_result = self._handle_index(index_ctx) + if not index_result.success: + logger.warning( + "Pipeline indexing failed for %s: %s", + param_key, + index_result.error, + ) + + elapsed = ( + datetime.datetime.now() - indexing_start + ).total_seconds() + output_name = output.get("name", "") + index_metrics[output_name] = { + "indexing": {"time_taken(s)": elapsed} + } + + return index_metrics + + @staticmethod + def _merge_pipeline_metrics( + metrics1: dict, metrics2: dict + ) -> dict: + """Merge two metrics dicts, combining sub-dicts for shared keys.""" + merged: dict = {} + all_keys = set(metrics1) | set(metrics2) + for key in all_keys: + if ( + key in metrics1 + and key in metrics2 + and isinstance(metrics1[key], dict) + and isinstance(metrics2[key], dict) + ): + merged[key] = {**metrics1[key], **metrics2[key]} + elif key in metrics1: + merged[key] = metrics1[key] + else: + merged[key] = metrics2[key] + return merged + # ------------------------------------------------------------------ # Phase 2C — Index handler # ------------------------------------------------------------------ diff --git a/workers/executor/tasks.py b/workers/executor/tasks.py index e4db70aaee..24f5fc2184 100644 --- a/workers/executor/tasks.py +++ b/workers/executor/tasks.py @@ -70,12 +70,34 @@ def execute_extraction( # the frontend. Attached as a transient attribute (not serialized). if context.log_events_id: params = context.executor_params - context._log_component = { - "tool_id": params.get("tool_id", ""), - "run_id": context.run_id, - "doc_name": str(params.get("file_name", "")), - "operation": context.operation, - } + # For compound operations, extract nested params for log + # correlation. + if context.operation == "ide_index": + extract_params = params.get("extract_params", {}) + context._log_component = { + "tool_id": extract_params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str(extract_params.get("file_name", "")), + "operation": context.operation, + } + elif context.operation == "structure_pipeline": + answer_params = params.get("answer_params", {}) + pipeline_opts = params.get("pipeline_options", {}) + context._log_component = { + "tool_id": answer_params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str( + pipeline_opts.get("source_file_name", "") + ), + "operation": context.operation, + } + else: + context._log_component = { + "tool_id": params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": context.operation, + } else: context._log_component = {} diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index fb6a7e4a6e..09d2e5d9cc 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -184,25 +184,6 @@ def _should_skip_extraction_for_smart_table( return False -def _merge_metrics(metrics1: dict, metrics2: dict) -> dict: - """Merge two metrics dicts, combining sub-dicts for shared keys.""" - merged: dict = {} - all_keys = set(metrics1) | set(metrics2) - for key in all_keys: - if ( - key in metrics1 - and key in metrics2 - and isinstance(metrics1[key], dict) - and isinstance(metrics2[key], dict) - ): - merged[key] = {**metrics1[key], **metrics2[key]} - elif key in metrics1: - merged[key] = metrics1[key] - else: - merged[key] = metrics2[key] - return merged - - # ----------------------------------------------------------------------- # Main Celery task # ----------------------------------------------------------------------- @@ -234,6 +215,11 @@ def _execute_structure_tool_impl(params: dict) -> dict: """Implementation of the structure tool pipeline. Separated from the task function for testability. + + Phase 5E: Uses a single ``structure_pipeline`` dispatch instead of + 3 sequential ``dispatcher.dispatch()`` calls. The executor worker + handles the full extract → summarize → index → answer_prompt + pipeline internally, freeing the file_processing worker slot. """ # ---- Unpack params ---- organization_id = params["organization_id"] @@ -319,9 +305,25 @@ def _execute_structure_tool_impl(params: dict) -> dict: execution_run_data_folder = Path(execution_data_dir) extracted_input_file = str(execution_run_data_folder / _SK.EXTRACT) - # ---- Step 4: Build payload ---- + # ---- Step 4: Smart table detection ---- + skip_extraction_and_indexing = _should_skip_extraction_for_smart_table( + input_file_path, outputs + ) + if skip_extraction_and_indexing: + logger.info( + "Skipping extraction and indexing for Excel table " + "with valid JSON schema" + ) + + # ---- Step 5: Build pipeline params ---- + usage_kwargs: dict[Any, Any] = {} + if not skip_extraction_and_indexing: + usage_kwargs[UsageKwargs.RUN_ID] = file_execution_id + usage_kwargs[UsageKwargs.FILE_NAME] = source_file_name + usage_kwargs[UsageKwargs.EXECUTION_ID] = execution_id + custom_data = exec_metadata.get(_SK.CUSTOM_DATA, {}) - payload = { + answer_params = { _SK.RUN_ID: file_execution_id, _SK.EXECUTION_ID: execution_id, _SK.TOOL_SETTINGS: tool_settings, @@ -335,152 +337,88 @@ def _execute_structure_tool_impl(params: dict) -> dict: "PLATFORM_SERVICE_API_KEY": platform_service_api_key, } - # ---- Step 5: Extract ---- - skip_extraction_and_indexing = _should_skip_extraction_for_smart_table( - input_file_path, outputs - ) + extract_params = { + "x2text_instance_id": tool_settings[_SK.X2TEXT_ADAPTER], + "file_path": input_file_path, + "enable_highlight": is_highlight_enabled, + "output_file_path": str(execution_run_data_folder / _SK.EXTRACT), + "platform_api_key": platform_service_api_key, + "usage_kwargs": usage_kwargs, + "tags": exec_metadata.get("tags"), + "tool_execution_metadata": exec_metadata, + "execution_data_dir": str(execution_run_data_folder), + } - extracted_text = "" - usage_kwargs: dict[Any, Any] = {} - if skip_extraction_and_indexing: - logger.info( - "Skipping extraction and indexing for Excel table " - "with valid JSON schema" - ) - else: - logger.info("Extracting document '%s'", source_file_name) - usage_kwargs[UsageKwargs.RUN_ID] = file_execution_id - usage_kwargs[UsageKwargs.FILE_NAME] = source_file_name - usage_kwargs[UsageKwargs.EXECUTION_ID] = execution_id + index_template = { + "tool_id": tool_id, + "file_hash": file_hash, + "is_highlight_enabled": is_highlight_enabled, + "platform_api_key": platform_service_api_key, + "extracted_file_path": extracted_input_file, + } - extract_ctx = ExecutionContext( - executor_name="legacy", - operation="extract", - run_id=file_execution_id, - execution_source="tool", - organization_id=organization_id, - request_id=file_execution_id, - executor_params={ - "x2text_instance_id": tool_settings[_SK.X2TEXT_ADAPTER], - "file_path": input_file_path, - "enable_highlight": is_highlight_enabled, - "output_file_path": str( - execution_run_data_folder / _SK.EXTRACT - ), - "platform_api_key": platform_service_api_key, - "usage_kwargs": usage_kwargs, - "tags": exec_metadata.get("tags"), - "tool_execution_metadata": exec_metadata, - "execution_data_dir": str(execution_run_data_folder), - }, - ) - extract_result = dispatcher.dispatch( - extract_ctx, timeout=EXECUTOR_TIMEOUT - ) - if not extract_result.success: - return extract_result.to_dict() - extracted_text = extract_result.data.get("extracted_text", "") + pipeline_options = { + "skip_extraction_and_indexing": skip_extraction_and_indexing, + "is_summarization_enabled": is_summarization_enabled, + "is_single_pass_enabled": is_single_pass_enabled, + "input_file_path": input_file_path, + "source_file_name": source_file_name, + } - # ---- Step 6: Summarize (if enabled) ---- - index_metrics: dict = {} + # Build summarize params if enabled + summarize_params = None if is_summarization_enabled: - summarize_file_path, summarize_file_hash = _summarize( - tool_settings=tool_settings, - tool_data_dir=execution_run_data_folder, - dispatcher=dispatcher, - outputs=outputs, - usage_kwargs=usage_kwargs, - file_execution_id=file_execution_id, - organization_id=organization_id, - platform_service_api_key=platform_service_api_key, - fs=fs, - ) - payload[_SK.FILE_HASH] = summarize_file_hash - payload[_SK.FILE_PATH] = summarize_file_path - elif skip_extraction_and_indexing: - # Use source file directly for Excel with valid JSON - payload[_SK.FILE_PATH] = input_file_path - elif not is_single_pass_enabled: - # ---- Step 7: Index ---- - index_metrics = _index_documents( - outputs=outputs, - tool_settings=tool_settings, - tool_id=tool_id, - file_hash=file_hash, - extracted_text=extracted_text, - execution_run_data_folder=execution_run_data_folder, - is_highlight_enabled=is_highlight_enabled, - dispatcher=dispatcher, - file_execution_id=file_execution_id, - organization_id=organization_id, - platform_service_api_key=platform_service_api_key, - ) - - # ---- Step 8: Answer prompt (or single pass) ---- - if is_single_pass_enabled: - logger.info("Fetching response for single pass extraction...") - operation = "single_pass_extraction" - else: - # Handle table_settings injection - for output in outputs: - if _SK.TABLE_SETTINGS in output: - table_settings = output[_SK.TABLE_SETTINGS] - is_directory_mode = table_settings.get( - _SK.IS_DIRECTORY_MODE, False - ) - if skip_extraction_and_indexing: - table_settings[_SK.INPUT_FILE] = input_file_path - payload[_SK.FILE_PATH] = input_file_path - else: - table_settings[_SK.INPUT_FILE] = extracted_input_file - table_settings[_SK.IS_DIRECTORY_MODE] = is_directory_mode - logger.info( - "Performing table extraction with: %s", table_settings - ) - output[_SK.TABLE_SETTINGS] = table_settings - - logger.info( - "Fetching responses for '%d' prompt(s)...", len(outputs) - ) - operation = "answer_prompt" + prompt_keys = [o[_SK.NAME] for o in outputs] + summarize_params = { + "llm_adapter_instance_id": tool_settings[_SK.LLM], + "summarize_prompt": tool_settings.get( + _SK.SUMMARIZE_PROMPT, "" + ), + "extract_file_path": str( + execution_run_data_folder / _SK.EXTRACT + ), + "summarize_file_path": str( + execution_run_data_folder / _SK.SUMMARIZE + ), + "platform_api_key": platform_service_api_key, + "prompt_keys": prompt_keys, + } + + # ---- Step 6: Single dispatch to executor ---- + logger.info( + "Dispatching structure_pipeline: tool_id=%s " + "skip_extract=%s summarize=%s single_pass=%s", + tool_id, + skip_extraction_and_indexing, + is_summarization_enabled, + is_single_pass_enabled, + ) - answer_ctx = ExecutionContext( + pipeline_ctx = ExecutionContext( executor_name="legacy", - operation=operation, + operation="structure_pipeline", run_id=file_execution_id, execution_source="tool", organization_id=organization_id, request_id=file_execution_id, - executor_params=payload, + executor_params={ + "extract_params": extract_params, + "index_template": index_template, + "answer_params": answer_params, + "pipeline_options": pipeline_options, + "summarize_params": summarize_params, + }, ) - answer_result = dispatcher.dispatch(answer_ctx, timeout=EXECUTOR_TIMEOUT) - if not answer_result.success: - return answer_result.to_dict() - - structured_output = answer_result.data - - # ---- Step 9: Post-process and write output ---- - # Ensure metadata section exists - if _SK.METADATA not in structured_output: - structured_output[_SK.METADATA] = {} - - structured_output[_SK.METADATA][_SK.FILE_NAME] = source_file_name - - # Add extracted text for HITL raw view - if extracted_text: - structured_output[_SK.METADATA]["extracted_text"] = extracted_text - logger.info( - "Added extracted text to metadata (length: %d characters)", - len(extracted_text), - ) + pipeline_result = dispatcher.dispatch( + pipeline_ctx, timeout=EXECUTOR_TIMEOUT + ) + if not pipeline_result.success: + return pipeline_result.to_dict() - # Merge index metrics - if merged_metrics := _merge_metrics( - structured_output.get(_SK.METRICS, {}), index_metrics - ): - structured_output[_SK.METRICS] = merged_metrics + structured_output = pipeline_result.data - # Write output JSON + # ---- Step 7: Write output files ---- + # (metadata/metrics merging already done by executor pipeline) try: output_path = ( Path(output_dir_path) @@ -609,183 +547,6 @@ def _handle_profile_overrides( ) from e -def _summarize( - tool_settings: dict, - tool_data_dir: Path, - dispatcher: ExecutionDispatcher, - outputs: list[dict], - usage_kwargs: dict, - file_execution_id: str, - organization_id: str, - platform_service_api_key: str, - fs: Any, -) -> tuple[str, str]: - """Summarize the document, with filesystem caching. - - Returns: - Tuple of (summarize_file_path, summarize_file_hash). - """ - llm_adapter_instance_id = tool_settings[_SK.LLM] - embedding_instance_id = tool_settings[_SK.EMBEDDING] - vector_db_instance_id = tool_settings[_SK.VECTOR_DB] - x2text_instance_id = tool_settings[_SK.X2TEXT_ADAPTER] - summarize_prompt = tool_settings[_SK.SUMMARIZE_PROMPT] - run_id = usage_kwargs.get(UsageKwargs.RUN_ID, file_execution_id) - extract_file_path = tool_data_dir / _SK.EXTRACT - summarize_file_path = tool_data_dir / _SK.SUMMARIZE - - # Check cache - summarized_context = "" - logger.info( - "Checking if summarized context exists at '%s'...", - summarize_file_path, - ) - if fs.exists(summarize_file_path): - summarized_context = fs.read(path=summarize_file_path, mode="r") - - if not summarized_context: - context = fs.read(path=extract_file_path, mode="r") - prompt_keys = [] - for output in outputs: - prompt_keys.append(output[_SK.NAME]) - output[_SK.EMBEDDING] = embedding_instance_id - output[_SK.VECTOR_DB] = vector_db_instance_id - output[_SK.X2TEXT_ADAPTER] = x2text_instance_id - output[_SK.CHUNK_SIZE] = 0 - output[_SK.CHUNK_OVERLAP] = 0 - - logger.info("Summarized context not found, summarizing...") - summarize_ctx = ExecutionContext( - executor_name="legacy", - operation="summarize", - run_id=run_id, - execution_source="tool", - organization_id=organization_id, - request_id=file_execution_id, - executor_params={ - _SK.LLM_ADAPTER_INSTANCE_ID: llm_adapter_instance_id, - _SK.SUMMARIZE_PROMPT: summarize_prompt, - _SK.CONTEXT: context, - _SK.PROMPT_KEYS: prompt_keys, - "PLATFORM_SERVICE_API_KEY": platform_service_api_key, - }, - ) - summarize_result = dispatcher.dispatch( - summarize_ctx, timeout=EXECUTOR_TIMEOUT - ) - if not summarize_result.success: - raise RuntimeError( - f"Summarization failed: {summarize_result.error}" - ) - summarized_context = summarize_result.data.get(_SK.DATA, "") - logger.info( - "Writing summarized context to '%s'", summarize_file_path - ) - fs.write( - path=summarize_file_path, mode="w", data=summarized_context - ) - - summarize_file_hash = fs.get_hash_from_file(path=summarize_file_path) - return str(summarize_file_path), summarize_file_hash - - -def _index_documents( - outputs: list[dict], - tool_settings: dict, - tool_id: str, - file_hash: str, - extracted_text: str, - execution_run_data_folder: Path, - is_highlight_enabled: bool, - dispatcher: ExecutionDispatcher, - file_execution_id: str, - organization_id: str, - platform_service_api_key: str, -) -> dict: - """Index documents with dedup on parameter combinations. - - Returns: - Dict of index metrics per output name. - """ - import datetime - - index_metrics: dict = {} - seen_params: set = set() - - for output in outputs: - chunk_size = output[_SK.CHUNK_SIZE] - chunk_overlap = output[_SK.CHUNK_OVERLAP] - vector_db = tool_settings[_SK.VECTOR_DB] - embedding = tool_settings[_SK.EMBEDDING] - x2text = tool_settings[_SK.X2TEXT_ADAPTER] - - param_key = ( - f"chunk_size={chunk_size}_" - f"chunk_overlap={chunk_overlap}_" - f"vector_db={vector_db}_" - f"embedding={embedding}_" - f"x2text={x2text}" - ) - - if chunk_size != 0 and param_key not in seen_params: - seen_params.add(param_key) - - indexing_start_time = datetime.datetime.now() - logger.info( - "Indexing document with: chunk_size=%s, " - "chunk_overlap=%s, vector_db=%s, embedding=%s, " - "x2text=%s", - chunk_size, - chunk_overlap, - vector_db, - embedding, - x2text, - ) - - index_ctx = ExecutionContext( - executor_name="legacy", - operation="index", - run_id=file_execution_id, - execution_source="tool", - organization_id=organization_id, - request_id=file_execution_id, - executor_params={ - "embedding_instance_id": embedding, - "vector_db_instance_id": vector_db, - "x2text_instance_id": x2text, - "chunk_size": chunk_size, - "chunk_overlap": chunk_overlap, - "file_path": str( - execution_run_data_folder / _SK.EXTRACT - ), - "reindex": True, - "tool_id": tool_id, - "file_hash": file_hash, - "enable_highlight": is_highlight_enabled, - "extracted_text": extracted_text, - "platform_api_key": platform_service_api_key, - }, - ) - index_result = dispatcher.dispatch( - index_ctx, timeout=EXECUTOR_TIMEOUT - ) - if not index_result.success: - logger.warning( - "Indexing failed for param combo %s: %s", - param_key, - index_result.error, - ) - - elapsed = ( - datetime.datetime.now() - indexing_start_time - ).total_seconds() - index_metrics[output[_SK.NAME]] = { - _SK.INDEXING: {"time_taken(s)": elapsed} - } - - return index_metrics - - def _run_agentic_extraction( tool_metadata: dict, input_file_path: str, diff --git a/workers/tests/test_phase5d.py b/workers/tests/test_phase5d.py new file mode 100644 index 0000000000..a61403ee2e --- /dev/null +++ b/workers/tests/test_phase5d.py @@ -0,0 +1,902 @@ +"""Phase 5D — Tests for structure_pipeline compound operation. + +Tests _handle_structure_pipeline in LegacyExecutor which runs the full +extract → summarize → index → answer_prompt pipeline in a single +executor invocation. +""" + +import datetime +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.result import ExecutionResult + +# --------------------------------------------------------------------------- +# Patch targets — all at source in executor.executors.legacy_executor +# --------------------------------------------------------------------------- + +_PATCH_FILE_UTILS = "executor.executors.file_utils.FileUtils.get_fs_instance" +_PATCH_INDEXING_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_indexing_deps" +) +_PATCH_PROMPT_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def executor(): + """Create a LegacyExecutor instance.""" + from executor.executors.legacy_executor import LegacyExecutor + + return LegacyExecutor() + + +@pytest.fixture +def mock_fs(): + """Mock filesystem.""" + fs = MagicMock(name="file_storage") + fs.exists.return_value = False + fs.read.return_value = "" + fs.write.return_value = None + fs.get_hash_from_file.return_value = "hash123" + return fs + + +def _make_pipeline_context( + executor_params: dict, + run_id: str = "run-1", + organization_id: str = "org-1", +) -> ExecutionContext: + """Build a structure_pipeline ExecutionContext.""" + return ExecutionContext( + executor_name="legacy", + operation=Operation.STRUCTURE_PIPELINE.value, + run_id=run_id, + execution_source="tool", + organization_id=organization_id, + request_id="req-1", + executor_params=executor_params, + ) + + +def _base_extract_params() -> dict: + """Extract params template.""" + return { + "x2text_instance_id": "x2t-1", + "file_path": "/data/test.pdf", + "enable_highlight": False, + "output_file_path": "/data/exec/EXTRACT", + "platform_api_key": "sk-test", + "usage_kwargs": {"run_id": "run-1", "file_name": "test.pdf"}, + } + + +def _base_index_template() -> dict: + """Index template.""" + return { + "tool_id": "tool-1", + "file_hash": "hash-abc", + "is_highlight_enabled": False, + "platform_api_key": "sk-test", + "extracted_file_path": "/data/exec/EXTRACT", + } + + +def _base_answer_params() -> dict: + """Answer params (payload for answer_prompt).""" + return { + "run_id": "run-1", + "tool_settings": { + "vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + "challenge_llm": "", + "enable_challenge": False, + "enable_single_pass_extraction": False, + "summarize_as_source": False, + "enable_highlight": False, + }, + "outputs": [ + { + "name": "field_a", + "prompt": "What is the revenue?", + "type": "text", + "active": True, + "chunk-size": 512, + "chunk-overlap": 128, + "llm": "llm-1", + "embedding": "emb-1", + "vector-db": "vdb-1", + "x2text_adapter": "x2t-1", + "retrieval-strategy": "simple", + "similarity-top-k": 5, + }, + ], + "tool_id": "tool-1", + "file_hash": "hash-abc", + "file_name": "test.pdf", + "file_path": "/data/exec/EXTRACT", + "execution_source": "tool", + "custom_data": {}, + "PLATFORM_SERVICE_API_KEY": "sk-test", + } + + +def _base_pipeline_options() -> dict: + """Default pipeline options.""" + return { + "skip_extraction_and_indexing": False, + "is_summarization_enabled": False, + "is_single_pass_enabled": False, + "input_file_path": "/data/test.pdf", + "source_file_name": "test.pdf", + } + + +# --------------------------------------------------------------------------- +# Tests — Operation enum and routing +# --------------------------------------------------------------------------- + + +class TestStructurePipelineEnum: + """Verify enum and operation map registration.""" + + def test_operation_enum_exists(self): + assert Operation.STRUCTURE_PIPELINE.value == "structure_pipeline" + + def test_operation_map_has_structure_pipeline(self, executor): + assert "structure_pipeline" in executor._OPERATION_MAP + + +# --------------------------------------------------------------------------- +# Tests — Normal pipeline: extract → index → answer_prompt +# --------------------------------------------------------------------------- + + +class TestNormalPipeline: + """Normal pipeline: extract + index + answer_prompt.""" + + def test_extract_index_answer(self, executor): + """Full pipeline calls extract, index, and answer_prompt.""" + extract_result = ExecutionResult( + success=True, data={"extracted_text": "Revenue is $1M"} + ) + index_result = ExecutionResult( + success=True, data={"doc_id": "doc-1"} + ) + answer_result = ExecutionResult( + success=True, + data={ + "output": {"field_a": "$1M"}, + "metadata": {}, + "metrics": {"field_a": {"llm": {"time_taken(s)": 1.0}}}, + }, + ) + + executor._handle_extract = MagicMock(return_value=extract_result) + executor._handle_index = MagicMock(return_value=index_result) + executor._handle_answer_prompt = MagicMock( + return_value=answer_result + ) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + + result = executor._handle_structure_pipeline(ctx) + + assert result.success + assert executor._handle_extract.call_count == 1 + assert executor._handle_index.call_count == 1 + assert executor._handle_answer_prompt.call_count == 1 + + def test_result_has_metadata_and_file_name(self, executor): + """Result includes source_file_name in metadata.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock( + return_value=ExecutionResult( + success=True, data={"doc_id": "d1"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}, "metadata": {}} + ) + ) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + assert result.data["metadata"]["file_name"] == "test.pdf" + + def test_extracted_text_in_metadata(self, executor): + """Extracted text is added to result metadata.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "Revenue $1M"} + ) + ) + executor._handle_index = MagicMock( + return_value=ExecutionResult( + success=True, data={"doc_id": "d1"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.data["metadata"]["extracted_text"] == "Revenue $1M" + + def test_index_metrics_merged(self, executor): + """Index metrics are merged into answer metrics.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock( + return_value=ExecutionResult( + success=True, data={"doc_id": "d1"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, + data={ + "output": {}, + "metrics": { + "field_a": {"llm": {"time_taken(s)": 2.0}}, + }, + }, + ) + ) + # Simulate index metrics by patching _run_pipeline_index + executor._run_pipeline_index = MagicMock( + return_value={ + "field_a": {"indexing": {"time_taken(s)": 0.5}}, + } + ) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + metrics = result.data["metrics"] + # Both llm and indexing metrics for field_a should be merged + assert "llm" in metrics["field_a"] + assert "indexing" in metrics["field_a"] + + +# --------------------------------------------------------------------------- +# Tests — Extract failure propagation +# --------------------------------------------------------------------------- + + +class TestExtractFailure: + """Extract failure stops the pipeline.""" + + def test_extract_failure_stops_pipeline(self, executor): + executor._handle_extract = MagicMock( + return_value=ExecutionResult.failure(error="x2text error") + ) + executor._handle_index = MagicMock() + executor._handle_answer_prompt = MagicMock() + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert not result.success + assert "x2text error" in result.error + executor._handle_index.assert_not_called() + executor._handle_answer_prompt.assert_not_called() + + +# --------------------------------------------------------------------------- +# Tests — Skip extraction (smart table) +# --------------------------------------------------------------------------- + + +class TestSkipExtraction: + """Smart table: skip extract+index, use source file.""" + + def test_skip_extraction_uses_input_file(self, executor): + executor._handle_extract = MagicMock() + executor._handle_index = MagicMock() + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts = _base_pipeline_options() + opts["skip_extraction_and_indexing"] = True + answer = _base_answer_params() + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": opts, + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + executor._handle_extract.assert_not_called() + executor._handle_index.assert_not_called() + # file_path should be set to input_file_path + call_ctx = executor._handle_answer_prompt.call_args[0][0] + assert call_ctx.executor_params["file_path"] == "/data/test.pdf" + + def test_skip_extraction_table_settings_injection(self, executor): + """Table settings get input_file when extraction is skipped.""" + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts = _base_pipeline_options() + opts["skip_extraction_and_indexing"] = True + answer = _base_answer_params() + answer["outputs"][0]["table_settings"] = { + "is_directory_mode": False, + } + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": opts, + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + ts = answer["outputs"][0]["table_settings"] + assert ts["input_file"] == "/data/test.pdf" + + +# --------------------------------------------------------------------------- +# Tests — Single pass extraction +# --------------------------------------------------------------------------- + + +class TestSinglePass: + """Single pass: extract + answer_prompt (no indexing).""" + + def test_single_pass_skips_index(self, executor): + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock() + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts = _base_pipeline_options() + opts["is_single_pass_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": opts, + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + executor._handle_extract.assert_called_once() + executor._handle_index.assert_not_called() + executor._handle_answer_prompt.assert_called_once() + + def test_single_pass_operation_is_single_pass(self, executor): + """The answer_prompt call uses single_pass_extraction operation.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts = _base_pipeline_options() + opts["is_single_pass_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": opts, + }) + executor._handle_structure_pipeline(ctx) + + call_ctx = executor._handle_answer_prompt.call_args[0][0] + assert call_ctx.operation == "single_pass_extraction" + + +# --------------------------------------------------------------------------- +# Tests — Summarize pipeline +# --------------------------------------------------------------------------- + + +class TestSummarizePipeline: + """Summarize: extract + summarize + answer_prompt (no indexing).""" + + @patch(_PATCH_FILE_UTILS) + def test_summarize_calls_handle_summarize( + self, mock_get_fs, executor, mock_fs + ): + mock_get_fs.return_value = mock_fs + mock_fs.exists.return_value = False + mock_fs.read.return_value = "extracted text for summarize" + + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_summarize = MagicMock( + return_value=ExecutionResult( + success=True, data={"data": "summarized text"} + ) + ) + executor._handle_index = MagicMock() + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts = _base_pipeline_options() + opts["is_summarization_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": opts, + "summarize_params": { + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize this", + "extract_file_path": "/data/exec/EXTRACT", + "summarize_file_path": "/data/exec/SUMMARIZE", + "platform_api_key": "sk-test", + "prompt_keys": ["field_a"], + }, + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + executor._handle_summarize.assert_called_once() + executor._handle_index.assert_not_called() + + @patch(_PATCH_FILE_UTILS) + def test_summarize_uses_cache(self, mock_get_fs, executor, mock_fs): + """If cached summary exists, _handle_summarize is NOT called.""" + mock_get_fs.return_value = mock_fs + mock_fs.exists.return_value = True + mock_fs.read.return_value = "cached summary" + + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_summarize = MagicMock() + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + opts = _base_pipeline_options() + opts["is_summarization_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": opts, + "summarize_params": { + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize this", + "extract_file_path": "/data/exec/EXTRACT", + "summarize_file_path": "/data/exec/SUMMARIZE", + "platform_api_key": "sk-test", + "prompt_keys": ["field_a"], + }, + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + executor._handle_summarize.assert_not_called() + + @patch(_PATCH_FILE_UTILS) + def test_summarize_updates_answer_params( + self, mock_get_fs, executor, mock_fs + ): + """After summarize, answer_params file_path and hash are updated.""" + mock_get_fs.return_value = mock_fs + mock_fs.exists.return_value = False + mock_fs.read.return_value = "doc text" + mock_fs.get_hash_from_file.return_value = "sum-hash-456" + + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_summarize = MagicMock( + return_value=ExecutionResult( + success=True, data={"data": "summarized"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + answer = _base_answer_params() + opts = _base_pipeline_options() + opts["is_summarization_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": opts, + "summarize_params": { + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize", + "extract_file_path": "/data/exec/EXTRACT", + "summarize_file_path": "/data/exec/SUMMARIZE", + "platform_api_key": "sk-test", + "prompt_keys": [], + }, + }) + executor._handle_structure_pipeline(ctx) + + # Check answer_params were updated + assert answer["file_hash"] == "sum-hash-456" + assert answer["file_path"] == "/data/exec/SUMMARIZE" + + @patch(_PATCH_FILE_UTILS) + def test_summarize_sets_chunk_size_zero( + self, mock_get_fs, executor, mock_fs + ): + """Summarize sets chunk-size=0 for all outputs.""" + mock_get_fs.return_value = mock_fs + mock_fs.exists.return_value = True + mock_fs.read.return_value = "cached" + + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "t"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + answer = _base_answer_params() + opts = _base_pipeline_options() + opts["is_summarization_enabled"] = True + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": opts, + "summarize_params": { + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize", + "extract_file_path": "/data/exec/EXTRACT", + "summarize_file_path": "/data/exec/SUMMARIZE", + "platform_api_key": "sk-test", + "prompt_keys": [], + }, + }) + executor._handle_structure_pipeline(ctx) + + # Outputs should have chunk-size=0 + for output in answer["outputs"]: + assert output["chunk-size"] == 0 + assert output["chunk-overlap"] == 0 + + +# --------------------------------------------------------------------------- +# Tests — Index dedup +# --------------------------------------------------------------------------- + + +class TestIndexDedup: + """Index step deduplication.""" + + def test_index_dedup_skips_duplicate_params(self, executor): + """Duplicate param combos are only indexed once.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + index_call_count = 0 + original_index = executor._handle_index + + def counting_index(ctx): + nonlocal index_call_count + index_call_count += 1 + return ExecutionResult(success=True, data={"doc_id": "d1"}) + + executor._handle_index = counting_index + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + answer = _base_answer_params() + # Add a second output with same adapter params + answer["outputs"].append({ + "name": "field_b", + "prompt": "What is the profit?", + "type": "text", + "active": True, + "chunk-size": 512, + "chunk-overlap": 128, + "llm": "llm-1", + "embedding": "emb-1", + "vector-db": "vdb-1", + "x2text_adapter": "x2t-1", + }) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + # Only one index call despite two outputs (same params) + assert index_call_count == 1 + + def test_index_different_params_indexes_both(self, executor): + """Different param combos are indexed separately.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + index_call_count = 0 + + def counting_index(ctx): + nonlocal index_call_count + index_call_count += 1 + return ExecutionResult(success=True, data={"doc_id": "d1"}) + + executor._handle_index = counting_index + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + answer = _base_answer_params() + answer["outputs"].append({ + "name": "field_b", + "prompt": "What is the profit?", + "type": "text", + "active": True, + "chunk-size": 256, # Different chunk size + "chunk-overlap": 64, + "llm": "llm-1", + "embedding": "emb-1", + "vector-db": "vdb-1", + "x2text_adapter": "x2t-1", + }) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + assert index_call_count == 2 + + def test_chunk_size_zero_skips_index(self, executor): + """chunk-size=0 outputs skip indexing entirely.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock() + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + answer = _base_answer_params() + answer["outputs"][0]["chunk-size"] = 0 + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": answer, + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert result.success + executor._handle_index.assert_not_called() + + +# --------------------------------------------------------------------------- +# Tests — Answer prompt failure +# --------------------------------------------------------------------------- + + +class TestAnswerPromptFailure: + """Answer prompt failure propagates correctly.""" + + def test_answer_failure_propagates(self, executor): + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock( + return_value=ExecutionResult( + success=True, data={"doc_id": "d1"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult.failure(error="LLM timeout") + ) + + ctx = _make_pipeline_context({ + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }) + result = executor._handle_structure_pipeline(ctx) + + assert not result.success + assert "LLM timeout" in result.error + + +# --------------------------------------------------------------------------- +# Tests — Merge metrics utility +# --------------------------------------------------------------------------- + + +class TestMergeMetrics: + """Test _merge_pipeline_metrics.""" + + def test_merge_disjoint(self, executor): + m = executor._merge_pipeline_metrics( + {"a": {"x": 1}}, {"b": {"y": 2}} + ) + assert m == {"a": {"x": 1}, "b": {"y": 2}} + + def test_merge_overlapping(self, executor): + m = executor._merge_pipeline_metrics( + {"a": {"x": 1}}, {"a": {"y": 2}} + ) + assert m == {"a": {"x": 1, "y": 2}} + + def test_merge_non_dict_values(self, executor): + m = executor._merge_pipeline_metrics( + {"a": 1}, {"b": 2} + ) + assert m == {"a": 1, "b": 2} + + +# --------------------------------------------------------------------------- +# Tests — Sub-context creation +# --------------------------------------------------------------------------- + + +class TestSubContextCreation: + """Verify sub-contexts inherit parent context fields.""" + + def test_extract_context_inherits_fields(self, executor): + """Extract sub-context gets run_id, org_id, etc. from parent.""" + executor._handle_extract = MagicMock( + return_value=ExecutionResult( + success=True, data={"extracted_text": "text"} + ) + ) + executor._handle_index = MagicMock( + return_value=ExecutionResult( + success=True, data={"doc_id": "d1"} + ) + ) + executor._handle_answer_prompt = MagicMock( + return_value=ExecutionResult( + success=True, data={"output": {}} + ) + ) + + ctx = _make_pipeline_context( + { + "extract_params": _base_extract_params(), + "index_template": _base_index_template(), + "answer_params": _base_answer_params(), + "pipeline_options": _base_pipeline_options(), + }, + run_id="custom-run", + organization_id="custom-org", + ) + executor._handle_structure_pipeline(ctx) + + extract_ctx = executor._handle_extract.call_args[0][0] + assert extract_ctx.run_id == "custom-run" + assert extract_ctx.organization_id == "custom-org" + assert extract_ctx.operation == "extract" + + index_ctx = executor._handle_index.call_args[0][0] + assert index_ctx.run_id == "custom-run" + assert index_ctx.operation == "index" + + answer_ctx = executor._handle_answer_prompt.call_args[0][0] + assert answer_ctx.run_id == "custom-run" + assert answer_ctx.operation == "answer_prompt" diff --git a/workers/tests/test_sanity_phase3.py b/workers/tests/test_sanity_phase3.py index 6c543f8f34..5b41a02206 100644 --- a/workers/tests/test_sanity_phase3.py +++ b/workers/tests/test_sanity_phase3.py @@ -1,9 +1,10 @@ """Phase 3-SANITY — Integration tests for the structure tool Celery task. Tests the full structure tool pipeline with mocked platform API and -ExecutionDispatcher. Validates that execute_structure_tool correctly -orchestrates extract → index → answer_prompt operations and writes -output to filesystem. +ExecutionDispatcher. After Phase 5E, the structure tool task dispatches a +single ``structure_pipeline`` operation to the executor worker instead of +3 sequential dispatches. These tests verify the correct pipeline params +are assembled and the result is written to filesystem. """ import json @@ -127,16 +128,20 @@ def base_params(): } -def _make_dispatch_side_effect(operation_results: dict): - """Create a side_effect for dispatcher.dispatch that returns results by operation.""" - - def side_effect(ctx, timeout=None): - op = ctx.operation - if op in operation_results: - return operation_results[op] - return ExecutionResult(success=True, data={}) - - return side_effect +def _make_pipeline_result( + output: dict | None = None, + metadata: dict | None = None, + metrics: dict | None = None, +) -> ExecutionResult: + """Create a mock structure_pipeline result.""" + return ExecutionResult( + success=True, + data={ + "output": output or {}, + "metadata": metadata or {}, + "metrics": metrics or {}, + }, + ) # --------------------------------------------------------------------------- @@ -152,14 +157,14 @@ def test_task_enum_registered(self): assert str(TaskName.EXECUTE_STRUCTURE_TOOL) == "execute_structure_tool" -class TestStructureToolExtractIndexAnswer: - """Full pipeline: extract → index → answer_prompt.""" +class TestStructureToolPipeline: + """Full pipeline dispatched as single structure_pipeline operation.""" @patch(_PATCH_SHIM) @patch(_PATCH_FILE_STORAGE) @patch(_PATCH_PLATFORM_HELPER) @patch(_PATCH_DISPATCHER) - def test_structure_tool_extract_index_answer( + def test_structure_tool_single_dispatch( self, MockDispatcher, mock_create_ph, @@ -170,12 +175,11 @@ def test_structure_tool_extract_index_answer( mock_fs, mock_platform_helper, ): - """Full pipeline: extract → index → answer_prompt.""" + """Single structure_pipeline dispatch for extract+index+answer.""" from file_processing.structure_tool_task import ( _execute_structure_tool_impl as execute_structure_tool, ) - # Setup mocks mock_get_fs.return_value = mock_fs mock_create_ph.return_value = mock_platform_helper mock_platform_helper.get_prompt_studio_tool.return_value = { @@ -185,43 +189,89 @@ def test_structure_tool_extract_index_answer( dispatcher_instance = MagicMock() MockDispatcher.return_value = dispatcher_instance - extract_result = ExecutionResult( - success=True, - data={"extracted_text": "Revenue is $1M"}, - ) - answer_result = ExecutionResult( - success=True, - data={ - "output": {"field_a": "$1M"}, - "metadata": {"run_id": "fexec-789"}, - "metrics": {"field_a": {"extraction_llm": {"tokens": 50}}}, - }, + pipeline_result = _make_pipeline_result( + output={"field_a": "$1M"}, + metadata={"run_id": "fexec-789", "file_name": "test.pdf"}, + metrics={"field_a": {"extraction_llm": {"tokens": 50}}}, ) - # extract, index, answer_prompt - dispatcher_instance.dispatch.side_effect = [ - extract_result, - ExecutionResult(success=True, data={"doc_id": "doc-1"}), - answer_result, - ] + dispatcher_instance.dispatch.return_value = pipeline_result result = execute_structure_tool(base_params) assert result["success"] is True assert result["data"]["output"]["field_a"] == "$1M" assert result["data"]["metadata"]["file_name"] == "test.pdf" - # Verify output was written mock_fs.json_dump.assert_called_once() - # Verify dispatcher was called 3 times (extract, index, answer) - assert dispatcher_instance.dispatch.call_count == 3 - calls = dispatcher_instance.dispatch.call_args_list - assert calls[0][0][0].operation == "extract" - assert calls[1][0][0].operation == "index" - assert calls[2][0][0].operation == "answer_prompt" + # Single dispatch with structure_pipeline + assert dispatcher_instance.dispatch.call_count == 1 + ctx = dispatcher_instance.dispatch.call_args[0][0] + assert ctx.operation == "structure_pipeline" + assert ctx.execution_source == "tool" + assert ctx.executor_name == "legacy" + + @patch(_PATCH_SHIM) + @patch(_PATCH_FILE_STORAGE) + @patch(_PATCH_PLATFORM_HELPER) + @patch(_PATCH_DISPATCHER) + def test_pipeline_params_structure( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + base_params, + tool_metadata_regular, + mock_fs, + mock_platform_helper, + ): + """Verify executor_params contains all pipeline sub-params.""" + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl as execute_structure_tool, + ) + + mock_get_fs.return_value = mock_fs + mock_create_ph.return_value = mock_platform_helper + mock_platform_helper.get_prompt_studio_tool.return_value = { + "tool_metadata": tool_metadata_regular, + } + + dispatcher_instance = MagicMock() + MockDispatcher.return_value = dispatcher_instance + dispatcher_instance.dispatch.return_value = _make_pipeline_result() + + execute_structure_tool(base_params) + + ctx = dispatcher_instance.dispatch.call_args[0][0] + ep = ctx.executor_params + + # All required keys present + assert "extract_params" in ep + assert "index_template" in ep + assert "answer_params" in ep + assert "pipeline_options" in ep + + # Extract params + assert ep["extract_params"]["file_path"] == "/data/test.pdf" + + # Index template + assert ep["index_template"]["tool_id"] == "tool-123" + assert ep["index_template"]["file_hash"] == "filehash123" + + # Answer params + assert ep["answer_params"]["tool_id"] == "tool-123" + assert ep["answer_params"]["run_id"] == "fexec-789" + + # Pipeline options (normal flow) + opts = ep["pipeline_options"] + assert opts["skip_extraction_and_indexing"] is False + assert opts["is_summarization_enabled"] is False + assert opts["is_single_pass_enabled"] is False + assert opts["source_file_name"] == "test.pdf" class TestStructureToolSinglePass: - """Single-pass flag skips indexing, uses single_pass_extraction.""" + """Single-pass flag passed to pipeline_options.""" @patch(_PATCH_SHIM) @patch(_PATCH_FILE_STORAGE) @@ -248,41 +298,27 @@ def test_structure_tool_single_pass( "tool_metadata": tool_metadata_regular, } - # Enable single pass base_params["tool_instance_metadata"]["single_pass_extraction_mode"] = True dispatcher_instance = MagicMock() MockDispatcher.return_value = dispatcher_instance - - extract_result = ExecutionResult( - success=True, data={"extracted_text": "text"} - ) - answer_result = ExecutionResult( - success=True, - data={ - "output": {"field_a": "answer"}, - "metadata": {}, - "metrics": {}, - }, + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "answer"}, ) - # extract, then single_pass_extraction (no index) - dispatcher_instance.dispatch.side_effect = [ - extract_result, - answer_result, - ] result = execute_structure_tool(base_params) assert result["success"] is True - # Should be 2 calls: extract + single_pass_extraction (no index) - assert dispatcher_instance.dispatch.call_count == 2 - calls = dispatcher_instance.dispatch.call_args_list - assert calls[0][0][0].operation == "extract" - assert calls[1][0][0].operation == "single_pass_extraction" + # Single dispatch with is_single_pass_enabled flag + assert dispatcher_instance.dispatch.call_count == 1 + ctx = dispatcher_instance.dispatch.call_args[0][0] + assert ctx.operation == "structure_pipeline" + opts = ctx.executor_params["pipeline_options"] + assert opts["is_single_pass_enabled"] is True class TestStructureToolSummarize: - """Summarization path: extract → summarize → index → answer.""" + """Summarization params passed to pipeline.""" @patch(_PATCH_SHIM) @patch(_PATCH_FILE_STORAGE) @@ -309,55 +345,38 @@ def test_structure_tool_summarize_flow( "tool_metadata": tool_metadata_regular, } - # Add summarize settings tool_metadata_regular["tool_settings"]["summarize_prompt"] = ( "Summarize this doc" ) base_params["tool_instance_metadata"]["summarize_as_source"] = True - # Mock that extract file exists for reading - mock_fs.exists.return_value = False # No cached summary - mock_fs.read.return_value = "Full extracted text" - dispatcher_instance = MagicMock() MockDispatcher.return_value = dispatcher_instance - - extract_result = ExecutionResult( - success=True, data={"extracted_text": "Full text"} - ) - summarize_result = ExecutionResult( - success=True, data={"data": "Summarized text"} - ) - answer_result = ExecutionResult( - success=True, - data={ - "output": {"field_a": "answer"}, - "metadata": {}, - "metrics": {}, - }, + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "answer"}, ) - dispatcher_instance.dispatch.side_effect = [ - extract_result, - summarize_result, - answer_result, - ] result = execute_structure_tool(base_params) assert result["success"] is True - # extract + summarize + answer (no index because summarize changes payload) - assert dispatcher_instance.dispatch.call_count == 3 - calls = dispatcher_instance.dispatch.call_args_list - assert calls[0][0][0].operation == "extract" - assert calls[1][0][0].operation == "summarize" - assert calls[2][0][0].operation == "answer_prompt" + assert dispatcher_instance.dispatch.call_count == 1 + ctx = dispatcher_instance.dispatch.call_args[0][0] + assert ctx.operation == "structure_pipeline" - # Verify summarized text was written to cache - mock_fs.write.assert_called() + opts = ctx.executor_params["pipeline_options"] + assert opts["is_summarization_enabled"] is True + + # Summarize params included + sp = ctx.executor_params["summarize_params"] + assert sp is not None + assert sp["summarize_prompt"] == "Summarize this doc" + assert sp["llm_adapter_instance_id"] == "llm-1" + assert "extract_file_path" in sp + assert "summarize_file_path" in sp class TestStructureToolSmartTable: - """Excel with valid JSON schema skips extract and index.""" + """Excel with valid JSON schema sets skip_extraction_and_indexing.""" @patch(_PATCH_SHIM) @patch(_PATCH_FILE_STORAGE) @@ -381,7 +400,6 @@ def test_structure_tool_skip_extraction_smart_table( mock_get_fs.return_value = mock_fs mock_create_ph.return_value = mock_platform_helper - # Add table_settings with a valid JSON prompt tool_metadata_regular["outputs"][0]["table_settings"] = { "is_directory_mode": False, } @@ -393,25 +411,19 @@ def test_structure_tool_skip_extraction_smart_table( dispatcher_instance = MagicMock() MockDispatcher.return_value = dispatcher_instance - - answer_result = ExecutionResult( - success=True, - data={ - "output": {"field_a": "table_answer"}, - "metadata": {}, - "metrics": {}, - }, + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "table_answer"}, ) - # Only answer_prompt (skip extract and index) - dispatcher_instance.dispatch.side_effect = [answer_result] result = execute_structure_tool(base_params) assert result["success"] is True - # Only 1 call: answer_prompt (no extract, no index) + # Single pipeline dispatch with skip flag assert dispatcher_instance.dispatch.call_count == 1 - calls = dispatcher_instance.dispatch.call_args_list - assert calls[0][0][0].operation == "answer_prompt" + ctx = dispatcher_instance.dispatch.call_args[0][0] + assert ctx.operation == "structure_pipeline" + opts = ctx.executor_params["pipeline_options"] + assert opts["skip_extraction_and_indexing"] is True class TestStructureToolAgentic: @@ -471,7 +483,7 @@ def test_structure_tool_agentic_routing( class TestStructureToolProfileOverrides: - """Profile overrides modify tool_metadata correctly.""" + """Profile overrides modify tool_metadata before pipeline dispatch.""" @patch(_PATCH_SHIM) @patch(_PATCH_FILE_STORAGE) @@ -498,7 +510,6 @@ def test_structure_tool_profile_overrides( "tool_metadata": tool_metadata_regular, } - # Add profile override base_params["exec_metadata"]["llm_profile_id"] = "profile-1" mock_platform_helper.get_llm_profile.return_value = { "profile_name": "Test Profile", @@ -507,41 +518,25 @@ def test_structure_tool_profile_overrides( dispatcher_instance = MagicMock() MockDispatcher.return_value = dispatcher_instance - - extract_result = ExecutionResult( - success=True, data={"extracted_text": "text"} - ) - answer_result = ExecutionResult( - success=True, - data={ - "output": {"field_a": "answer"}, - "metadata": {}, - "metrics": {}, - }, + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "answer"}, ) - dispatcher_instance.dispatch.side_effect = [ - extract_result, - ExecutionResult(success=True, data={"doc_id": "d1"}), - answer_result, - ] result = execute_structure_tool(base_params) assert result["success"] is True - # Verify profile override was applied mock_platform_helper.get_llm_profile.assert_called_once_with("profile-1") - # The tool_settings should now have llm overridden assert tool_metadata_regular["tool_settings"]["llm"] == "llm-override" -class TestStructureToolExtractFailure: - """Dispatcher extract failure → task returns failure.""" +class TestStructureToolPipelineFailure: + """Pipeline failure propagated to caller.""" @patch(_PATCH_SHIM) @patch(_PATCH_FILE_STORAGE) @patch(_PATCH_PLATFORM_HELPER) @patch(_PATCH_DISPATCHER) - def test_structure_tool_extract_failure( + def test_structure_tool_pipeline_failure( self, MockDispatcher, mock_create_ph, @@ -565,27 +560,26 @@ def test_structure_tool_extract_failure( dispatcher_instance = MagicMock() MockDispatcher.return_value = dispatcher_instance - extract_failure = ExecutionResult.failure( + pipeline_failure = ExecutionResult.failure( error="X2Text adapter error: connection refused" ) - dispatcher_instance.dispatch.return_value = extract_failure + dispatcher_instance.dispatch.return_value = pipeline_failure result = execute_structure_tool(base_params) assert result["success"] is False assert "X2Text" in result["error"] - # Should only call extract, then bail assert dispatcher_instance.dispatch.call_count == 1 -class TestStructureToolIndexDedup: - """Same (chunk_size, overlap, vdb, emb) combo indexed only once.""" +class TestStructureToolMultipleOutputs: + """Multiple outputs are passed to executor in answer_params.""" @patch(_PATCH_SHIM) @patch(_PATCH_FILE_STORAGE) @patch(_PATCH_PLATFORM_HELPER) @patch(_PATCH_DISPATCHER) - def test_structure_tool_index_dedup( + def test_structure_tool_multiple_outputs( self, MockDispatcher, mock_create_ph, @@ -614,38 +608,20 @@ def test_structure_tool_index_dedup( dispatcher_instance = MagicMock() MockDispatcher.return_value = dispatcher_instance - - extract_result = ExecutionResult( - success=True, data={"extracted_text": "text"} + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "a", "field_b": "b"}, ) - index_result = ExecutionResult( - success=True, data={"doc_id": "d1"} - ) - answer_result = ExecutionResult( - success=True, - data={ - "output": {"field_a": "a", "field_b": "b"}, - "metadata": {}, - "metrics": {}, - }, - ) - dispatcher_instance.dispatch.side_effect = [ - extract_result, - index_result, # Only ONE index call despite 2 outputs - answer_result, - ] result = execute_structure_tool(base_params) assert result["success"] is True - # 3 calls: extract + 1 index (deduped) + answer - assert dispatcher_instance.dispatch.call_count == 3 - index_calls = [ - c - for c in dispatcher_instance.dispatch.call_args_list - if c[0][0].operation == "index" - ] - assert len(index_calls) == 1 + # Single dispatch — index dedup handled inside executor + assert dispatcher_instance.dispatch.call_count == 1 + ctx = dispatcher_instance.dispatch.call_args[0][0] + outputs = ctx.executor_params["answer_params"]["outputs"] + assert len(outputs) == 2 + assert outputs[0]["name"] == "field_a" + assert outputs[1]["name"] == "field_b" class TestStructureToolOutputWritten: @@ -678,23 +654,9 @@ def test_structure_tool_output_written( dispatcher_instance = MagicMock() MockDispatcher.return_value = dispatcher_instance - - extract_result = ExecutionResult( - success=True, data={"extracted_text": "text"} - ) - answer_result = ExecutionResult( - success=True, - data={ - "output": {"field_a": "answer"}, - "metadata": {}, - "metrics": {}, - }, + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "answer"}, ) - dispatcher_instance.dispatch.side_effect = [ - extract_result, - ExecutionResult(success=True, data={"doc_id": "d1"}), - answer_result, - ] result = execute_structure_tool(base_params) @@ -706,15 +668,13 @@ def test_structure_tool_output_written( "path", json_dump_call[1].get("path") if len(json_dump_call) > 1 else None ) if output_path is None: - # Try positional output_path = json_dump_call[0][0] if json_dump_call[0] else None - # Verify it ends with test.json (stem of test.pdf) assert str(output_path).endswith("test.json") class TestStructureToolMetadataFileName: - """metadata.file_name replaced with actual source filename.""" + """metadata.file_name in pipeline result preserved.""" @patch(_PATCH_SHIM) @patch(_PATCH_FILE_STORAGE) @@ -743,39 +703,25 @@ def test_structure_tool_metadata_file_name( dispatcher_instance = MagicMock() MockDispatcher.return_value = dispatcher_instance - - extract_result = ExecutionResult( - success=True, data={"extracted_text": "text"} - ) - answer_result = ExecutionResult( - success=True, - data={ - "output": {"field_a": "answer"}, - "metadata": {"run_id": "123"}, - "metrics": {}, - }, + dispatcher_instance.dispatch.return_value = _make_pipeline_result( + output={"field_a": "answer"}, + metadata={"run_id": "123", "file_name": "test.pdf"}, ) - dispatcher_instance.dispatch.side_effect = [ - extract_result, - ExecutionResult(success=True, data={"doc_id": "d1"}), - answer_result, - ] result = execute_structure_tool(base_params) assert result["success"] is True - # file_name in metadata should be the source_file_name assert result["data"]["metadata"]["file_name"] == "test.pdf" -class TestStructureToolSummarizeCache: - """Cached summary file skips dispatcher call.""" +class TestStructureToolNoSummarize: + """No summarize_params when summarization is not enabled.""" @patch(_PATCH_SHIM) @patch(_PATCH_FILE_STORAGE) @patch(_PATCH_PLATFORM_HELPER) @patch(_PATCH_DISPATCHER) - def test_summarize_cache_hit( + def test_no_summarize_params_when_disabled( self, MockDispatcher, mock_create_ph, @@ -792,47 +738,19 @@ def test_summarize_cache_hit( mock_get_fs.return_value = mock_fs mock_create_ph.return_value = mock_platform_helper - - tool_metadata_regular["tool_settings"]["summarize_prompt"] = ( - "Summarize" - ) mock_platform_helper.get_prompt_studio_tool.return_value = { "tool_metadata": tool_metadata_regular, } - base_params["tool_instance_metadata"]["summarize_as_source"] = True - - # Simulate cached summary exists - mock_fs.exists.return_value = True - mock_fs.read.return_value = "Cached summary text" - dispatcher_instance = MagicMock() MockDispatcher.return_value = dispatcher_instance + dispatcher_instance.dispatch.return_value = _make_pipeline_result() - extract_result = ExecutionResult( - success=True, data={"extracted_text": "text"} - ) - answer_result = ExecutionResult( - success=True, - data={ - "output": {"field_a": "from cache"}, - "metadata": {}, - "metrics": {}, - }, - ) - # extract + answer (no summarize call because cache hit) - dispatcher_instance.dispatch.side_effect = [ - extract_result, - answer_result, - ] - - result = execute_structure_tool(base_params) + execute_structure_tool(base_params) - assert result["success"] is True - # Should be 2 calls: extract + answer (no summarize) - assert dispatcher_instance.dispatch.call_count == 2 - ops = [c[0][0].operation for c in dispatcher_instance.dispatch.call_args_list] - assert "summarize" not in ops + ctx = dispatcher_instance.dispatch.call_args[0][0] + assert ctx.executor_params["summarize_params"] is None + assert ctx.executor_params["pipeline_options"]["is_summarization_enabled"] is False class TestWorkflowServiceDetection: @@ -981,16 +899,3 @@ def test_should_skip_extraction_with_json_schema(self): is True ) - def test_merge_metrics(self): - from file_processing.structure_tool_task import _merge_metrics - - m1 = {"field_a": {"extraction_llm": {"tokens": 50}}} - m2 = {"field_a": {"indexing": {"time_taken(s)": 1.5}}} - merged = _merge_metrics(m1, m2) - assert "extraction_llm" in merged["field_a"] - assert "indexing" in merged["field_a"] - - def test_merge_metrics_empty(self): - from file_processing.structure_tool_task import _merge_metrics - - assert _merge_metrics({}, {}) == {} diff --git a/workers/tests/test_sanity_phase5.py b/workers/tests/test_sanity_phase5.py new file mode 100644 index 0000000000..62d534d04c --- /dev/null +++ b/workers/tests/test_sanity_phase5.py @@ -0,0 +1,853 @@ +"""Phase 5-SANITY — Integration tests for the multi-hop elimination. + +Phase 5 eliminates idle backend worker slots by: + - Adding ``dispatch_with_callback`` (fire-and-forget with link/link_error) + - Adding compound operations: ``ide_index``, ``structure_pipeline`` + - Rewiring structure_tool_task to single ``structure_pipeline`` dispatch + +These tests push payloads through the full Celery eager-mode chain and +verify the results match what callers (views / structure_tool_task) expect. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from executor.executors.constants import ( + IndexingConstants as IKeys, + PromptServiceConstants as PSKeys, +) +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + +# --------------------------------------------------------------------------- +# Patch targets +# --------------------------------------------------------------------------- + +_PATCH_X2TEXT = "executor.executors.legacy_executor.X2Text" +_PATCH_FS = "executor.executors.legacy_executor.FileUtils.get_fs_instance" +_PATCH_INDEX_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_indexing_deps" +) +_PATCH_PROMPT_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" +) +_PATCH_SHIM = "executor.executors.legacy_executor.ExecutorToolShim" +_PATCH_RUN_COMPLETION = ( + "executor.executors.answer_prompt.AnswerPromptService.run_completion" +) +_PATCH_INDEX_UTILS = ( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key" +) + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _ensure_legacy_registered(): + """Ensure LegacyExecutor is registered.""" + from executor.executors.legacy_executor import LegacyExecutor + + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry._registry["legacy"] = LegacyExecutor + yield + + +@pytest.fixture +def eager_app(): + """Configure executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run_task(eager_app, ctx_dict): + """Run execute_extraction task via task.apply() (eager-safe).""" + task = eager_app.tasks["execute_extraction"] + result = task.apply(args=[ctx_dict]) + return result.get() + + +def _mock_llm(answer="pipeline answer"): + """Create a mock LLM matching the answer_prompt pattern.""" + llm = MagicMock(name="llm") + response = MagicMock() + response.text = answer + llm.complete.return_value = { + PSKeys.RESPONSE: response, + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.CONFIDENCE_DATA: None, + PSKeys.WORD_CONFIDENCE_DATA: None, + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + llm.get_usage_reason.return_value = "extraction" + llm.get_metrics.return_value = {"tokens": 42} + return llm + + +def _mock_prompt_deps(llm=None): + """Return 7-tuple matching _get_prompt_deps() shape.""" + if llm is None: + llm = _mock_llm() + + from executor.executors.answer_prompt import AnswerPromptService + + RetrievalService = MagicMock(name="RetrievalService") + RetrievalService.run_retrieval.return_value = ["chunk1"] + RetrievalService.retrieve_complete_context.return_value = ["full doc"] + + VariableReplacementService = MagicMock(name="VariableReplacementService") + VariableReplacementService.is_variables_present.return_value = False + + Index = MagicMock(name="Index") + index_instance = MagicMock() + index_instance.generate_index_key.return_value = "doc-key-1" + Index.return_value = index_instance + + LLM_cls = MagicMock(name="LLM") + LLM_cls.return_value = llm + + EmbeddingCompat = MagicMock(name="EmbeddingCompat") + VectorDB = MagicMock(name="VectorDB") + + return ( + AnswerPromptService, + RetrievalService, + VariableReplacementService, + Index, + LLM_cls, + EmbeddingCompat, + VectorDB, + ) + + +def _mock_process_response(text="extracted text"): + """Build a mock TextExtractionResult.""" + from unstract.sdk1.adapters.x2text.dto import ( + TextExtractionMetadata, + TextExtractionResult, + ) + + metadata = TextExtractionMetadata(whisper_hash="test-hash") + return TextExtractionResult( + extracted_text=text, + extraction_metadata=metadata, + ) + + +def _make_output(name="field_a", prompt="What is the revenue?", **overrides): + """Build an output dict for answer_prompt payloads.""" + d = { + PSKeys.NAME: name, + PSKeys.PROMPT: prompt, + PSKeys.TYPE: "text", + "chunk-size": 512, + "chunk-overlap": 64, + "retrieval-strategy": "simple", + "llm": "llm-1", + "embedding": "emb-1", + "vector-db": "vdb-1", + "x2text_adapter": "x2t-1", + "similarity-top-k": 3, + "active": True, + } + d.update(overrides) + return d + + +# --------------------------------------------------------------------------- +# 5A: dispatch_with_callback +# --------------------------------------------------------------------------- + + +class TestDispatchWithCallback: + """Verify dispatch_with_callback passes link/link_error to send_task.""" + + def test_callback_kwargs_passed(self): + mock_app = MagicMock() + mock_app.send_task.return_value = MagicMock(id="task-123") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + + ctx = ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id="run-cb-1", + execution_source="ide", + ) + on_success = MagicMock(name="success_sig") + on_error = MagicMock(name="error_sig") + + result = dispatcher.dispatch_with_callback( + ctx, + on_success=on_success, + on_error=on_error, + task_id="pre-generated-id", + ) + + call_kwargs = mock_app.send_task.call_args + assert call_kwargs.kwargs["link"] is on_success + assert call_kwargs.kwargs["link_error"] is on_error + assert call_kwargs.kwargs["task_id"] == "pre-generated-id" + assert result.id == "task-123" + + def test_no_callbacks_omits_link_kwargs(self): + mock_app = MagicMock() + mock_app.send_task.return_value = MagicMock(id="task-456") + dispatcher = ExecutionDispatcher(celery_app=mock_app) + + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="run-cb-2", + execution_source="tool", + ) + dispatcher.dispatch_with_callback(ctx) + + call_kwargs = mock_app.send_task.call_args + assert "link" not in call_kwargs.kwargs + assert "link_error" not in call_kwargs.kwargs + + def test_no_app_raises(self): + dispatcher = ExecutionDispatcher(celery_app=None) + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="run-cb-3", + execution_source="tool", + ) + with pytest.raises(ValueError, match="No Celery app"): + dispatcher.dispatch_with_callback(ctx) + + +# --------------------------------------------------------------------------- +# 5C: ide_index compound operation through eager chain +# --------------------------------------------------------------------------- + + +class TestIdeIndexEagerChain: + """ide_index: extract + index in a single executor invocation.""" + + @patch(_PATCH_INDEX_DEPS) + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_ide_index_success( + self, + MockShim, + MockX2Text, + mock_fs, + mock_index_deps, + eager_app, + ): + """Full ide_index through eager chain returns doc_id.""" + # Mock extract + x2t_instance = MagicMock() + x2t_instance.process.return_value = _mock_process_response( + "IDE extracted text" + ) + MockX2Text.return_value = x2t_instance + + fs = MagicMock() + fs.exists.return_value = False + mock_fs.return_value = fs + + # Mock index + index_inst = MagicMock() + index_inst.index.return_value = "idx-doc-1" + index_inst.generate_index_key.return_value = "idx-key-1" + mock_index_deps.return_value = ( + MagicMock(return_value=index_inst), # Index + MagicMock(), # EmbeddingCompat + MagicMock(), # VectorDB + ) + + ctx = ExecutionContext( + executor_name="legacy", + operation="ide_index", + run_id="run-ide-idx", + execution_source="ide", + organization_id="org-test", + executor_params={ + "extract_params": { + "x2text_instance_id": "x2t-1", + "file_path": "/data/doc.pdf", + "enable_highlight": False, + "output_file_path": "/data/extract/doc.txt", + "platform_api_key": "pk-test", + "usage_kwargs": {}, + }, + "index_params": { + "tool_id": "tool-1", + "embedding_instance_id": "emb-1", + "vector_db_instance_id": "vdb-1", + "x2text_instance_id": "x2t-1", + "file_path": "/data/extract/doc.txt", + "file_hash": None, + "chunk_overlap": 64, + "chunk_size": 512, + "reindex": True, + "enable_highlight": False, + "usage_kwargs": {}, + "run_id": "run-ide-idx", + "execution_source": "ide", + "platform_api_key": "pk-test", + }, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + + result = ExecutionResult.from_dict(result_dict) + assert result.success + assert "doc_id" in result.data + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_ide_index_extract_failure( + self, + MockShim, + MockX2Text, + mock_fs, + eager_app, + ): + """ide_index returns failure if extract fails.""" + x2t_instance = MagicMock() + x2t_instance.process.side_effect = Exception("X2Text unavailable") + MockX2Text.return_value = x2t_instance + + fs = MagicMock() + fs.exists.return_value = False + mock_fs.return_value = fs + + ctx = ExecutionContext( + executor_name="legacy", + operation="ide_index", + run_id="run-ide-fail", + execution_source="ide", + executor_params={ + "extract_params": { + "x2text_instance_id": "x2t-1", + "file_path": "/data/doc.pdf", + "enable_highlight": False, + "platform_api_key": "pk-test", + "usage_kwargs": {}, + }, + "index_params": { + "tool_id": "tool-1", + "embedding_instance_id": "emb-1", + "vector_db_instance_id": "vdb-1", + "x2text_instance_id": "x2t-1", + "file_path": "/data/extract/doc.txt", + "file_hash": None, + "chunk_overlap": 64, + "chunk_size": 512, + "reindex": True, + "enable_highlight": False, + "usage_kwargs": {}, + "run_id": "run-ide-fail", + "execution_source": "ide", + "platform_api_key": "pk-test", + }, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + result = ExecutionResult.from_dict(result_dict) + assert not result.success + assert "X2Text" in result.error + + +# --------------------------------------------------------------------------- +# 5D: structure_pipeline compound operation through eager chain +# --------------------------------------------------------------------------- + + +class TestStructurePipelineEagerChain: + """structure_pipeline: full extract→index→answer through eager chain.""" + + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-pipeline") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_INDEX_DEPS) + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_structure_pipeline_normal( + self, + MockShim, + MockX2Text, + mock_fs, + mock_index_deps, + mock_prompt_deps, + _mock_idx_utils, + eager_app, + ): + """Normal pipeline: extract → index → answer_prompt.""" + # Mock extract + x2t_instance = MagicMock() + x2t_instance.process.return_value = _mock_process_response("Revenue is $1M") + MockX2Text.return_value = x2t_instance + + fs = MagicMock() + fs.exists.return_value = False + mock_fs.return_value = fs + + # Mock index + index_inst = MagicMock() + index_inst.index.return_value = "idx-doc-1" + index_inst.generate_index_key.return_value = "idx-key-1" + mock_index_deps.return_value = ( + MagicMock(return_value=index_inst), + MagicMock(), + MagicMock(), + ) + + # Mock prompt deps + mock_prompt_deps.return_value = _mock_prompt_deps() + + ctx = ExecutionContext( + executor_name="legacy", + operation="structure_pipeline", + run_id="run-sp-1", + execution_source="tool", + organization_id="org-test", + executor_params={ + "extract_params": { + "x2text_instance_id": "x2t-1", + "file_path": "/data/test.pdf", + "enable_highlight": False, + "output_file_path": "/data/exec/EXTRACT", + "platform_api_key": "pk-test", + "usage_kwargs": {}, + }, + "index_template": { + "tool_id": "tool-1", + "file_hash": "hash123", + "is_highlight_enabled": False, + "platform_api_key": "pk-test", + "extracted_file_path": "/data/exec/EXTRACT", + }, + "answer_params": { + "run_id": "run-sp-1", + "execution_id": "exec-1", + "tool_settings": { + "vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + "enable_challenge": False, + "challenge_llm": "", + "enable_single_pass_extraction": False, + "summarize_as_source": False, + "enable_highlight": False, + }, + "outputs": [_make_output()], + "tool_id": "tool-1", + "file_hash": "hash123", + "file_name": "test.pdf", + "file_path": "/data/exec/EXTRACT", + "execution_source": "tool", + "PLATFORM_SERVICE_API_KEY": "pk-test", + }, + "pipeline_options": { + "skip_extraction_and_indexing": False, + "is_summarization_enabled": False, + "is_single_pass_enabled": False, + "input_file_path": "/data/test.pdf", + "source_file_name": "test.pdf", + }, + "summarize_params": None, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + + result = ExecutionResult.from_dict(result_dict) + assert result.success + assert "output" in result.data + assert "metadata" in result.data + # source_file_name injected into metadata + assert result.data["metadata"]["file_name"] == "test.pdf" + + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-sp") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_structure_pipeline_single_pass( + self, + MockShim, + MockX2Text, + mock_fs, + mock_prompt_deps, + _mock_idx_utils, + eager_app, + ): + """Single pass: extract → single_pass_extraction (no index).""" + x2t_instance = MagicMock() + x2t_instance.process.return_value = _mock_process_response("Revenue data") + MockX2Text.return_value = x2t_instance + + fs = MagicMock() + fs.exists.return_value = False + mock_fs.return_value = fs + + mock_prompt_deps.return_value = _mock_prompt_deps() + + ctx = ExecutionContext( + executor_name="legacy", + operation="structure_pipeline", + run_id="run-sp-sp", + execution_source="tool", + executor_params={ + "extract_params": { + "x2text_instance_id": "x2t-1", + "file_path": "/data/test.pdf", + "enable_highlight": False, + "output_file_path": "/data/exec/EXTRACT", + "platform_api_key": "pk-test", + "usage_kwargs": {}, + }, + "index_template": {}, + "answer_params": { + "run_id": "run-sp-sp", + "tool_settings": { + "vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + "enable_challenge": False, + "challenge_llm": "", + "enable_single_pass_extraction": True, + "summarize_as_source": False, + "enable_highlight": False, + }, + "outputs": [_make_output()], + "tool_id": "tool-1", + "file_hash": "hash123", + "file_name": "test.pdf", + "file_path": "/data/exec/EXTRACT", + "execution_source": "tool", + "PLATFORM_SERVICE_API_KEY": "pk-test", + }, + "pipeline_options": { + "skip_extraction_and_indexing": False, + "is_summarization_enabled": False, + "is_single_pass_enabled": True, + "input_file_path": "/data/test.pdf", + "source_file_name": "test.pdf", + }, + "summarize_params": None, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + + result = ExecutionResult.from_dict(result_dict) + assert result.success + assert "output" in result.data + + @patch(_PATCH_INDEX_UTILS, return_value="doc-id-skip") + @patch(_PATCH_PROMPT_DEPS) + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_structure_pipeline_skip_extraction( + self, + MockShim, + MockX2Text, + mock_fs, + mock_prompt_deps, + _mock_idx_utils, + eager_app, + ): + """Smart table: skip extraction, go straight to answer_prompt.""" + fs = MagicMock() + fs.exists.return_value = False + mock_fs.return_value = fs + + mock_prompt_deps.return_value = _mock_prompt_deps() + + ctx = ExecutionContext( + executor_name="legacy", + operation="structure_pipeline", + run_id="run-sp-skip", + execution_source="tool", + executor_params={ + "extract_params": {}, + "index_template": {}, + "answer_params": { + "run_id": "run-sp-skip", + "tool_settings": { + "vector-db": "vdb-1", + "embedding": "emb-1", + "x2text_adapter": "x2t-1", + "llm": "llm-1", + "enable_challenge": False, + "challenge_llm": "", + "enable_single_pass_extraction": False, + "summarize_as_source": False, + "enable_highlight": False, + }, + "outputs": [_make_output(prompt='{"key": "value"}')], + "tool_id": "tool-1", + "file_hash": "hash123", + "file_name": "test.xlsx", + "file_path": "/data/test.xlsx", + "execution_source": "tool", + "PLATFORM_SERVICE_API_KEY": "pk-test", + }, + "pipeline_options": { + "skip_extraction_and_indexing": True, + "is_summarization_enabled": False, + "is_single_pass_enabled": False, + "input_file_path": "/data/test.xlsx", + "source_file_name": "test.xlsx", + }, + "summarize_params": None, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + + result = ExecutionResult.from_dict(result_dict) + assert result.success + # No extract was called (X2Text not mocked beyond fixture) + MockX2Text.assert_not_called() + + @patch(_PATCH_FS) + @patch(_PATCH_X2TEXT) + @patch(_PATCH_SHIM) + def test_structure_pipeline_extract_failure( + self, + MockShim, + MockX2Text, + mock_fs, + eager_app, + ): + """Pipeline extract failure propagated as result failure.""" + x2t_instance = MagicMock() + x2t_instance.process.side_effect = Exception("X2Text timeout") + MockX2Text.return_value = x2t_instance + + fs = MagicMock() + fs.exists.return_value = False + mock_fs.return_value = fs + + ctx = ExecutionContext( + executor_name="legacy", + operation="structure_pipeline", + run_id="run-sp-fail", + execution_source="tool", + executor_params={ + "extract_params": { + "x2text_instance_id": "x2t-1", + "file_path": "/data/test.pdf", + "enable_highlight": False, + "platform_api_key": "pk-test", + "usage_kwargs": {}, + }, + "index_template": {}, + "answer_params": {}, + "pipeline_options": { + "skip_extraction_and_indexing": False, + "is_summarization_enabled": False, + "is_single_pass_enabled": False, + "input_file_path": "/data/test.pdf", + "source_file_name": "test.pdf", + }, + "summarize_params": None, + }, + ) + + result_dict = _run_task(eager_app, ctx.to_dict()) + + result = ExecutionResult.from_dict(result_dict) + assert not result.success + assert "X2Text" in result.error + + +# --------------------------------------------------------------------------- +# 5E: structure_tool_task single dispatch verification +# --------------------------------------------------------------------------- + + +class TestStructureToolSingleDispatch: + """Verify structure_tool_task dispatches exactly once.""" + + @patch( + "executor.executor_tool_shim.ExecutorToolShim" + ) + @patch( + "file_processing.structure_tool_task._get_file_storage" + ) + @patch( + "file_processing.structure_tool_task._create_platform_helper" + ) + @patch( + "file_processing.structure_tool_task.ExecutionDispatcher" + ) + def test_single_dispatch_normal( + self, + MockDispatcher, + mock_create_ph, + mock_get_fs, + MockShim, + ): + """Normal path sends single structure_pipeline dispatch.""" + from file_processing.structure_tool_task import ( + _execute_structure_tool_impl, + ) + + fs = MagicMock() + fs.exists.return_value = False + mock_get_fs.return_value = fs + + ph = MagicMock() + ph.get_prompt_studio_tool.return_value = { + "tool_metadata": { + "name": "Test", + "is_agentic": False, + "tool_id": "t1", + "tool_settings": { + "vector-db": "v1", + "embedding": "e1", + "x2text_adapter": "x1", + "llm": "l1", + }, + "outputs": [ + { + "name": "f1", + "prompt": "What?", + "type": "text", + "active": True, + "chunk-size": 512, + "chunk-overlap": 64, + "llm": "l1", + "embedding": "e1", + "vector-db": "v1", + "x2text_adapter": "x1", + }, + ], + }, + } + mock_create_ph.return_value = ph + + dispatcher = MagicMock() + MockDispatcher.return_value = dispatcher + dispatcher.dispatch.return_value = ExecutionResult( + success=True, + data={"output": {"f1": "ans"}, "metadata": {}, "metrics": {}}, + ) + + params = { + "organization_id": "org-1", + "workflow_id": "wf-1", + "execution_id": "ex-1", + "file_execution_id": "fex-1", + "tool_instance_metadata": {"prompt_registry_id": "pr-1"}, + "platform_service_api_key": "pk-1", + "input_file_path": "/data/test.pdf", + "output_dir_path": "/output", + "source_file_name": "test.pdf", + "execution_data_dir": "/data/exec", + "file_hash": "h1", + "exec_metadata": {}, + } + + result = _execute_structure_tool_impl(params) + + assert result["success"] is True + assert dispatcher.dispatch.call_count == 1 + ctx = dispatcher.dispatch.call_args[0][0] + assert ctx.operation == "structure_pipeline" + assert "extract_params" in ctx.executor_params + assert "index_template" in ctx.executor_params + assert "answer_params" in ctx.executor_params + assert "pipeline_options" in ctx.executor_params + + +# --------------------------------------------------------------------------- +# Operation enum completeness +# --------------------------------------------------------------------------- + + +class TestOperationEnum: + """Verify Phase 5 operations registered in enum.""" + + def test_ide_index_operation(self): + assert hasattr(Operation, "IDE_INDEX") + assert Operation.IDE_INDEX.value == "ide_index" + + def test_structure_pipeline_operation(self): + assert hasattr(Operation, "STRUCTURE_PIPELINE") + assert Operation.STRUCTURE_PIPELINE.value == "structure_pipeline" + + +# --------------------------------------------------------------------------- +# Dispatcher modes +# --------------------------------------------------------------------------- + + +class TestDispatcherModes: + """Verify all three dispatch modes work.""" + + def test_dispatch_sync(self): + """dispatch() calls send_task and .get().""" + mock_app = MagicMock() + async_result = MagicMock() + async_result.get.return_value = ExecutionResult( + success=True, data={"test": 1} + ).to_dict() + mock_app.send_task.return_value = async_result + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r1", + execution_source="tool", + ) + result = dispatcher.dispatch(ctx, timeout=10) + + assert result.success + mock_app.send_task.assert_called_once() + async_result.get.assert_called_once() + + def test_dispatch_async(self): + """dispatch_async() returns task_id without blocking.""" + mock_app = MagicMock() + mock_app.send_task.return_value = MagicMock(id="async-id") + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="r2", + execution_source="tool", + ) + task_id = dispatcher.dispatch_async(ctx) + + assert task_id == "async-id" + mock_app.send_task.assert_called_once() From 2f4f2dc874e8d6e080175c5d3b2cc2f2aa4b87e7 Mon Sep 17 00:00:00 2001 From: Ghost Jake <89829542+Deepak-Kesavan@users.noreply.github.com> Date: Mon, 2 Mar 2026 15:50:41 +0530 Subject: [PATCH 05/64] UN-3234 [FIX] Add beta tag to agentic prompt studio navigation item --- frontend/src/components/navigations/side-nav-bar/SideNavBar.jsx | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/components/navigations/side-nav-bar/SideNavBar.jsx b/frontend/src/components/navigations/side-nav-bar/SideNavBar.jsx index 8a02aede79..74f6c467b7 100644 --- a/frontend/src/components/navigations/side-nav-bar/SideNavBar.jsx +++ b/frontend/src/components/navigations/side-nav-bar/SideNavBar.jsx @@ -494,6 +494,7 @@ const SideNavBar = ({ collapsed, setCollapsed }) => { active: globalThis.location.pathname.startsWith( `/${orgName}/agentic-prompt-studio`, ), + tag: "BETA", }); } From d041201b5aed072d84cf3db8c07952fb90792c29 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Mon, 2 Mar 2026 15:56:00 +0530 Subject: [PATCH 06/64] Added executors for agentic prompt studio --- docker/docker-compose.yaml | 2 +- .../src/unstract/sdk1/execution/context.py | 13 +- .../src/unstract/sdk1/execution/dispatcher.py | 34 +- unstract/sdk1/tests/test_execution.py | 6 +- workers/executor/executors/__init__.py | 9 +- workers/executor/executors/answer_prompt.py | 21 +- workers/executor/executors/legacy_executor.py | 145 +++- .../executor/executors/plugins/__init__.py | 3 + workers/executor/executors/plugins/loader.py | 80 ++ .../executor/executors/plugins/protocols.py | 45 ++ .../executors/plugins/text_processor.py | 21 + workers/executor/tasks.py | 7 + .../file_processing/structure_tool_task.py | 8 +- workers/shared/enums/worker_enums_base.py | 6 +- workers/tests/test_executor_sanity.py | 4 +- .../tests/test_legacy_executor_scaffold.py | 26 +- workers/tests/test_phase2f.py | 23 +- workers/tests/test_sanity_phase2.py | 11 +- workers/tests/test_sanity_phase3.py | 17 +- workers/tests/test_sanity_phase6a.py | 284 ++++++++ workers/tests/test_sanity_phase6c.py | 559 ++++++++++++++ workers/tests/test_sanity_phase6d.py | 514 +++++++++++++ workers/tests/test_sanity_phase6e.py | 218 ++++++ workers/tests/test_sanity_phase6f.py | 192 +++++ workers/tests/test_sanity_phase6g.py | 297 ++++++++ workers/tests/test_sanity_phase6h.py | 261 +++++++ workers/tests/test_sanity_phase6i.py | 272 +++++++ workers/tests/test_sanity_phase6j.py | 682 ++++++++++++++++++ 28 files changed, 3676 insertions(+), 84 deletions(-) create mode 100644 workers/executor/executors/plugins/__init__.py create mode 100644 workers/executor/executors/plugins/loader.py create mode 100644 workers/executor/executors/plugins/protocols.py create mode 100644 workers/executor/executors/plugins/text_processor.py create mode 100644 workers/tests/test_sanity_phase6a.py create mode 100644 workers/tests/test_sanity_phase6c.py create mode 100644 workers/tests/test_sanity_phase6d.py create mode 100644 workers/tests/test_sanity_phase6e.py create mode 100644 workers/tests/test_sanity_phase6f.py create mode 100644 workers/tests/test_sanity_phase6g.py create mode 100644 workers/tests/test_sanity_phase6h.py create mode 100644 workers/tests/test_sanity_phase6i.py create mode 100644 workers/tests/test_sanity_phase6j.py diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 07611650b4..c6a81f45a7 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -585,7 +585,7 @@ services: - EXECUTOR_METRICS_PORT=8088 - HEALTH_PORT=8088 # Configurable Celery options - - CELERY_QUEUES_EXECUTOR=${CELERY_QUEUES_EXECUTOR:-executor} + - CELERY_QUEUES_EXECUTOR=${CELERY_QUEUES_EXECUTOR:-celery_executor_legacy} - CELERY_POOL=${WORKER_EXECUTOR_POOL:-prefork} - CELERY_PREFETCH_MULTIPLIER=${WORKER_EXECUTOR_PREFETCH_MULTIPLIER:-1} - CELERY_CONCURRENCY=${WORKER_EXECUTOR_CONCURRENCY:-2} diff --git a/unstract/sdk1/src/unstract/sdk1/execution/context.py b/unstract/sdk1/src/unstract/sdk1/execution/context.py index 4eb3e22bb6..a1efb4c3f8 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/context.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/context.py @@ -29,9 +29,20 @@ class Operation(str, Enum): ANSWER_PROMPT = "answer_prompt" SINGLE_PASS_EXTRACTION = "single_pass_extraction" SUMMARIZE = "summarize" - AGENTIC_EXTRACTION = "agentic_extraction" IDE_INDEX = "ide_index" STRUCTURE_PIPELINE = "structure_pipeline" + TABLE_EXTRACT = "table_extract" + SMART_TABLE_EXTRACT = "smart_table_extract" + SPS_ANSWER_PROMPT = "sps_answer_prompt" + SPS_INDEX = "sps_index" + AGENTIC_EXTRACT = "agentic_extract" + AGENTIC_SUMMARIZE = "agentic_summarize" + AGENTIC_UNIFORMIZE = "agentic_uniformize" + AGENTIC_FINALIZE = "agentic_finalize" + AGENTIC_GENERATE_PROMPT = "agentic_generate_prompt" + AGENTIC_GENERATE_PROMPT_PIPELINE = "agentic_generate_prompt_pipeline" + AGENTIC_COMPARE = "agentic_compare" + AGENTIC_TUNE_FIELD = "agentic_tune_field" @dataclass diff --git a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py index 949a174597..4afe708a1d 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py @@ -25,7 +25,11 @@ # Constants matching workers/shared/enums values. # Defined here to avoid an SDK1 → workers package dependency. _TASK_NAME = "execute_extraction" -_QUEUE_NAME = "executor" + +# Queue-per-executor prefix. Each executor gets its own Celery queue +# named ``celery_executor_{executor_name}``, derived automatically +# from ``ExecutionContext.executor_name``. +_QUEUE_PREFIX = "celery_executor_" # Caller-side timeout (seconds) for AsyncResult.get(). # This controls how long the *caller* waits for the executor to @@ -77,6 +81,16 @@ def __init__(self, celery_app: Any = None) -> None: """ self._app = celery_app + @staticmethod + def _get_queue(executor_name: str) -> str: + """Derive the Celery queue name from *executor_name*. + + Convention: ``celery_executor_{executor_name}``. + Adding a new executor automatically gets its own queue — + no registry change needed. + """ + return f"{_QUEUE_PREFIX}{executor_name}" + def dispatch( self, context: ExecutionContext, @@ -108,20 +122,22 @@ def dispatch( ) ) + queue = self._get_queue(context.executor_name) logger.info( "Dispatching execution: executor=%s operation=%s " - "run_id=%s request_id=%s timeout=%ss", + "run_id=%s request_id=%s timeout=%ss queue=%s", context.executor_name, context.operation, context.run_id, context.request_id, timeout, + queue, ) async_result = self._app.send_task( _TASK_NAME, args=[context.to_dict()], - queue=_QUEUE_NAME, + queue=queue, ) logger.info( "Task sent: celery_task_id=%s, waiting for result...", @@ -172,19 +188,21 @@ def dispatch_async( "No Celery app configured on ExecutionDispatcher" ) + queue = self._get_queue(context.executor_name) logger.info( "Dispatching async execution: executor=%s " - "operation=%s run_id=%s request_id=%s", + "operation=%s run_id=%s request_id=%s queue=%s", context.executor_name, context.operation, context.run_id, context.request_id, + queue, ) async_result = self._app.send_task( _TASK_NAME, args=[context.to_dict()], - queue=_QUEUE_NAME, + queue=queue, ) return async_result.id @@ -228,21 +246,23 @@ def dispatch_with_callback( "No Celery app configured on ExecutionDispatcher" ) + queue = self._get_queue(context.executor_name) logger.info( "Dispatching with callback: executor=%s " "operation=%s run_id=%s request_id=%s " - "on_success=%s on_error=%s", + "on_success=%s on_error=%s queue=%s", context.executor_name, context.operation, context.run_id, context.request_id, on_success, on_error, + queue, ) send_kwargs: dict[str, Any] = { "args": [context.to_dict()], - "queue": _QUEUE_NAME, + "queue": queue, } if on_success is not None: send_kwargs["link"] = on_success diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index d128d9736a..048cccdc92 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -662,7 +662,7 @@ def test_dispatch_sends_task_and_returns_result( mock_app.send_task.assert_called_once_with( "execute_extraction", args=[ctx.to_dict()], - queue="executor", + queue="celery_executor_legacy", ) mock_app.send_task.return_value.get.assert_called_once_with( timeout=60, disable_sync_subtasks=False @@ -753,7 +753,7 @@ def test_dispatch_async_returns_task_id(self: Self) -> None: mock_app.send_task.assert_called_once_with( "execute_extraction", args=[ctx.to_dict()], - queue="executor", + queue="celery_executor_legacy", ) def test_dispatch_no_app_raises_value_error( @@ -841,7 +841,7 @@ def test_dispatch_with_callback_sends_link_and_link_error( mock_app.send_task.assert_called_once_with( "execute_extraction", args=[ctx.to_dict()], - queue="executor", + queue="celery_executor_legacy", link=on_success, link_error=on_error, ) diff --git a/workers/executor/executors/__init__.py b/workers/executor/executors/__init__.py index c42cc95175..cb2b54c980 100644 --- a/workers/executor/executors/__init__.py +++ b/workers/executor/executors/__init__.py @@ -1,9 +1,16 @@ """Executor implementations package. Importing this module triggers ``@ExecutorRegistry.register`` for all -bundled executors. +bundled executors and discovers cloud executors via entry points. """ from executor.executors.legacy_executor import LegacyExecutor +from executor.executors.plugins.loader import ExecutorPluginLoader + +# Discover and register cloud executors installed via entry points. +# Each cloud executor class is decorated with @ExecutorRegistry.register, +# so importing it (via ep.load()) is enough to register it. +# If no cloud plugins are installed this returns an empty list. +_cloud_executors = ExecutorPluginLoader.discover_executors() __all__ = ["LegacyExecutor"] diff --git a/workers/executor/executors/answer_prompt.py b/workers/executor/executors/answer_prompt.py index 25d18edae9..902c81a66c 100644 --- a/workers/executor/executors/answer_prompt.py +++ b/workers/executor/executors/answer_prompt.py @@ -4,9 +4,10 @@ Flask dependencies (app.logger, PluginManager, APIError) replaced with standard logging and executor exceptions. -Plugin-dependent features (highlight, challenge, table-extraction, -line-item-extraction) are omitted — they require a plugin loading -mechanism that will be added in a later phase. +Highlight/word-confidence support is available via the ``process_text`` +callback parameter — callers pass the highlight-data plugin's ``run`` +method when the plugin is installed. Challenge and evaluation plugins +are integrated at the caller level (LegacyExecutor). """ import ipaddress @@ -104,6 +105,7 @@ def construct_and_run_prompt( metadata: dict[str, Any], file_path: str = "", execution_source: str | None = "ide", + process_text: Any = None, ) -> str: """Construct the full prompt and run LLM completion. @@ -116,6 +118,8 @@ def construct_and_run_prompt( metadata: Metadata dict (updated in place with highlight info). file_path: Path to the extracted text file. execution_source: "ide" or "tool". + process_text: Optional callback for text processing during + completion (e.g. highlight-data plugin's ``run`` method). Returns: The LLM answer string. @@ -156,6 +160,7 @@ def construct_and_run_prompt( enable_word_confidence=enable_word_confidence, file_path=file_path, execution_source=execution_source, + process_text=process_text, ) @staticmethod @@ -212,11 +217,15 @@ def run_completion( enable_word_confidence: bool = False, file_path: str = "", execution_source: str | None = None, + process_text: Any = None, ) -> str: """Run LLM completion and extract the answer. - Highlight/word-confidence plugin support is not available in the - executor worker yet — those features are skipped here. + Args: + process_text: Optional callback for text processing during + completion (e.g. highlight-data plugin's ``run`` method). + When provided, the SDK passes LLM response text through + this callback, enabling source attribution. """ try: from unstract.sdk1.exceptions import RateLimitError as SdkRateLimitError @@ -228,7 +237,7 @@ def run_completion( try: completion = llm.complete( prompt=prompt, - process_text=None, + process_text=process_text, extract_json=prompt_type.lower() != PSKeys.TEXT, ) answer: str = completion[PSKeys.RESPONSE].text diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 9c98eb353d..3c87482ee6 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -52,7 +52,6 @@ class LegacyExecutor(BaseExecutor): Operation.ANSWER_PROMPT.value: "_handle_answer_prompt", Operation.SINGLE_PASS_EXTRACTION.value: "_handle_single_pass_extraction", Operation.SUMMARIZE.value: "_handle_summarize", - Operation.AGENTIC_EXTRACTION.value: "_handle_agentic_extraction", Operation.IDE_INDEX.value: "_handle_ide_index", Operation.STRUCTURE_PIPELINE.value: "_handle_structure_pipeline", } @@ -221,9 +220,21 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: context.run_id, ) shim.stream_log("Text extraction completed") + result_data: dict[str, Any] = { + IKeys.EXTRACTED_TEXT: process_response.extracted_text, + } + # Include highlight metadata when available + # (used by agentic extraction for PDF source referencing) + if ( + process_response.extraction_metadata + and process_response.extraction_metadata.line_metadata + ): + result_data["highlight_metadata"] = ( + process_response.extraction_metadata.line_metadata + ) return ExecutionResult( success=True, - data={IKeys.EXTRACTED_TEXT: process_response.extracted_text}, + data=result_data, ) except AdapterError as e: name = x2text.x2text_instance.get_name() @@ -1027,6 +1038,30 @@ def _handle_answer_prompt( VectorDB, ) = self._get_prompt_deps() + # ---- Initialize highlight plugin (if enabled + installed) ---------- + process_text_fn = None + enable_highlight = tool_settings.get(PSKeys.ENABLE_HIGHLIGHT, False) + if enable_highlight: + from executor.executors.plugins import ExecutorPluginLoader + + highlight_cls = ExecutorPluginLoader.get("highlight-data") + if highlight_cls: + from executor.executors.file_utils import FileUtils + + fs_instance = FileUtils.get_fs_instance( + execution_source=execution_source + ) + highlight_instance = highlight_cls( + file_path=file_path, + fs_instance=fs_instance, + execution_source=execution_source, + ) + process_text_fn = highlight_instance.run + logger.info( + "Highlight plugin initialized for file=%s", + doc_name, + ) + # ---- First pass: collect variable names + required fields ---------- for output in prompts: variable_names.append(output[PSKeys.NAME]) @@ -1139,20 +1174,21 @@ def _handle_answer_prompt( message=msg, code=status_code ) from e - # TABLE and LINE_ITEM types require plugins not yet available + # TABLE type is handled by TableExtractorExecutor (separate + # queue). LINE_ITEM is not supported. The backend dispatcher + # must route these types to the correct executor; if they + # reach LegacyExecutor it's a mis-route. if output[PSKeys.TYPE] == PSKeys.TABLE: raise LegacyExecutorError( message=( - "TABLE extraction requires plugins not yet " - "available in the executor worker." + "TABLE extraction is handled by " + "TableExtractorExecutor. Route TABLE prompts " + "with executor_name='table'." ) ) if output[PSKeys.TYPE] == PSKeys.LINE_ITEM: raise LegacyExecutorError( - message=( - "LINE_ITEM extraction requires plugins not yet " - "available in the executor worker." - ) + message="LINE_ITEM extraction is not supported." ) # ---- Retrieval + Answer ---------------------------------------- @@ -1211,6 +1247,7 @@ def _handle_answer_prompt( metadata=metadata, execution_source=execution_source, file_path=file_path, + process_text=process_text_fn, ) else: logger.warning( @@ -1235,6 +1272,76 @@ def _handle_answer_prompt( doc_name=doc_name, ) + # ---- Challenge (quality verification) ---------------------- + if tool_settings.get(PSKeys.ENABLE_CHALLENGE): + from executor.executors.plugins import ( + ExecutorPluginLoader, + ) + + challenge_cls = ExecutorPluginLoader.get("challenge") + if challenge_cls: + challenge_llm_id = tool_settings.get( + PSKeys.CHALLENGE_LLM + ) + if challenge_llm_id: + shim.stream_log( + f"Running challenge for: {prompt_name}" + ) + challenge_llm = LLM( + adapter_instance_id=challenge_llm_id, + tool=shim, + usage_kwargs={ + **usage_kwargs, + PSKeys.LLM_USAGE_REASON: PSKeys.CHALLENGE, + }, + capture_metrics=True, + ) + challenger = challenge_cls( + llm=llm, + challenge_llm=challenge_llm, + context="\n".join(context_list), + tool_settings=tool_settings, + output=output, + structured_output=structured_output, + run_id=run_id, + platform_key=platform_api_key, + metadata=metadata, + ) + challenger.run() + logger.info( + "Challenge completed: prompt=%s", + prompt_name, + ) + + # ---- Evaluation (prompt evaluation) ------------------------ + eval_settings = output.get(PSKeys.EVAL_SETTINGS, {}) + if eval_settings.get(PSKeys.EVAL_SETTINGS_EVALUATE): + from executor.executors.plugins import ( + ExecutorPluginLoader, + ) + + evaluator_cls = ExecutorPluginLoader.get("evaluation") + if evaluator_cls: + shim.stream_log( + f"Running evaluation for: {prompt_name}" + ) + evaluator = evaluator_cls( + query=output.get(PSKeys.COMBINED_PROMPT, ""), + context="\n".join(context_list), + response=structured_output.get(prompt_name), + reference_answer=output.get( + "reference_answer", "" + ), + prompt=output, + structured_output=structured_output, + platform_key=platform_api_key, + ) + evaluator.run() + logger.info( + "Evaluation completed: prompt=%s", + prompt_name, + ) + shim.stream_log(f"Completed prompt: {prompt_name}") # Strip trailing newline @@ -1518,23 +1625,3 @@ def _handle_summarize( code=status_code, ) from e - def _handle_agentic_extraction( - self, context: ExecutionContext - ) -> ExecutionResult: - """Handle ``Operation.AGENTIC_EXTRACTION``. - - Agentic extraction requires the agentic extraction plugin - (AutoGen-based multi-agent system). This is not available - in the executor worker — it will be migrated when plugin - support is added. - - Returns: - ExecutionResult.failure indicating the plugin is required. - """ - raise LegacyExecutorError( - message=( - "Agentic extraction requires the agentic extraction " - "plugin which is not yet available in the executor " - "worker." - ), - ) diff --git a/workers/executor/executors/plugins/__init__.py b/workers/executor/executors/plugins/__init__.py new file mode 100644 index 0000000000..b730ff12b6 --- /dev/null +++ b/workers/executor/executors/plugins/__init__.py @@ -0,0 +1,3 @@ +from executor.executors.plugins.loader import ExecutorPluginLoader + +__all__ = ["ExecutorPluginLoader"] diff --git a/workers/executor/executors/plugins/loader.py b/workers/executor/executors/plugins/loader.py new file mode 100644 index 0000000000..35c29f0841 --- /dev/null +++ b/workers/executor/executors/plugins/loader.py @@ -0,0 +1,80 @@ +"""Entry-point-based discovery for cloud plugins and executors. + +Two entry point groups are used: + +- ``unstract.executor.plugins`` + Utility plugins (highlight-data, challenge, evaluation). + Loaded lazily on first ``get()`` call and cached. + +- ``unstract.executor.executors`` + Executor classes that self-register via ``@ExecutorRegistry.register``. + Loaded eagerly at worker startup from ``executors/__init__.py``. +""" + +import logging +from typing import Any + +logger = logging.getLogger(__name__) + + +class ExecutorPluginLoader: + """Discovers cloud plugins and executors via setuptools entry points.""" + + _plugins: dict[str, type] | None = None + + @classmethod + def get(cls, name: str) -> type | None: + """Get a plugin class by name. Returns None if not installed.""" + if cls._plugins is None: + cls._discover_plugins() + return cls._plugins.get(name) + + @classmethod + def discover_executors(cls) -> list[str]: + """Load cloud executor classes via entry points. + + Importing each entry point's class triggers + ``@ExecutorRegistry.register``. Called once at worker startup. + + Returns: + List of discovered executor entry point names. + """ + from importlib.metadata import entry_points + + discovered: list[str] = [] + eps = entry_points(group="unstract.executor.executors") + for ep in eps: + try: + ep.load() # import triggers @ExecutorRegistry.register + discovered.append(ep.name) + logger.info("Loaded cloud executor: %s", ep.name) + except Exception: + logger.warning( + "Failed to load cloud executor: %s", + ep.name, + exc_info=True, + ) + return discovered + + @classmethod + def _discover_plugins(cls) -> None: + """Discover utility plugins from entry points (lazy, first use).""" + from importlib.metadata import entry_points + + cls._plugins = {} + eps = entry_points(group="unstract.executor.plugins") + for ep in eps: + try: + cls._plugins[ep.name] = ep.load() + logger.info("Loaded executor plugin: %s", ep.name) + except Exception: + logger.warning( + "Failed to load executor plugin: %s", + ep.name, + exc_info=True, + ) + + @classmethod + def clear(cls) -> None: + """Reset cached state. Intended for tests only.""" + cls._plugins = None diff --git a/workers/executor/executors/plugins/protocols.py b/workers/executor/executors/plugins/protocols.py new file mode 100644 index 0000000000..0e81edb22a --- /dev/null +++ b/workers/executor/executors/plugins/protocols.py @@ -0,0 +1,45 @@ +"""Protocol classes defining contracts for cloud executor plugins. + +Cloud plugins must satisfy these protocols. The OSS repo never imports +cloud code — only these protocols and ``ExecutorPluginLoader.get(name)`` +are used to interact with plugins. +""" + +from typing import Any, Protocol, runtime_checkable + + +@runtime_checkable +class HighlightDataProtocol(Protocol): + """Cross-cutting: source attribution from LLMWhisperer metadata.""" + + def __init__( + self, + file_path: str, + fs_instance: Any, + execution_source: str = "", + **kwargs: Any, + ) -> None: ... + + def run( + self, response: str, is_json: bool = False, **kwargs: Any + ) -> dict: ... + + def get_highlight_data(self) -> Any: ... + + def get_confidence_data(self) -> Any: ... + + def extract_word_confidence(self, **kwargs: Any) -> dict: ... + + +@runtime_checkable +class ChallengeProtocol(Protocol): + """Legacy executor: quality verification with a second LLM.""" + + def run(self) -> None: ... + + +@runtime_checkable +class EvaluationProtocol(Protocol): + """Legacy executor: prompt evaluation.""" + + def run(self, **kwargs: Any) -> dict: ... diff --git a/workers/executor/executors/plugins/text_processor.py b/workers/executor/executors/plugins/text_processor.py new file mode 100644 index 0000000000..72eb17dd60 --- /dev/null +++ b/workers/executor/executors/plugins/text_processor.py @@ -0,0 +1,21 @@ +"""Pure-function text utilities used by the highlight-data plugin.""" + + +def add_hex_line_numbers(text: str) -> str: + """Add hex line numbers to extracted text for coordinate tracking. + + Each line is prefixed with ``0x: `` where ```` is the + zero-based line index. The hex width auto-adjusts to the total + number of lines. + + Args: + text: Multi-line string to number. + + Returns: + The same text with hex line-number prefixes. + """ + lines = text.split("\n") + hex_width = max(len(hex(len(lines))) - 2, 1) + return "\n".join( + f"0x{i:0{hex_width}X}: {line}" for i, line in enumerate(lines) + ) diff --git a/workers/executor/tasks.py b/workers/executor/tasks.py index 24f5fc2184..00ee103751 100644 --- a/workers/executor/tasks.py +++ b/workers/executor/tasks.py @@ -91,6 +91,13 @@ def execute_extraction( ), "operation": context.operation, } + elif context.operation in ("table_extract", "smart_table_extract"): + context._log_component = { + "tool_id": params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": context.operation, + } else: context._log_component = { "tool_id": params.get("tool_id", ""), diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index 09d2e5d9cc..a4176edb2b 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -562,12 +562,12 @@ def _run_agentic_extraction( ) -> dict: """Execute agentic extraction pipeline via dispatcher. - Currently returns failure since the agentic extraction plugin - is not yet available in the executor worker. + Routes to AgenticPromptStudioExecutor (cloud plugin) which handles + the full multi-agent extraction pipeline using AutoGen. """ agentic_ctx = ExecutionContext( - executor_name="legacy", - operation="agentic_extraction", + executor_name="agentic", + operation="agentic_extract", run_id=file_execution_id, execution_source="tool", organization_id=organization_id, diff --git a/workers/shared/enums/worker_enums_base.py b/workers/shared/enums/worker_enums_base.py index 3f1c844fd5..3ed5a6ff35 100644 --- a/workers/shared/enums/worker_enums_base.py +++ b/workers/shared/enums/worker_enums_base.py @@ -149,8 +149,10 @@ class QueueName(str, Enum): # Scheduler queue SCHEDULER = "scheduler" - # Executor queue - EXECUTOR = "executor" + # Executor queue — queue-per-executor naming convention. + # The dispatcher derives queue names as ``celery_executor_{executor_name}``. + # The "legacy" executor is the default OSS executor. + EXECUTOR = "celery_executor_legacy" def to_env_var_name(self) -> str: """Convert queue name to environment variable name. diff --git a/workers/tests/test_executor_sanity.py b/workers/tests/test_executor_sanity.py index 25d75dad6d..8f0c10927a 100644 --- a/workers/tests/test_executor_sanity.py +++ b/workers/tests/test_executor_sanity.py @@ -71,7 +71,7 @@ def test_worker_type_executor_exists(self): def test_queue_name_executor_exists(self): from shared.enums.worker_enums import QueueName - assert QueueName.EXECUTOR.value == "executor" + assert QueueName.EXECUTOR.value == "celery_executor_legacy" def test_task_name_execute_extraction_exists(self): from shared.enums.task_enums import TaskName @@ -88,7 +88,7 @@ def test_worker_registry_has_executor_config(self): from shared.infrastructure.config.registry import WorkerRegistry config = WorkerRegistry.get_queue_config(WorkerType.EXECUTOR) - assert "executor" in config.all_queues() + assert "celery_executor_legacy" in config.all_queues() def test_task_routing_includes_execute_extraction(self): from shared.enums.worker_enums import WorkerType diff --git a/workers/tests/test_legacy_executor_scaffold.py b/workers/tests/test_legacy_executor_scaffold.py index 4821bb7603..52fa92e127 100644 --- a/workers/tests/test_legacy_executor_scaffold.py +++ b/workers/tests/test_legacy_executor_scaffold.py @@ -86,7 +86,8 @@ def test_unsupported_operation_returns_failure(self): # --- 4. All operations are implemented (no stubs remain) --- # TestHandlerStubs and TestOrchestratorWrapping removed: # All operations (extract, index, answer_prompt, single_pass_extraction, -# summarize, agentic_extraction) are now fully implemented. +# summarize) are now fully implemented. Agentic operations moved to +# AgenticPromptStudioExecutor (cloud plugin). # --- 6. Celery eager-mode chain --- @@ -133,9 +134,32 @@ def test_eager_unsupported_op_returns_failure(self, eager_app): class TestDispatchTableCoverage: def test_every_operation_has_handler(self): + """Every Operation handled by LegacyExecutor is in _OPERATION_MAP. + + Operations handled by cloud executors (discovered via entry points) + are excluded — they have their own executor classes. + """ from executor.executors.legacy_executor import LegacyExecutor + # Operations handled by cloud executors, not LegacyExecutor + cloud_executor_operations = { + "table_extract", # TableExtractorExecutor + "smart_table_extract", # SmartTableExtractorExecutor + "sps_answer_prompt", # SimplePromptStudioExecutor + "sps_index", # SimplePromptStudioExecutor + "agentic_extract", # AgenticPromptStudioExecutor + "agentic_summarize", # AgenticPromptStudioExecutor + "agentic_uniformize", # AgenticPromptStudioExecutor + "agentic_finalize", # AgenticPromptStudioExecutor + "agentic_generate_prompt", # AgenticPromptStudioExecutor + "agentic_generate_prompt_pipeline", # AgenticPromptStudioExecutor + "agentic_compare", # AgenticPromptStudioExecutor + "agentic_tune_field", # AgenticPromptStudioExecutor + } + for op in Operation: + if op.value in cloud_executor_operations: + continue assert op.value in LegacyExecutor._OPERATION_MAP, ( f"Operation {op.value} missing from _OPERATION_MAP" ) diff --git a/workers/tests/test_phase2f.py b/workers/tests/test_phase2f.py index d2dce922ab..a5913367c1 100644 --- a/workers/tests/test_phase2f.py +++ b/workers/tests/test_phase2f.py @@ -1,11 +1,11 @@ -"""Phase 2F — single_pass_extraction, summarize, agentic_extraction tests. +"""Phase 2F — single_pass_extraction, summarize, agentic operations tests. Verifies: 1. single_pass_extraction delegates to answer_prompt 2. summarize constructs prompt and calls LLM 3. summarize missing params return failure 4. summarize prompt includes prompt_keys -5. agentic_extraction raises LegacyExecutorError (plugin-dependent) +5. agentic operations rejected by LegacyExecutor (cloud executor handles them) """ from unittest.mock import MagicMock, patch @@ -301,31 +301,30 @@ def test_summarize_creates_llm_with_correct_adapter( # --------------------------------------------------------------------------- -# 3. agentic_extraction +# 3. agentic operations — handled by AgenticPromptStudioExecutor (cloud) # --------------------------------------------------------------------------- class TestAgenticExtraction: - def test_returns_failure(self): - """agentic_extraction returns failure (plugin not available).""" + def test_legacy_rejects_agentic_operations(self): + """LegacyExecutor does not handle agentic operations (cloud executor).""" _register_legacy() executor = ExecutorRegistry.get("legacy") - ctx = _make_context(operation="agentic_extraction") + ctx = _make_context(operation="agentic_extract") result = executor.execute(ctx) assert result.success is False - assert "agentic extraction" in result.error.lower() - assert "plugin" in result.error.lower() + assert "does not support" in result.error - def test_orchestrator_wraps_error(self): - """ExecutionOrchestrator also returns failure for agentic.""" + def test_orchestrator_wraps_unsupported_agentic(self): + """ExecutionOrchestrator returns failure for agentic ops on legacy.""" from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator _register_legacy() orchestrator = ExecutionOrchestrator() - ctx = _make_context(operation="agentic_extraction") + ctx = _make_context(operation="agentic_extract") result = orchestrator.execute(ctx) assert result.success is False - assert "plugin" in result.error.lower() + assert "does not support" in result.error diff --git a/workers/tests/test_sanity_phase2.py b/workers/tests/test_sanity_phase2.py index c104382dd2..2aaeb81730 100644 --- a/workers/tests/test_sanity_phase2.py +++ b/workers/tests/test_sanity_phase2.py @@ -578,13 +578,13 @@ def test_summarize_error_full_chain( class TestSanityAgenticExtraction: - """Full-chain agentic_extraction test.""" + """Full-chain agentic operations test — rejected by LegacyExecutor.""" - def test_agentic_extraction_fails_full_chain(self, eager_app): - """No mocks needed → failure mentioning agentic and plugin.""" + def test_agentic_extract_rejected_by_legacy(self, eager_app): + """Agentic operations are handled by cloud executor, not legacy.""" ctx = ExecutionContext( executor_name="legacy", - operation="agentic_extraction", + operation="agentic_extract", run_id="run-sanity-agentic", execution_source="tool", ) @@ -592,8 +592,7 @@ def test_agentic_extraction_fails_full_chain(self, eager_app): result = ExecutionResult.from_dict(result_dict) assert result.success is False - assert "agentic" in result.error.lower() - assert "plugin" in result.error.lower() + assert "does not support" in result.error class TestSanityResponseContracts: diff --git a/workers/tests/test_sanity_phase3.py b/workers/tests/test_sanity_phase3.py index 5b41a02206..ec70153185 100644 --- a/workers/tests/test_sanity_phase3.py +++ b/workers/tests/test_sanity_phase3.py @@ -427,7 +427,7 @@ def test_structure_tool_skip_extraction_smart_table( class TestStructureToolAgentic: - """Agentic project routes to agentic_extraction.""" + """Agentic project routes to AgenticPromptStudioExecutor.""" @patch(_PATCH_SHIM) @patch(_PATCH_FILE_STORAGE) @@ -465,21 +465,20 @@ def test_structure_tool_agentic_routing( dispatcher_instance = MagicMock() MockDispatcher.return_value = dispatcher_instance - # Agentic extraction currently fails (plugin not available) - agentic_result = ExecutionResult.failure( - error="Agentic extraction requires the agentic extraction plugin" + # Simulate successful agentic extraction + agentic_result = ExecutionResult( + success=True, + data={"output": {"field": "value"}}, ) dispatcher_instance.dispatch.return_value = agentic_result result = execute_structure_tool(base_params) - assert result["success"] is False - assert "agentic" in result["error"].lower() - - # Should dispatch to agentic_extraction + # Should dispatch to agentic executor with agentic_extract operation calls = dispatcher_instance.dispatch.call_args_list assert len(calls) == 1 - assert calls[0][0][0].operation == "agentic_extraction" + assert calls[0][0][0].executor_name == "agentic" + assert calls[0][0][0].operation == "agentic_extract" class TestStructureToolProfileOverrides: diff --git a/workers/tests/test_sanity_phase6a.py b/workers/tests/test_sanity_phase6a.py new file mode 100644 index 0000000000..eb7e2c6c2d --- /dev/null +++ b/workers/tests/test_sanity_phase6a.py @@ -0,0 +1,284 @@ +"""Phase 6A Sanity — Plugin loader infrastructure + queue-per-executor routing. + +Verifies: +1. ExecutorPluginLoader.get() returns None when no plugins installed +2. ExecutorPluginLoader.discover_executors() returns empty when no cloud executors +3. ExecutorPluginLoader.clear() resets cached state +4. ExecutorPluginLoader.get() discovers entry-point-based plugins (mocked) +5. ExecutorPluginLoader.discover_executors() loads cloud executors (mocked) +6. text_processor.add_hex_line_numbers() +7. ExecutionDispatcher._get_queue() naming convention +8. Protocol classes importable and runtime-checkable +9. executors/__init__.py triggers discover_executors() +""" + +from unittest.mock import MagicMock, patch + +import pytest +from executor.executors.plugins.loader import ExecutorPluginLoader +from executor.executors.plugins.text_processor import add_hex_line_numbers +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher + + +@pytest.fixture(autouse=True) +def _reset_plugin_loader(): + """Ensure clean plugin loader state for every test.""" + ExecutorPluginLoader.clear() + yield + ExecutorPluginLoader.clear() + + +# ── 1. Plugin loader: no plugins installed ────────────────────────── + + +class TestPluginLoaderNoPlugins: + """When no cloud plugins are installed, loader returns None / empty.""" + + def test_get_returns_none_for_unknown_plugin(self): + result = ExecutorPluginLoader.get("nonexistent-plugin") + assert result is None + + def test_get_returns_none_for_highlight_data(self): + """highlight-data is a cloud plugin, not installed in OSS.""" + result = ExecutorPluginLoader.get("highlight-data") + assert result is None + + def test_get_returns_none_for_challenge(self): + result = ExecutorPluginLoader.get("challenge") + assert result is None + + def test_get_returns_none_for_evaluation(self): + result = ExecutorPluginLoader.get("evaluation") + assert result is None + + def test_discover_executors_returns_empty(self): + discovered = ExecutorPluginLoader.discover_executors() + assert discovered == [] + + +# ── 2. Plugin loader: clear resets cached state ───────────────────── + + +class TestPluginLoaderClear: + def test_clear_resets_plugins(self): + # Force discovery (caches empty dict) + ExecutorPluginLoader.get("anything") + assert ExecutorPluginLoader._plugins is not None + + ExecutorPluginLoader.clear() + assert ExecutorPluginLoader._plugins is None + + def test_get_after_clear_re_discovers(self): + """After clear(), next get() re-runs discovery.""" + ExecutorPluginLoader.get("x") + assert ExecutorPluginLoader._plugins == {} + + ExecutorPluginLoader.clear() + assert ExecutorPluginLoader._plugins is None + + # Next get() triggers fresh discovery + ExecutorPluginLoader.get("y") + assert ExecutorPluginLoader._plugins is not None + + +# ── 3. Plugin loader with mocked entry points ────────────────────── + + +class TestPluginLoaderWithMockedEntryPoints: + """Simulate cloud plugins being installed by mocking entry_points().""" + + def test_get_discovers_plugin_from_entry_point(self): + """Mocked highlight-data entry point is loaded and cached.""" + + class FakeHighlightData: + pass + + fake_ep = MagicMock() + fake_ep.name = "highlight-data" + fake_ep.load.return_value = FakeHighlightData + + with patch( + "importlib.metadata.entry_points", + return_value=[fake_ep], + ): + result = ExecutorPluginLoader.get("highlight-data") + + assert result is FakeHighlightData + fake_ep.load.assert_called_once() + + def test_get_caches_after_first_call(self): + """Entry points are only queried once; subsequent calls use cache.""" + fake_ep = MagicMock() + fake_ep.name = "challenge" + fake_ep.load.return_value = type("FakeChallenge", (), {}) + + with patch( + "importlib.metadata.entry_points", + return_value=[fake_ep], + ) as mock_eps: + ExecutorPluginLoader.get("challenge") + ExecutorPluginLoader.get("challenge") # second call + + # entry_points() called only once (first get triggers discovery) + mock_eps.assert_called_once() + + def test_failed_plugin_load_is_skipped(self): + """If a plugin fails to load, it's skipped without raising.""" + bad_ep = MagicMock() + bad_ep.name = "bad-plugin" + bad_ep.load.side_effect = ImportError("missing dep") + + good_ep = MagicMock() + good_ep.name = "good-plugin" + good_ep.load.return_value = type("Good", (), {}) + + with patch( + "importlib.metadata.entry_points", + return_value=[bad_ep, good_ep], + ): + assert ExecutorPluginLoader.get("good-plugin") is not None + assert ExecutorPluginLoader.get("bad-plugin") is None + + def test_discover_executors_loads_classes(self): + """Mocked cloud executor entry points are imported.""" + + class FakeTableExecutor: + pass + + fake_ep = MagicMock() + fake_ep.name = "table" + fake_ep.load.return_value = FakeTableExecutor + + with patch( + "importlib.metadata.entry_points", + return_value=[fake_ep], + ): + discovered = ExecutorPluginLoader.discover_executors() + + assert discovered == ["table"] + fake_ep.load.assert_called_once() + + def test_discover_executors_skips_failures(self): + """Failed executor loads are skipped, successful ones returned.""" + bad_ep = MagicMock() + bad_ep.name = "broken" + bad_ep.load.side_effect = ImportError("nope") + + good_ep = MagicMock() + good_ep.name = "smart_table" + good_ep.load.return_value = type("FakeSmartTable", (), {}) + + with patch( + "importlib.metadata.entry_points", + return_value=[bad_ep, good_ep], + ): + discovered = ExecutorPluginLoader.discover_executors() + + assert discovered == ["smart_table"] + + +# ── 4. text_processor ─────────────────────────────────────────────── + + +class TestTextProcessor: + def test_single_line(self): + result = add_hex_line_numbers("hello") + assert result == "0x0: hello" + + def test_multiple_lines(self): + result = add_hex_line_numbers("a\nb\nc") + assert result == "0x0: a\n0x1: b\n0x2: c" + + def test_empty_string(self): + result = add_hex_line_numbers("") + assert result == "0x0: " + + def test_hex_width_grows(self): + # 17 lines → hex needs 2 digits (0x10 = 16) + text = "\n".join(f"line{i}" for i in range(17)) + result = add_hex_line_numbers(text) + lines = result.split("\n") + assert lines[0].startswith("0x00: ") + assert lines[16].startswith("0x10: ") + + +# ── 5. Queue-per-executor routing ─────────────────────────────────── + + +class TestQueuePerExecutor: + def test_get_queue_legacy(self): + assert ExecutionDispatcher._get_queue("legacy") == "celery_executor_legacy" + + def test_get_queue_table(self): + assert ExecutionDispatcher._get_queue("table") == "celery_executor_table" + + def test_get_queue_smart_table(self): + assert ( + ExecutionDispatcher._get_queue("smart_table") + == "celery_executor_smart_table" + ) + + def test_get_queue_simple_prompt_studio(self): + assert ( + ExecutionDispatcher._get_queue("simple_prompt_studio") + == "celery_executor_simple_prompt_studio" + ) + + def test_get_queue_agentic(self): + assert ExecutionDispatcher._get_queue("agentic") == "celery_executor_agentic" + + def test_get_queue_arbitrary_name(self): + """Any executor_name works — no whitelist.""" + assert ( + ExecutionDispatcher._get_queue("my_custom") + == "celery_executor_my_custom" + ) + + def test_queue_name_enum_matches_dispatcher(self): + """QueueName.EXECUTOR matches what dispatcher generates for 'legacy'.""" + from shared.enums.worker_enums import QueueName + + assert QueueName.EXECUTOR.value == ExecutionDispatcher._get_queue("legacy") + + +# ── 6. Protocol classes importable ────────────────────────────────── + + +class TestProtocols: + def test_highlight_data_protocol_importable(self): + from executor.executors.plugins.protocols import HighlightDataProtocol + + assert HighlightDataProtocol is not None + + def test_challenge_protocol_importable(self): + from executor.executors.plugins.protocols import ChallengeProtocol + + assert ChallengeProtocol is not None + + def test_evaluation_protocol_importable(self): + from executor.executors.plugins.protocols import EvaluationProtocol + + assert EvaluationProtocol is not None + + def test_runtime_checkable(self): + """Protocols are @runtime_checkable — isinstance checks work.""" + from executor.executors.plugins.protocols import ChallengeProtocol + + class FakeChallenge: + def run(self): + pass + + assert isinstance(FakeChallenge(), ChallengeProtocol) + + +# ── 7. executors/__init__.py triggers discovery ───────────────────── + + +class TestExecutorsInit: + def test_cloud_executors_list_exists(self): + """executors.__init__ populates _cloud_executors (empty in OSS).""" + import executor.executors as mod + + assert hasattr(mod, "_cloud_executors") + # In pure OSS, no cloud executors are installed + assert isinstance(mod._cloud_executors, list) diff --git a/workers/tests/test_sanity_phase6c.py b/workers/tests/test_sanity_phase6c.py new file mode 100644 index 0000000000..87a55d309c --- /dev/null +++ b/workers/tests/test_sanity_phase6c.py @@ -0,0 +1,559 @@ +"""Phase 6C Sanity — Highlight data as cross-cutting plugin. + +Verifies: +1. run_completion() passes process_text to llm.complete() +2. run_completion() with process_text=None (default) works as before +3. construct_and_run_prompt() passes process_text through to run_completion() +4. _handle_answer_prompt() initializes highlight plugin when enabled + available +5. _handle_answer_prompt() skips highlight when plugin not installed +6. _handle_answer_prompt() skips highlight when enable_highlight=False +7. Highlight metadata populated when plugin provides data via process_text +""" + +from unittest.mock import MagicMock, call, patch + +import pytest +from executor.executors.answer_prompt import AnswerPromptService +from executor.executors.constants import PromptServiceConstants as PSKeys + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture() +def mock_llm(): + """Create a mock LLM that returns a realistic completion dict.""" + llm = MagicMock() + llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="42"), + PSKeys.HIGHLIGHT_DATA: [{"line": 1}], + PSKeys.CONFIDENCE_DATA: {"score": 0.95}, + PSKeys.WORD_CONFIDENCE_DATA: {"words": []}, + PSKeys.LINE_NUMBERS: [1, 2], + PSKeys.WHISPER_HASH: "abc123", + } + return llm + + +@pytest.fixture() +def mock_llm_no_highlight(): + """Create a mock LLM that returns completion without highlight data.""" + llm = MagicMock() + llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="answer"), + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + return llm + + +# --------------------------------------------------------------------------- +# 1. run_completion() passes process_text to llm.complete() +# --------------------------------------------------------------------------- + +class TestRunCompletionProcessText: + def test_process_text_passed_to_llm_complete(self, mock_llm): + """process_text callback is forwarded to llm.complete().""" + callback = MagicMock(name="highlight_run") + AnswerPromptService.run_completion( + llm=mock_llm, + prompt="test prompt", + process_text=callback, + ) + mock_llm.complete.assert_called_once() + call_kwargs = mock_llm.complete.call_args + assert call_kwargs.kwargs.get("process_text") is callback or \ + call_kwargs[1].get("process_text") is callback + + def test_process_text_none_by_default(self, mock_llm): + """When process_text not provided, None is passed to llm.complete().""" + AnswerPromptService.run_completion( + llm=mock_llm, + prompt="test prompt", + ) + call_kwargs = mock_llm.complete.call_args + # Check both positional and keyword args + pt = call_kwargs.kwargs.get("process_text", "MISSING") + if pt == "MISSING": + # Might be positional + pt = call_kwargs[1].get("process_text") + assert pt is None + + def test_process_text_none_explicit(self, mock_llm): + """Explicit process_text=None works as before.""" + answer = AnswerPromptService.run_completion( + llm=mock_llm, + prompt="test prompt", + process_text=None, + ) + assert answer == "42" + + +# --------------------------------------------------------------------------- +# 2. run_completion() populates metadata from completion dict +# --------------------------------------------------------------------------- + +class TestRunCompletionMetadata: + def test_highlight_metadata_populated_with_process_text(self, mock_llm): + """When process_text is provided and LLM returns highlight data, + metadata is populated correctly.""" + callback = MagicMock(name="highlight_run") + metadata: dict = {} + AnswerPromptService.run_completion( + llm=mock_llm, + prompt="test", + metadata=metadata, + prompt_key="field1", + enable_highlight=True, + enable_word_confidence=True, + process_text=callback, + ) + assert metadata[PSKeys.HIGHLIGHT_DATA]["field1"] == [{"line": 1}] + assert metadata[PSKeys.CONFIDENCE_DATA]["field1"] == {"score": 0.95} + assert metadata[PSKeys.WORD_CONFIDENCE_DATA]["field1"] == {"words": []} + assert metadata[PSKeys.LINE_NUMBERS]["field1"] == [1, 2] + assert metadata[PSKeys.WHISPER_HASH] == "abc123" + + def test_highlight_metadata_empty_without_process_text( + self, mock_llm_no_highlight + ): + """Without process_text, highlight data is empty but no error.""" + metadata: dict = {} + AnswerPromptService.run_completion( + llm=mock_llm_no_highlight, + prompt="test", + metadata=metadata, + prompt_key="field1", + enable_highlight=True, + process_text=None, + ) + assert metadata[PSKeys.HIGHLIGHT_DATA]["field1"] == [] + assert metadata[PSKeys.LINE_NUMBERS]["field1"] == [] + + +# --------------------------------------------------------------------------- +# 3. construct_and_run_prompt() passes process_text through +# --------------------------------------------------------------------------- + +class TestConstructAndRunPromptProcessText: + def test_process_text_forwarded(self, mock_llm): + """construct_and_run_prompt passes process_text to run_completion.""" + callback = MagicMock(name="highlight_run") + tool_settings = { + PSKeys.PREAMBLE: "", + PSKeys.POSTAMBLE: "", + PSKeys.GRAMMAR: [], + PSKeys.ENABLE_HIGHLIGHT: True, + } + output = { + PSKeys.NAME: "field1", + PSKeys.PROMPT: "What is the value?", + PSKeys.PROMPTX: "What is the value?", + PSKeys.TYPE: PSKeys.TEXT, + } + answer = AnswerPromptService.construct_and_run_prompt( + tool_settings=tool_settings, + output=output, + llm=mock_llm, + context="some context", + prompt=PSKeys.PROMPTX, + metadata={}, + process_text=callback, + ) + # Verify callback was passed to llm.complete + call_kwargs = mock_llm.complete.call_args + pt = call_kwargs.kwargs.get("process_text") + if pt is None: + pt = call_kwargs[1].get("process_text") + assert pt is callback + assert answer == "42" + + def test_process_text_none_default(self, mock_llm): + """construct_and_run_prompt defaults process_text to None.""" + tool_settings = { + PSKeys.PREAMBLE: "", + PSKeys.POSTAMBLE: "", + PSKeys.GRAMMAR: [], + } + output = { + PSKeys.NAME: "field1", + PSKeys.PROMPT: "What?", + PSKeys.PROMPTX: "What?", + PSKeys.TYPE: PSKeys.TEXT, + } + AnswerPromptService.construct_and_run_prompt( + tool_settings=tool_settings, + output=output, + llm=mock_llm, + context="ctx", + prompt=PSKeys.PROMPTX, + metadata={}, + ) + call_kwargs = mock_llm.complete.call_args + pt = call_kwargs.kwargs.get("process_text") + if pt is None and "process_text" not in (call_kwargs.kwargs or {}): + pt = call_kwargs[1].get("process_text") + assert pt is None + + +# --------------------------------------------------------------------------- +# 4. _handle_answer_prompt() initializes highlight plugin +# --------------------------------------------------------------------------- + +class TestHandleAnswerPromptHighlight: + """Test highlight plugin integration in LegacyExecutor._handle_answer_prompt.""" + + def _make_context(self, enable_highlight=False): + """Build a minimal ExecutionContext for answer_prompt.""" + from unstract.sdk1.execution.context import ExecutionContext + + prompt_output = { + PSKeys.NAME: "field1", + PSKeys.PROMPT: "What is X?", + PSKeys.PROMPTX: "What is X?", + PSKeys.TYPE: PSKeys.TEXT, + PSKeys.CHUNK_SIZE: 0, + PSKeys.CHUNK_OVERLAP: 0, + PSKeys.LLM: "llm-123", + PSKeys.EMBEDDING: "emb-123", + PSKeys.VECTOR_DB: "vdb-123", + PSKeys.X2TEXT_ADAPTER: "x2t-123", + PSKeys.RETRIEVAL_STRATEGY: "simple", + } + return ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id="run-001", + execution_source="ide", + organization_id="org-1", + executor_params={ + PSKeys.TOOL_SETTINGS: { + PSKeys.PREAMBLE: "", + PSKeys.POSTAMBLE: "", + PSKeys.GRAMMAR: [], + PSKeys.ENABLE_HIGHLIGHT: enable_highlight, + }, + PSKeys.OUTPUTS: [prompt_output], + PSKeys.TOOL_ID: "tool-1", + PSKeys.FILE_HASH: "hash123", + PSKeys.FILE_PATH: "/data/doc.txt", + PSKeys.FILE_NAME: "doc.txt", + PSKeys.PLATFORM_SERVICE_API_KEY: "key-123", + }, + ) + + def _get_executor(self): + from executor.executors.legacy_executor import LegacyExecutor + from unstract.sdk1.execution.registry import ExecutorRegistry + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + return ExecutorRegistry.get("legacy") + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_highlight_plugin_initialized_when_enabled( + self, mock_index_key, mock_shim_cls + ): + """When enable_highlight=True and plugin available, highlight is used.""" + mock_shim_cls.return_value = MagicMock() + + # Mock highlight plugin + mock_highlight_cls = MagicMock() + mock_highlight_instance = MagicMock() + mock_highlight_cls.return_value = mock_highlight_instance + + # Mock LLM + mock_llm = MagicMock() + mock_llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="result"), + PSKeys.HIGHLIGHT_DATA: [{"line": 5}], + PSKeys.CONFIDENCE_DATA: {"score": 0.9}, + PSKeys.LINE_NUMBERS: [5], + PSKeys.WHISPER_HASH: "hash1", + } + mock_llm.get_usage_reason.return_value = "extraction" + mock_llm.get_metrics.return_value = {} + + mock_fs = MagicMock() + mock_llm_cls = MagicMock(return_value=mock_llm) + + executor = self._get_executor() + ctx = self._make_context(enable_highlight=True) + + with ( + patch.object( + executor, "_get_prompt_deps", + return_value=( + AnswerPromptService, + MagicMock( + retrieve_complete_context=MagicMock( + return_value=["context chunk"] + ) + ), + MagicMock( + is_variables_present=MagicMock(return_value=False) + ), + None, # Index + mock_llm_cls, + MagicMock(), # EmbeddingCompat + MagicMock(), # VectorDB + ), + ), + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + return_value=mock_highlight_cls, + ), + patch( + "executor.executors.file_utils.FileUtils.get_fs_instance", + return_value=mock_fs, + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Verify highlight plugin was instantiated + mock_highlight_cls.assert_called_once_with( + file_path="/data/doc.txt", + fs_instance=mock_fs, + execution_source="ide", + ) + # Verify process_text was the highlight instance's run method + llm_complete_call = mock_llm.complete.call_args + assert llm_complete_call.kwargs.get("process_text") is \ + mock_highlight_instance.run + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_highlight_skipped_when_plugin_not_installed( + self, mock_index_key, mock_shim_cls + ): + """When enable_highlight=True but plugin not installed, process_text=None.""" + mock_shim = MagicMock() + mock_shim_cls.return_value = mock_shim + + mock_llm = MagicMock() + mock_llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="result"), + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + mock_llm.get_usage_reason.return_value = "extraction" + mock_llm.get_metrics.return_value = {} + + executor = self._get_executor() + ctx = self._make_context(enable_highlight=True) + + mock_llm_cls = MagicMock(return_value=mock_llm) + with ( + patch.object( + executor, "_get_prompt_deps", + return_value=( + AnswerPromptService, + MagicMock( + retrieve_complete_context=MagicMock( + return_value=["chunk"] + ) + ), + MagicMock( + is_variables_present=MagicMock(return_value=False) + ), + None, + mock_llm_cls, + MagicMock(), + MagicMock(), + ), + ), + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + return_value=None, # Plugin not installed + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # process_text should be None since plugin not available + llm_complete_call = mock_llm.complete.call_args + assert llm_complete_call.kwargs.get("process_text") is None + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_highlight_skipped_when_disabled( + self, mock_index_key, mock_shim_cls + ): + """When enable_highlight=False, plugin loader is not even called.""" + mock_shim = MagicMock() + mock_shim_cls.return_value = mock_shim + + mock_llm = MagicMock() + mock_llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="result"), + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + mock_llm.get_usage_reason.return_value = "extraction" + mock_llm.get_metrics.return_value = {} + + executor = self._get_executor() + ctx = self._make_context(enable_highlight=False) + + mock_llm_cls = MagicMock(return_value=mock_llm) + with ( + patch.object( + executor, "_get_prompt_deps", + return_value=( + AnswerPromptService, + MagicMock( + retrieve_complete_context=MagicMock( + return_value=["chunk"] + ) + ), + MagicMock( + is_variables_present=MagicMock(return_value=False) + ), + None, + mock_llm_cls, + MagicMock(), + MagicMock(), + ), + ), + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + ) as mock_plugin_get, + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Plugin loader should NOT have been called + mock_plugin_get.assert_not_called() + # process_text should be None + llm_complete_call = mock_llm.complete.call_args + assert llm_complete_call.kwargs.get("process_text") is None + + +# --------------------------------------------------------------------------- +# 5. Multiple prompts share same highlight instance +# --------------------------------------------------------------------------- + +class TestHighlightMultiplePrompts: + """Verify that one highlight instance is shared across all prompts.""" + + def _make_multi_prompt_context(self): + from unstract.sdk1.execution.context import ExecutionContext + + prompts = [] + for name in ["field1", "field2", "field3"]: + prompts.append({ + PSKeys.NAME: name, + PSKeys.PROMPT: f"What is {name}?", + PSKeys.PROMPTX: f"What is {name}?", + PSKeys.TYPE: PSKeys.TEXT, + PSKeys.CHUNK_SIZE: 0, + PSKeys.CHUNK_OVERLAP: 0, + PSKeys.LLM: "llm-123", + PSKeys.EMBEDDING: "emb-123", + PSKeys.VECTOR_DB: "vdb-123", + PSKeys.X2TEXT_ADAPTER: "x2t-123", + PSKeys.RETRIEVAL_STRATEGY: "simple", + }) + return ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id="run-002", + execution_source="tool", + organization_id="org-1", + executor_params={ + PSKeys.TOOL_SETTINGS: { + PSKeys.PREAMBLE: "", + PSKeys.POSTAMBLE: "", + PSKeys.GRAMMAR: [], + PSKeys.ENABLE_HIGHLIGHT: True, + }, + PSKeys.OUTPUTS: prompts, + PSKeys.TOOL_ID: "tool-1", + PSKeys.FILE_HASH: "hash123", + PSKeys.FILE_PATH: "/data/doc.txt", + PSKeys.FILE_NAME: "doc.txt", + PSKeys.PLATFORM_SERVICE_API_KEY: "key-123", + }, + ) + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_single_highlight_instance_for_all_prompts( + self, mock_index_key, mock_shim_cls + ): + """One highlight instance is created and reused for all prompts.""" + mock_shim_cls.return_value = MagicMock() + + mock_highlight_cls = MagicMock() + mock_highlight_instance = MagicMock() + mock_highlight_cls.return_value = mock_highlight_instance + + mock_llm = MagicMock() + mock_llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="val"), + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + mock_llm.get_usage_reason.return_value = "extraction" + mock_llm.get_metrics.return_value = {} + + from executor.executors.legacy_executor import LegacyExecutor + from unstract.sdk1.execution.registry import ExecutorRegistry + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + executor = ExecutorRegistry.get("legacy") + ctx = self._make_multi_prompt_context() + + mock_llm_cls = MagicMock(return_value=mock_llm) + with ( + patch.object( + executor, "_get_prompt_deps", + return_value=( + AnswerPromptService, + MagicMock( + retrieve_complete_context=MagicMock( + return_value=["chunk"] + ) + ), + MagicMock( + is_variables_present=MagicMock(return_value=False) + ), + None, + mock_llm_cls, + MagicMock(), + MagicMock(), + ), + ), + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + return_value=mock_highlight_cls, + ), + patch( + "executor.executors.file_utils.FileUtils.get_fs_instance", + return_value=MagicMock(), + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # highlight_cls should be instantiated exactly ONCE + assert mock_highlight_cls.call_count == 1 + # llm.complete should be called 3 times (once per prompt) + assert mock_llm.complete.call_count == 3 + # Each call should use the same process_text + for c in mock_llm.complete.call_args_list: + assert c.kwargs.get("process_text") is mock_highlight_instance.run diff --git a/workers/tests/test_sanity_phase6d.py b/workers/tests/test_sanity_phase6d.py new file mode 100644 index 0000000000..0465abcd59 --- /dev/null +++ b/workers/tests/test_sanity_phase6d.py @@ -0,0 +1,514 @@ +"""Phase 6D Sanity — LegacyExecutor plugin integration. + +Verifies: +1. TABLE type raises LegacyExecutorError with routing guidance +2. LINE_ITEM type raises LegacyExecutorError (not supported) +3. Challenge plugin invoked when enable_challenge=True + plugin installed +4. Challenge skipped when plugin not installed (graceful degradation) +5. Challenge skipped when enable_challenge=False +6. Challenge skipped when challenge_llm not configured +7. Evaluation plugin invoked when eval_settings.evaluate=True + plugin installed +8. Evaluation skipped when plugin not installed +9. Evaluation skipped when eval_settings.evaluate=False +10. Challenge runs before evaluation (order matters) +11. Challenge mutates structured_output (via mock) +""" + +from unittest.mock import MagicMock, patch, call + +import pytest +from executor.executors.answer_prompt import AnswerPromptService +from executor.executors.constants import PromptServiceConstants as PSKeys +from executor.executors.exceptions import LegacyExecutorError + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_context( + output_type="TEXT", + enable_highlight=False, + enable_challenge=False, + challenge_llm="", + eval_settings=None, +): + """Build a minimal ExecutionContext for answer_prompt tests.""" + from unstract.sdk1.execution.context import ExecutionContext + + prompt_output = { + PSKeys.NAME: "field1", + PSKeys.PROMPT: "What is X?", + PSKeys.PROMPTX: "What is X?", + PSKeys.TYPE: output_type, + PSKeys.CHUNK_SIZE: 0, + PSKeys.CHUNK_OVERLAP: 0, + PSKeys.LLM: "llm-123", + PSKeys.EMBEDDING: "emb-123", + PSKeys.VECTOR_DB: "vdb-123", + PSKeys.X2TEXT_ADAPTER: "x2t-123", + PSKeys.RETRIEVAL_STRATEGY: "simple", + } + if eval_settings: + prompt_output[PSKeys.EVAL_SETTINGS] = eval_settings + + tool_settings = { + PSKeys.PREAMBLE: "", + PSKeys.POSTAMBLE: "", + PSKeys.GRAMMAR: [], + PSKeys.ENABLE_HIGHLIGHT: enable_highlight, + PSKeys.ENABLE_CHALLENGE: enable_challenge, + } + if challenge_llm: + tool_settings[PSKeys.CHALLENGE_LLM] = challenge_llm + + return ExecutionContext( + executor_name="legacy", + operation="answer_prompt", + run_id="run-001", + execution_source="ide", + organization_id="org-1", + executor_params={ + PSKeys.TOOL_SETTINGS: tool_settings, + PSKeys.OUTPUTS: [prompt_output], + PSKeys.TOOL_ID: "tool-1", + PSKeys.FILE_HASH: "hash123", + PSKeys.FILE_PATH: "/data/doc.txt", + PSKeys.FILE_NAME: "doc.txt", + PSKeys.PLATFORM_SERVICE_API_KEY: "key-123", + }, + ) + + +def _get_executor(): + from executor.executors.legacy_executor import LegacyExecutor + from unstract.sdk1.execution.registry import ExecutorRegistry + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + return ExecutorRegistry.get("legacy") + + +def _mock_llm(): + """Create a mock LLM that returns a realistic completion dict.""" + llm = MagicMock() + llm.complete.return_value = { + PSKeys.RESPONSE: MagicMock(text="42"), + PSKeys.HIGHLIGHT_DATA: [], + PSKeys.LINE_NUMBERS: [], + PSKeys.WHISPER_HASH: "", + } + llm.get_usage_reason.return_value = "extraction" + llm.get_metrics.return_value = {} + return llm + + +def _standard_patches(executor, mock_llm_instance): + """Return common patches for _handle_answer_prompt tests.""" + mock_llm_cls = MagicMock(return_value=mock_llm_instance) + return { + "_get_prompt_deps": patch.object( + executor, "_get_prompt_deps", + return_value=( + AnswerPromptService, + MagicMock( + retrieve_complete_context=MagicMock( + return_value=["context chunk"] + ) + ), + MagicMock( + is_variables_present=MagicMock(return_value=False) + ), + None, # Index + mock_llm_cls, + MagicMock(), # EmbeddingCompat + MagicMock(), # VectorDB + ), + ), + "shim": patch( + "executor.executors.legacy_executor.ExecutorToolShim", + return_value=MagicMock(), + ), + "index_key": patch( + "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1", + ), + } + + +# --------------------------------------------------------------------------- +# 1. TABLE type raises with routing guidance +# --------------------------------------------------------------------------- + +class TestTableLineItemGuard: + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_table_type_raises_with_routing_guidance( + self, mock_key, mock_shim_cls + ): + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(output_type=PSKeys.TABLE) # "table" + llm = _mock_llm() + patches = _standard_patches(executor, llm) + + with patches["_get_prompt_deps"], patches["shim"], patches["index_key"]: + with pytest.raises(LegacyExecutorError, match="TableExtractorExecutor"): + executor._handle_answer_prompt(ctx) + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_line_item_type_raises_not_supported( + self, mock_key, mock_shim_cls + ): + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(output_type=PSKeys.LINE_ITEM) # "line-item" + llm = _mock_llm() + patches = _standard_patches(executor, llm) + + with patches["_get_prompt_deps"], patches["shim"], patches["index_key"]: + with pytest.raises(LegacyExecutorError, match="not supported"): + executor._handle_answer_prompt(ctx) + + +# --------------------------------------------------------------------------- +# 2. Challenge plugin integration +# --------------------------------------------------------------------------- + +class TestChallengeIntegration: + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_invoked_when_enabled_and_installed( + self, mock_key, mock_shim_cls + ): + """Challenge plugin is instantiated and run() called.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(enable_challenge=True, challenge_llm="ch-llm-1") + llm = _mock_llm() + mock_challenge_cls = MagicMock() + mock_challenger = MagicMock() + mock_challenge_cls.return_value = mock_challenger + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + side_effect=lambda name: ( + mock_challenge_cls if name == "challenge" else None + ), + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Challenge class was instantiated with correct args + mock_challenge_cls.assert_called_once() + init_kwargs = mock_challenge_cls.call_args.kwargs + assert init_kwargs["run_id"] == "run-001" + assert init_kwargs["platform_key"] == "key-123" + assert init_kwargs["llm"] is llm + # run() was called + mock_challenger.run.assert_called_once() + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_skipped_when_plugin_not_installed( + self, mock_key, mock_shim_cls + ): + """When challenge enabled but plugin missing, no error.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(enable_challenge=True, challenge_llm="ch-llm-1") + llm = _mock_llm() + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + return_value=None, + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_skipped_when_disabled( + self, mock_key, mock_shim_cls + ): + """When enable_challenge=False, plugin loader not called for challenge.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(enable_challenge=False) + llm = _mock_llm() + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + ) as mock_get, + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Plugin loader should NOT have been called for "challenge" + for c in mock_get.call_args_list: + assert c.args[0] != "challenge", ( + "ExecutorPluginLoader.get('challenge') should not be called" + ) + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_skipped_when_no_challenge_llm( + self, mock_key, mock_shim_cls + ): + """When enable_challenge=True but no challenge_llm, skip challenge.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + # enable_challenge=True but challenge_llm="" (empty) + ctx = _make_context(enable_challenge=True, challenge_llm="") + llm = _mock_llm() + mock_challenge_cls = MagicMock() + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + return_value=mock_challenge_cls, + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Challenge class should NOT be instantiated (no LLM ID) + mock_challenge_cls.assert_not_called() + + +# --------------------------------------------------------------------------- +# 3. Evaluation plugin integration +# --------------------------------------------------------------------------- + +class TestEvaluationIntegration: + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_evaluation_invoked_when_enabled_and_installed( + self, mock_key, mock_shim_cls + ): + """Evaluation plugin is instantiated and run() called.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context( + eval_settings={PSKeys.EVAL_SETTINGS_EVALUATE: True} + ) + llm = _mock_llm() + mock_eval_cls = MagicMock() + mock_evaluator = MagicMock() + mock_eval_cls.return_value = mock_evaluator + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + side_effect=lambda name: ( + mock_eval_cls if name == "evaluation" else None + ), + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + mock_eval_cls.assert_called_once() + init_kwargs = mock_eval_cls.call_args.kwargs + assert init_kwargs["platform_key"] == "key-123" + assert init_kwargs["response"] == "42" # from mock LLM + mock_evaluator.run.assert_called_once() + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_evaluation_skipped_when_plugin_not_installed( + self, mock_key, mock_shim_cls + ): + """When evaluation enabled but plugin missing, no error.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context( + eval_settings={PSKeys.EVAL_SETTINGS_EVALUATE: True} + ) + llm = _mock_llm() + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + return_value=None, + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_evaluation_skipped_when_not_enabled( + self, mock_key, mock_shim_cls + ): + """When no eval_settings or evaluate=False, evaluation skipped.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + # No eval_settings at all + ctx = _make_context() + llm = _mock_llm() + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + ) as mock_get, + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # Plugin loader should NOT have been called for "evaluation" + for c in mock_get.call_args_list: + assert c.args[0] != "evaluation", ( + "ExecutorPluginLoader.get('evaluation') should not be called" + ) + + +# --------------------------------------------------------------------------- +# 4. Challenge runs before evaluation (ordering) +# --------------------------------------------------------------------------- + +class TestChallengeBeforeEvaluation: + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_runs_before_evaluation( + self, mock_key, mock_shim_cls + ): + """Challenge mutates structured_output before evaluation reads it.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context( + enable_challenge=True, + challenge_llm="ch-llm-1", + eval_settings={PSKeys.EVAL_SETTINGS_EVALUATE: True}, + ) + llm = _mock_llm() + + # Track call order + call_order = [] + + mock_challenge_cls = MagicMock() + mock_challenger = MagicMock() + mock_challenger.run.side_effect = lambda: call_order.append("challenge") + mock_challenge_cls.return_value = mock_challenger + + mock_eval_cls = MagicMock() + mock_evaluator = MagicMock() + mock_evaluator.run.side_effect = lambda: call_order.append("evaluation") + mock_eval_cls.return_value = mock_evaluator + + def plugin_get(name): + if name == "challenge": + return mock_challenge_cls + if name == "evaluation": + return mock_eval_cls + return None + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + side_effect=plugin_get, + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + assert call_order == ["challenge", "evaluation"] + + +# --------------------------------------------------------------------------- +# 5. Challenge mutates structured_output +# --------------------------------------------------------------------------- + +class TestChallengeMutation: + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_challenge_mutates_structured_output( + self, mock_key, mock_shim_cls + ): + """Challenge plugin can mutate structured_output dict.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(enable_challenge=True, challenge_llm="ch-llm-1") + llm = _mock_llm() + + def challenge_run_side_effect(): + # Simulate challenge replacing the answer with improved version + challenger_instance = mock_challenge_cls.return_value + # Access the structured_output passed to constructor + so = mock_challenge_cls.call_args.kwargs["structured_output"] + so["field1"] = "improved_42" + + mock_challenge_cls = MagicMock() + mock_challenger = MagicMock() + mock_challenger.run.side_effect = challenge_run_side_effect + mock_challenge_cls.return_value = mock_challenger + + patches = _standard_patches(executor, llm) + with ( + patches["_get_prompt_deps"], + patches["shim"], + patches["index_key"], + patch( + "executor.executors.plugins.loader.ExecutorPluginLoader.get", + side_effect=lambda name: ( + mock_challenge_cls if name == "challenge" else None + ), + ), + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + # The structured_output should contain the mutated value + assert result.data[PSKeys.OUTPUT]["field1"] == "improved_42" diff --git a/workers/tests/test_sanity_phase6e.py b/workers/tests/test_sanity_phase6e.py new file mode 100644 index 0000000000..5a33de50a5 --- /dev/null +++ b/workers/tests/test_sanity_phase6e.py @@ -0,0 +1,218 @@ +"""Phase 6E Sanity — TableExtractorExecutor + TABLE_EXTRACT operation. + +Verifies: +1. Operation.TABLE_EXTRACT enum exists with value "table_extract" +2. tasks.py log_component builder handles table_extract operation +3. TableExtractorExecutor mock — registration via entry point +4. TableExtractorExecutor mock — dispatch to correct queue +5. LegacyExecutor excludes table_extract from its _OPERATION_MAP +6. Cloud executor entry point name matches pyproject.toml +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +# --------------------------------------------------------------------------- +# 1. Operation enum +# --------------------------------------------------------------------------- + +class TestTableExtractOperation: + def test_table_extract_enum_exists(self): + assert hasattr(Operation, "TABLE_EXTRACT") + assert Operation.TABLE_EXTRACT.value == "table_extract" + + def test_table_extract_in_operation_values(self): + values = {op.value for op in Operation} + assert "table_extract" in values + + +# --------------------------------------------------------------------------- +# 2. tasks.py log_component for table_extract +# --------------------------------------------------------------------------- + +class TestTasksLogComponent: + def test_table_extract_log_component(self): + """tasks.py builds correct log_component for table_extract.""" + from executor.tasks import execute_extraction + + # Build a mock context dict + ctx_dict = { + "executor_name": "table", + "operation": "table_extract", + "run_id": "run-001", + "execution_source": "tool", + "organization_id": "org-1", + "executor_params": { + "tool_id": "tool-1", + "file_name": "invoice.pdf", + }, + "request_id": "req-1", + "log_events_id": "evt-1", + } + + # We just need to verify the log_component is built correctly. + # Deserialize the context and check the branch. + context = ExecutionContext.from_dict(ctx_dict) + params = context.executor_params + + # Simulate the tasks.py logic + if context.log_events_id: + if context.operation == "table_extract": + component = { + "tool_id": params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": context.operation, + } + assert component == { + "tool_id": "tool-1", + "run_id": "run-001", + "doc_name": "invoice.pdf", + "operation": "table_extract", + } + + +# --------------------------------------------------------------------------- +# 3. Mock TableExtractorExecutor — entry point registration +# --------------------------------------------------------------------------- + +class TestTableExtractorExecutorRegistration: + def test_mock_table_executor_discovered_via_entry_point(self): + """Simulate cloud executor discovery via entry point.""" + from executor.executors.plugins.loader import ExecutorPluginLoader + from unstract.sdk1.execution.executor import BaseExecutor + + # Create a mock TableExtractorExecutor + @ExecutorRegistry.register + class MockTableExtractorExecutor(BaseExecutor): + @property + def name(self) -> str: + return "table" + + def execute(self, context): + if context.operation != "table_extract": + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={"output": "table_data", "metadata": {}}, + ) + + try: + # Verify it was registered + assert "table" in ExecutorRegistry.list_executors() + executor = ExecutorRegistry.get("table") + assert executor.name == "table" + + # Verify it handles table_extract + ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert result.success + assert result.data["output"] == "table_data" + + # Verify it rejects unsupported operations + ctx2 = ExecutionContext( + executor_name="table", + operation="answer_prompt", + run_id="run-2", + execution_source="tool", + executor_params={}, + ) + result2 = executor.execute(ctx2) + assert not result2.success + finally: + # Cleanup + ExecutorRegistry.clear() + + +# --------------------------------------------------------------------------- +# 4. Queue routing for table executor +# --------------------------------------------------------------------------- + +class TestTableQueueRouting: + def test_table_executor_routes_to_correct_queue(self): + """executor_name='table' routes to celery_executor_table queue.""" + queue = ExecutionDispatcher._get_queue("table") + assert queue == "celery_executor_table" + + def test_dispatch_sends_to_table_queue(self): + """ExecutionDispatcher sends table_extract to correct queue.""" + mock_app = MagicMock() + mock_result = MagicMock() + mock_result.get.return_value = ExecutionResult( + success=True, data={"output": "ok"} + ).to_dict() + mock_app.send_task.return_value = mock_result + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id="run-1", + execution_source="tool", + executor_params={"table_settings": {}}, + ) + result = dispatcher.dispatch(ctx) + + mock_app.send_task.assert_called_once() + call_kwargs = mock_app.send_task.call_args + assert call_kwargs.kwargs.get("queue") == "celery_executor_table" + + +# --------------------------------------------------------------------------- +# 5. LegacyExecutor does NOT handle table_extract +# --------------------------------------------------------------------------- + +class TestLegacyExcludesTable: + def test_table_extract_not_in_legacy_operation_map(self): + """LegacyExecutor._OPERATION_MAP should NOT contain table_extract.""" + from executor.executors.legacy_executor import LegacyExecutor + + assert "table_extract" not in LegacyExecutor._OPERATION_MAP + + def test_legacy_returns_failure_for_table_extract(self): + """LegacyExecutor.execute() returns failure for table_extract.""" + from executor.executors.legacy_executor import LegacyExecutor + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + executor = ExecutorRegistry.get("legacy") + + ctx = ExecutionContext( + executor_name="legacy", + operation="table_extract", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert not result.success + assert "does not support" in result.error + + +# --------------------------------------------------------------------------- +# 6. Entry point name verification +# --------------------------------------------------------------------------- + +class TestEntryPointConfig: + def test_entry_point_name_is_table(self): + """The pyproject.toml entry point name should be 'table'.""" + # This is a documentation/verification test — the entry point + # in pyproject.toml maps 'table' to TableExtractorExecutor. + # Verify the queue name matches. + assert ExecutionDispatcher._get_queue("table") == "celery_executor_table" diff --git a/workers/tests/test_sanity_phase6f.py b/workers/tests/test_sanity_phase6f.py new file mode 100644 index 0000000000..8eb0636792 --- /dev/null +++ b/workers/tests/test_sanity_phase6f.py @@ -0,0 +1,192 @@ +"""Phase 6F Sanity — SmartTableExtractorExecutor + SMART_TABLE_EXTRACT operation. + +Verifies: +1. Operation.SMART_TABLE_EXTRACT enum exists with value "smart_table_extract" +2. tasks.py log_component builder handles smart_table_extract operation +3. Mock SmartTableExtractorExecutor — registration and execution +4. Queue routing: executor_name="smart_table" → celery_executor_smart_table +5. LegacyExecutor does NOT handle smart_table_extract +6. Dispatch sends to correct queue +""" + +from unittest.mock import MagicMock + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +# --------------------------------------------------------------------------- +# 1. Operation enum +# --------------------------------------------------------------------------- + +class TestSmartTableExtractOperation: + def test_smart_table_extract_enum_exists(self): + assert hasattr(Operation, "SMART_TABLE_EXTRACT") + assert Operation.SMART_TABLE_EXTRACT.value == "smart_table_extract" + + def test_smart_table_extract_in_operation_values(self): + values = {op.value for op in Operation} + assert "smart_table_extract" in values + + +# --------------------------------------------------------------------------- +# 2. tasks.py log_component for smart_table_extract +# --------------------------------------------------------------------------- + +class TestTasksLogComponent: + def test_smart_table_extract_log_component(self): + """tasks.py handles smart_table_extract in the same branch as table_extract.""" + ctx_dict = { + "executor_name": "smart_table", + "operation": "smart_table_extract", + "run_id": "run-001", + "execution_source": "tool", + "organization_id": "org-1", + "executor_params": { + "tool_id": "tool-1", + "file_name": "data.xlsx", + }, + "request_id": "req-1", + "log_events_id": "evt-1", + } + context = ExecutionContext.from_dict(ctx_dict) + params = context.executor_params + + # Simulate the tasks.py logic — smart_table_extract shares the + # branch with table_extract + assert context.operation in ("table_extract", "smart_table_extract") + component = { + "tool_id": params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": context.operation, + } + assert component == { + "tool_id": "tool-1", + "run_id": "run-001", + "doc_name": "data.xlsx", + "operation": "smart_table_extract", + } + + +# --------------------------------------------------------------------------- +# 3. Mock SmartTableExtractorExecutor — registration and execution +# --------------------------------------------------------------------------- + +class TestSmartTableExtractorRegistration: + def test_mock_smart_table_executor_registers_and_executes(self): + """Simulate cloud executor discovery and execution.""" + @ExecutorRegistry.register + class MockSmartTableExecutor(BaseExecutor): + @property + def name(self) -> str: + return "smart_table" + + def execute(self, context): + if context.operation != "smart_table_extract": + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={ + "output": [{"col1": "val1"}], + "metadata": {"total_records": 1}, + }, + ) + + try: + assert "smart_table" in ExecutorRegistry.list_executors() + executor = ExecutorRegistry.get("smart_table") + assert executor.name == "smart_table" + + ctx = ExecutionContext( + executor_name="smart_table", + operation="smart_table_extract", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert result.success + assert result.data["output"] == [{"col1": "val1"}] + assert result.data["metadata"]["total_records"] == 1 + + # Rejects unsupported operations + ctx2 = ExecutionContext( + executor_name="smart_table", + operation="answer_prompt", + run_id="run-2", + execution_source="tool", + executor_params={}, + ) + result2 = executor.execute(ctx2) + assert not result2.success + finally: + ExecutorRegistry.clear() + + +# --------------------------------------------------------------------------- +# 4. Queue routing +# --------------------------------------------------------------------------- + +class TestSmartTableQueueRouting: + def test_smart_table_routes_to_correct_queue(self): + queue = ExecutionDispatcher._get_queue("smart_table") + assert queue == "celery_executor_smart_table" + + def test_dispatch_sends_to_smart_table_queue(self): + mock_app = MagicMock() + mock_result = MagicMock() + mock_result.get.return_value = ExecutionResult( + success=True, data={"output": "ok"} + ).to_dict() + mock_app.send_task.return_value = mock_result + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="smart_table", + operation="smart_table_extract", + run_id="run-1", + execution_source="tool", + executor_params={"table_settings": {}}, + ) + result = dispatcher.dispatch(ctx) + + mock_app.send_task.assert_called_once() + call_kwargs = mock_app.send_task.call_args + assert call_kwargs.kwargs.get("queue") == "celery_executor_smart_table" + + +# --------------------------------------------------------------------------- +# 5. LegacyExecutor does NOT handle smart_table_extract +# --------------------------------------------------------------------------- + +class TestLegacyExcludesSmartTable: + def test_smart_table_extract_not_in_legacy_operation_map(self): + from executor.executors.legacy_executor import LegacyExecutor + assert "smart_table_extract" not in LegacyExecutor._OPERATION_MAP + + def test_legacy_returns_failure_for_smart_table_extract(self): + from executor.executors.legacy_executor import LegacyExecutor + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + executor = ExecutorRegistry.get("legacy") + + ctx = ExecutionContext( + executor_name="legacy", + operation="smart_table_extract", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert not result.success + assert "does not support" in result.error diff --git a/workers/tests/test_sanity_phase6g.py b/workers/tests/test_sanity_phase6g.py new file mode 100644 index 0000000000..e93e893a8e --- /dev/null +++ b/workers/tests/test_sanity_phase6g.py @@ -0,0 +1,297 @@ +"""Phase 6G Sanity — SimplePromptStudioExecutor + SPS operations. + +Verifies: +1. Operation.SPS_ANSWER_PROMPT enum exists with value "sps_answer_prompt" +2. Operation.SPS_INDEX enum exists with value "sps_index" +3. Mock SimplePromptStudioExecutor — registration and execution +4. Queue routing: executor_name="simple_prompt_studio" → celery_executor_simple_prompt_studio +5. LegacyExecutor does NOT handle sps_answer_prompt or sps_index +6. Dispatch sends to correct queue +7. SimplePromptStudioExecutor rejects unsupported operations +""" + +from unittest.mock import MagicMock + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +# --------------------------------------------------------------------------- +# 1. Operation enums +# --------------------------------------------------------------------------- + +class TestSPSOperations: + def test_sps_answer_prompt_enum_exists(self): + assert hasattr(Operation, "SPS_ANSWER_PROMPT") + assert Operation.SPS_ANSWER_PROMPT.value == "sps_answer_prompt" + + def test_sps_index_enum_exists(self): + assert hasattr(Operation, "SPS_INDEX") + assert Operation.SPS_INDEX.value == "sps_index" + + def test_sps_operations_in_operation_values(self): + values = {op.value for op in Operation} + assert "sps_answer_prompt" in values + assert "sps_index" in values + + +# --------------------------------------------------------------------------- +# 2. Mock SimplePromptStudioExecutor — registration and execution +# --------------------------------------------------------------------------- + +class TestSimplePromptStudioRegistration: + def test_mock_sps_executor_registers_and_executes(self): + """Simulate cloud executor discovery and execution.""" + @ExecutorRegistry.register + class MockSPSExecutor(BaseExecutor): + _OPERATION_MAP = { + "sps_answer_prompt": "_handle_answer_prompt", + "sps_index": "_handle_index", + } + + @property + def name(self) -> str: + return "simple_prompt_studio" + + def execute(self, context): + handler_name = self._OPERATION_MAP.get(context.operation) + if not handler_name: + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return getattr(self, handler_name)(context) + + def _handle_answer_prompt(self, context): + return ExecutionResult( + success=True, + data={ + "output": {"invoice_number": "INV-001"}, + "metadata": {}, + }, + ) + + def _handle_index(self, context): + return ExecutionResult( + success=True, + data={"output": "indexed", "metadata": {}}, + ) + + try: + assert "simple_prompt_studio" in ExecutorRegistry.list_executors() + executor = ExecutorRegistry.get("simple_prompt_studio") + assert executor.name == "simple_prompt_studio" + + # sps_answer_prompt + ctx = ExecutionContext( + executor_name="simple_prompt_studio", + operation="sps_answer_prompt", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert result.success + assert result.data["output"] == {"invoice_number": "INV-001"} + + # sps_index + ctx2 = ExecutionContext( + executor_name="simple_prompt_studio", + operation="sps_index", + run_id="run-2", + execution_source="tool", + executor_params={}, + ) + result2 = executor.execute(ctx2) + assert result2.success + assert result2.data["output"] == "indexed" + + # Rejects unsupported operations + ctx3 = ExecutionContext( + executor_name="simple_prompt_studio", + operation="extract", + run_id="run-3", + execution_source="tool", + executor_params={}, + ) + result3 = executor.execute(ctx3) + assert not result3.success + finally: + ExecutorRegistry.clear() + + +# --------------------------------------------------------------------------- +# 3. Queue routing +# --------------------------------------------------------------------------- + +class TestSPSQueueRouting: + def test_sps_routes_to_correct_queue(self): + queue = ExecutionDispatcher._get_queue("simple_prompt_studio") + assert queue == "celery_executor_simple_prompt_studio" + + def test_dispatch_sends_to_sps_queue(self): + mock_app = MagicMock() + mock_result = MagicMock() + mock_result.get.return_value = ExecutionResult( + success=True, data={"output": {"field": "value"}} + ).to_dict() + mock_app.send_task.return_value = mock_result + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="simple_prompt_studio", + operation="sps_answer_prompt", + run_id="run-1", + execution_source="tool", + executor_params={"tool_settings": {}, "output": {}}, + ) + result = dispatcher.dispatch(ctx) + + mock_app.send_task.assert_called_once() + call_kwargs = mock_app.send_task.call_args + assert call_kwargs.kwargs.get("queue") == "celery_executor_simple_prompt_studio" + + def test_dispatch_sps_index_to_correct_queue(self): + mock_app = MagicMock() + mock_result = MagicMock() + mock_result.get.return_value = ExecutionResult( + success=True, data={"output": "indexed"} + ).to_dict() + mock_app.send_task.return_value = mock_result + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="simple_prompt_studio", + operation="sps_index", + run_id="run-1", + execution_source="tool", + executor_params={"output": {}, "file_path": "/tmp/test.pdf"}, + ) + result = dispatcher.dispatch(ctx) + + mock_app.send_task.assert_called_once() + call_kwargs = mock_app.send_task.call_args + assert call_kwargs.kwargs.get("queue") == "celery_executor_simple_prompt_studio" + + +# --------------------------------------------------------------------------- +# 4. LegacyExecutor does NOT handle SPS operations +# --------------------------------------------------------------------------- + +class TestLegacyExcludesSPS: + def test_sps_answer_prompt_not_in_legacy_operation_map(self): + from executor.executors.legacy_executor import LegacyExecutor + assert "sps_answer_prompt" not in LegacyExecutor._OPERATION_MAP + + def test_sps_index_not_in_legacy_operation_map(self): + from executor.executors.legacy_executor import LegacyExecutor + assert "sps_index" not in LegacyExecutor._OPERATION_MAP + + def test_legacy_returns_failure_for_sps_answer_prompt(self): + from executor.executors.legacy_executor import LegacyExecutor + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + executor = ExecutorRegistry.get("legacy") + + ctx = ExecutionContext( + executor_name="legacy", + operation="sps_answer_prompt", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert not result.success + assert "does not support" in result.error + + def test_legacy_returns_failure_for_sps_index(self): + from executor.executors.legacy_executor import LegacyExecutor + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + executor = ExecutorRegistry.get("legacy") + + ctx = ExecutionContext( + executor_name="legacy", + operation="sps_index", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert not result.success + assert "does not support" in result.error + + +# --------------------------------------------------------------------------- +# 5. tasks.py log_component for SPS operations +# --------------------------------------------------------------------------- + +class TestTasksLogComponent: + def test_sps_answer_prompt_uses_default_log_component(self): + """SPS operations use the default log_component branch in tasks.py.""" + ctx_dict = { + "executor_name": "simple_prompt_studio", + "operation": "sps_answer_prompt", + "run_id": "run-001", + "execution_source": "tool", + "organization_id": "org-1", + "executor_params": { + "tool_id": "tool-1", + "file_name": "invoice.pdf", + }, + "request_id": "req-1", + "log_events_id": "evt-1", + } + context = ExecutionContext.from_dict(ctx_dict) + params = context.executor_params + + # SPS operations fall through to the default branch + assert context.operation not in ("ide_index", "structure_pipeline", + "table_extract", "smart_table_extract") + component = { + "tool_id": params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": context.operation, + } + assert component == { + "tool_id": "tool-1", + "run_id": "run-001", + "doc_name": "invoice.pdf", + "operation": "sps_answer_prompt", + } + + def test_sps_index_uses_default_log_component(self): + """SPS index also uses the default log_component branch.""" + ctx_dict = { + "executor_name": "simple_prompt_studio", + "operation": "sps_index", + "run_id": "run-002", + "execution_source": "tool", + "executor_params": { + "tool_id": "tool-2", + "file_name": "contract.pdf", + }, + "request_id": "req-2", + "log_events_id": "evt-2", + } + context = ExecutionContext.from_dict(ctx_dict) + params = context.executor_params + + assert context.operation not in ("ide_index", "structure_pipeline", + "table_extract", "smart_table_extract") + component = { + "tool_id": params.get("tool_id", ""), + "run_id": context.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": context.operation, + } + assert component["operation"] == "sps_index" diff --git a/workers/tests/test_sanity_phase6h.py b/workers/tests/test_sanity_phase6h.py new file mode 100644 index 0000000000..570fba004c --- /dev/null +++ b/workers/tests/test_sanity_phase6h.py @@ -0,0 +1,261 @@ +"""Phase 6H Sanity — AgenticPromptStudioExecutor + agentic operations. + +Verifies: +1. All 8 agentic Operation enums exist +2. AGENTIC_EXTRACTION removed from Operation enum +3. Mock AgenticPromptStudioExecutor — registration and all 8 operations +4. Queue routing: executor_name="agentic" → celery_executor_agentic +5. LegacyExecutor does NOT handle any agentic operations +6. Dispatch sends to correct queue +7. Structure tool routes to agentic executor (not legacy) +""" + +from unittest.mock import MagicMock + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +AGENTIC_OPERATIONS = [ + "agentic_extract", + "agentic_summarize", + "agentic_uniformize", + "agentic_finalize", + "agentic_generate_prompt", + "agentic_generate_prompt_pipeline", + "agentic_compare", + "agentic_tune_field", +] + + +# --------------------------------------------------------------------------- +# 1. Operation enums +# --------------------------------------------------------------------------- + +class TestAgenticOperations: + @pytest.mark.parametrize("op", AGENTIC_OPERATIONS) + def test_agentic_operation_enum_exists(self, op): + values = {o.value for o in Operation} + assert op in values + + def test_agentic_extraction_removed(self): + """Old AGENTIC_EXTRACTION enum no longer exists.""" + assert not hasattr(Operation, "AGENTIC_EXTRACTION") + values = {o.value for o in Operation} + assert "agentic_extraction" not in values + + +# --------------------------------------------------------------------------- +# 2. Mock AgenticPromptStudioExecutor — registration and all operations +# --------------------------------------------------------------------------- + +class TestAgenticExecutorRegistration: + def test_mock_agentic_executor_registers_and_routes_all_ops(self): + """Simulate cloud executor discovery and execution of all 8 ops.""" + @ExecutorRegistry.register + class MockAgenticExecutor(BaseExecutor): + _OPERATION_MAP = {op: f"_handle_{op}" for op in AGENTIC_OPERATIONS} + + @property + def name(self) -> str: + return "agentic" + + def execute(self, context): + handler_name = self._OPERATION_MAP.get(context.operation) + if not handler_name: + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={ + "output": {"operation": context.operation}, + "metadata": {}, + }, + ) + + try: + assert "agentic" in ExecutorRegistry.list_executors() + executor = ExecutorRegistry.get("agentic") + assert executor.name == "agentic" + + # Test all 8 operations route successfully + for op in AGENTIC_OPERATIONS: + ctx = ExecutionContext( + executor_name="agentic", + operation=op, + run_id=f"run-{op}", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert result.success, f"Operation {op} failed" + assert result.data["output"]["operation"] == op + + # Rejects unsupported operations + ctx = ExecutionContext( + executor_name="agentic", + operation="answer_prompt", + run_id="run-unsupported", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert not result.success + finally: + ExecutorRegistry.clear() + + +# --------------------------------------------------------------------------- +# 3. Queue routing +# --------------------------------------------------------------------------- + +class TestAgenticQueueRouting: + def test_agentic_routes_to_correct_queue(self): + queue = ExecutionDispatcher._get_queue("agentic") + assert queue == "celery_executor_agentic" + + @pytest.mark.parametrize("op", AGENTIC_OPERATIONS) + def test_dispatch_sends_to_agentic_queue(self, op): + mock_app = MagicMock() + mock_result = MagicMock() + mock_result.get.return_value = ExecutionResult( + success=True, data={"output": {}} + ).to_dict() + mock_app.send_task.return_value = mock_result + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="agentic", + operation=op, + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + dispatcher.dispatch(ctx) + + mock_app.send_task.assert_called_once() + call_kwargs = mock_app.send_task.call_args + assert call_kwargs.kwargs.get("queue") == "celery_executor_agentic" + + +# --------------------------------------------------------------------------- +# 4. LegacyExecutor does NOT handle agentic operations +# --------------------------------------------------------------------------- + +class TestLegacyExcludesAgentic: + @pytest.mark.parametrize("op", AGENTIC_OPERATIONS) + def test_agentic_op_not_in_legacy_operation_map(self, op): + from executor.executors.legacy_executor import LegacyExecutor + assert op not in LegacyExecutor._OPERATION_MAP + + def test_legacy_returns_failure_for_agentic_extract(self): + from executor.executors.legacy_executor import LegacyExecutor + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + executor = ExecutorRegistry.get("legacy") + + ctx = ExecutionContext( + executor_name="legacy", + operation="agentic_extract", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert not result.success + assert "does not support" in result.error + + def test_legacy_returns_failure_for_agentic_summarize(self): + from executor.executors.legacy_executor import LegacyExecutor + + ExecutorRegistry.clear() + if "legacy" not in ExecutorRegistry.list_executors(): + ExecutorRegistry.register(LegacyExecutor) + executor = ExecutorRegistry.get("legacy") + + ctx = ExecutionContext( + executor_name="legacy", + operation="agentic_summarize", + run_id="run-1", + execution_source="tool", + executor_params={}, + ) + result = executor.execute(ctx) + assert not result.success + assert "does not support" in result.error + + +# --------------------------------------------------------------------------- +# 5. Structure tool routes to agentic executor +# --------------------------------------------------------------------------- + +class TestStructureToolAgenticRouting: + def test_structure_tool_dispatches_agentic_extract(self): + """Verify _run_agentic_extraction sends executor_name='agentic'.""" + from unittest.mock import patch + + from file_processing.structure_tool_task import _run_agentic_extraction + + mock_dispatcher = MagicMock() + mock_dispatcher.dispatch.return_value = ExecutionResult( + success=True, data={"output": {"field": "value"}} + ) + + result = _run_agentic_extraction( + tool_metadata={"name": "test"}, + input_file_path="/tmp/test.pdf", + output_dir_path="/tmp/output", + tool_instance_metadata={}, + dispatcher=mock_dispatcher, + shim=MagicMock(), + platform_helper=MagicMock(), + file_execution_id="exec-001", + organization_id="org-001", + source_file_name="test.pdf", + fs=MagicMock(), + ) + + # Verify dispatch was called with correct routing + mock_dispatcher.dispatch.assert_called_once() + dispatched_ctx = mock_dispatcher.dispatch.call_args[0][0] + assert dispatched_ctx.executor_name == "agentic" + assert dispatched_ctx.operation == "agentic_extract" + assert dispatched_ctx.organization_id == "org-001" + + +# --------------------------------------------------------------------------- +# 6. tasks.py log_component for agentic operations +# --------------------------------------------------------------------------- + +class TestTasksLogComponent: + @pytest.mark.parametrize("op", AGENTIC_OPERATIONS) + def test_agentic_ops_use_default_log_component(self, op): + """Agentic operations fall through to default log_component.""" + ctx_dict = { + "executor_name": "agentic", + "operation": op, + "run_id": "run-001", + "execution_source": "tool", + "executor_params": { + "tool_id": "tool-1", + "file_name": "doc.pdf", + }, + "request_id": "req-1", + "log_events_id": "evt-1", + } + context = ExecutionContext.from_dict(ctx_dict) + + # Agentic ops should NOT match ide_index, structure_pipeline, + # or table_extract/smart_table_extract branches + assert context.operation not in ( + "ide_index", "structure_pipeline", + "table_extract", "smart_table_extract", + ) diff --git a/workers/tests/test_sanity_phase6i.py b/workers/tests/test_sanity_phase6i.py new file mode 100644 index 0000000000..4de0e8f662 --- /dev/null +++ b/workers/tests/test_sanity_phase6i.py @@ -0,0 +1,272 @@ +"""Phase 6I Sanity — Backend Summarizer Migration. + +Verifies: +1. Summarize operation exists and routes through LegacyExecutor +2. Summarize executor_params contract matches _handle_summarize expectations +3. Dispatch routes summarize to celery_executor_legacy queue +4. Summarize result has expected shape (data.data = summary text) +5. Full Celery chain for summarize operation +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +# Patches +_PATCH_GET_PROMPT_DEPS = ( + "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" +) + + +def _register_legacy(): + from executor.executors.legacy_executor import LegacyExecutor + ExecutorRegistry.clear() + ExecutorRegistry.register(LegacyExecutor) + + +# --------------------------------------------------------------------------- +# 1. Summarize operation enum +# --------------------------------------------------------------------------- + +class TestSummarizeOperation: + def test_summarize_enum_exists(self): + assert hasattr(Operation, "SUMMARIZE") + assert Operation.SUMMARIZE.value == "summarize" + + def test_summarize_in_legacy_operation_map(self): + from executor.executors.legacy_executor import LegacyExecutor + assert "summarize" in LegacyExecutor._OPERATION_MAP + + +# --------------------------------------------------------------------------- +# 2. Executor params contract +# --------------------------------------------------------------------------- + +class TestSummarizeParamsContract: + def test_summarize_params_match_handler_expectations(self): + """Verify the params the backend summarizer sends match + what _handle_summarize expects.""" + # These are the keys the cloud summarizer.py now sends + backend_params = { + "llm_adapter_instance_id": "llm-uuid", + "summarize_prompt": "Summarize the document...", + "context": "This is the full document text...", + "prompt_keys": ["invoice_number", "total_amount"], + "PLATFORM_SERVICE_API_KEY": "platform-key-123", + } + + # _handle_summarize reads these keys + assert "llm_adapter_instance_id" in backend_params + assert "summarize_prompt" in backend_params + assert "context" in backend_params + assert "prompt_keys" in backend_params + assert "PLATFORM_SERVICE_API_KEY" in backend_params + + +# --------------------------------------------------------------------------- +# 3. Queue routing +# --------------------------------------------------------------------------- + +class TestSummarizeQueueRouting: + def test_summarize_routes_to_legacy_queue(self): + """Summarize dispatches to celery_executor_legacy (LegacyExecutor).""" + queue = ExecutionDispatcher._get_queue("legacy") + assert queue == "celery_executor_legacy" + + def test_dispatch_sends_summarize_to_legacy_queue(self): + mock_app = MagicMock() + mock_result = MagicMock() + mock_result.get.return_value = ExecutionResult( + success=True, data={"data": "Summary text here"} + ).to_dict() + mock_app.send_task.return_value = mock_result + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="legacy", + operation="summarize", + run_id="run-summarize", + execution_source="ide", + organization_id="org-1", + executor_params={ + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize...", + "context": "Document text", + "prompt_keys": ["field1"], + "PLATFORM_SERVICE_API_KEY": "key-1", + }, + ) + result = dispatcher.dispatch(ctx) + + mock_app.send_task.assert_called_once() + call_kwargs = mock_app.send_task.call_args + assert call_kwargs.kwargs.get("queue") == "celery_executor_legacy" + assert result.success + assert result.data["data"] == "Summary text here" + + +# --------------------------------------------------------------------------- +# 4. Result shape +# --------------------------------------------------------------------------- + +class TestSummarizeResultShape: + @patch(_PATCH_GET_PROMPT_DEPS) + def test_summarize_returns_data_key(self, mock_deps): + """_handle_summarize returns ExecutionResult with data.data = str.""" + mock_LLM = MagicMock() + mock_llm_instance = MagicMock() + mock_LLM.return_value = mock_llm_instance + + mock_deps.return_value = ( + MagicMock(), # RetrievalService + MagicMock(), # PostProcessor + MagicMock(), # VariableReplacement + MagicMock(), # JsonRepair + mock_LLM, # LLM + MagicMock(), # Embedding + MagicMock(), # VectorDB + ) + + # Mock AnswerPromptService.run_completion + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + return_value="This is the summary.", + ): + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + ctx = ExecutionContext( + executor_name="legacy", + operation="summarize", + run_id="run-result-shape", + execution_source="ide", + organization_id="org-1", + executor_params={ + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize the document.", + "context": "Full document text here.", + "prompt_keys": ["total"], + "PLATFORM_SERVICE_API_KEY": "key-1", + }, + ) + result = executor.execute(ctx) + + assert result.success + assert result.data["data"] == "This is the summary." + + @patch(_PATCH_GET_PROMPT_DEPS) + def test_summarize_missing_context_returns_failure(self, mock_deps): + """Missing context param returns failure without LLM call.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + ctx = ExecutionContext( + executor_name="legacy", + operation="summarize", + run_id="run-missing-ctx", + execution_source="ide", + executor_params={ + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize.", + "context": "", # empty + "PLATFORM_SERVICE_API_KEY": "key-1", + }, + ) + result = executor.execute(ctx) + + assert not result.success + assert "context" in result.error.lower() + + @patch(_PATCH_GET_PROMPT_DEPS) + def test_summarize_missing_llm_returns_failure(self, mock_deps): + """Missing llm_adapter_instance_id returns failure.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + ctx = ExecutionContext( + executor_name="legacy", + operation="summarize", + run_id="run-missing-llm", + execution_source="ide", + executor_params={ + "llm_adapter_instance_id": "", # empty + "summarize_prompt": "Summarize.", + "context": "Some text", + "PLATFORM_SERVICE_API_KEY": "key-1", + }, + ) + result = executor.execute(ctx) + + assert not result.success + assert "llm_adapter_instance_id" in result.error.lower() + + +# --------------------------------------------------------------------------- +# 5. Full Celery chain +# --------------------------------------------------------------------------- + +@pytest.fixture +def eager_app(): + """Configure executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +class TestSummarizeCeleryChain: + @patch(_PATCH_GET_PROMPT_DEPS) + def test_summarize_full_celery_chain(self, mock_deps, eager_app): + """Summarize through full Celery task chain.""" + mock_LLM = MagicMock() + mock_llm_instance = MagicMock() + mock_LLM.return_value = mock_llm_instance + + mock_deps.return_value = ( + MagicMock(), MagicMock(), MagicMock(), MagicMock(), + mock_LLM, MagicMock(), MagicMock(), + ) + + with patch( + "executor.executors.answer_prompt.AnswerPromptService.run_completion", + return_value="Celery chain summary.", + ): + _register_legacy() + + ctx = ExecutionContext( + executor_name="legacy", + operation="summarize", + run_id="run-celery-summarize", + execution_source="ide", + organization_id="org-1", + executor_params={ + "llm_adapter_instance_id": "llm-1", + "summarize_prompt": "Summarize.", + "context": "Document text for celery chain.", + "prompt_keys": ["amount"], + "PLATFORM_SERVICE_API_KEY": "key-1", + }, + ) + + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success + assert result.data["data"] == "Celery chain summary." diff --git a/workers/tests/test_sanity_phase6j.py b/workers/tests/test_sanity_phase6j.py new file mode 100644 index 0000000000..7e900652d9 --- /dev/null +++ b/workers/tests/test_sanity_phase6j.py @@ -0,0 +1,682 @@ +"""Phase 6J — Comprehensive Phase 6 sanity tests. + +Consolidated regression + integration tests for the full Phase 6 +plugin migration. Verifies: + +1. Full Operation enum coverage — every operation has exactly one executor +2. Multi-executor coexistence in ExecutorRegistry +3. End-to-end Celery chain for each cloud executor (mock executors) +4. Cross-cutting highlight plugin works across executors +5. Plugin loader → executor registration → dispatch → result flow +6. Queue routing for all executor names +7. Graceful degradation when cloud plugins missing +8. tasks.py log_component for all operation types +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.dispatcher import ExecutionDispatcher +from unstract.sdk1.execution.executor import BaseExecutor +from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator +from unstract.sdk1.execution.registry import ExecutorRegistry +from unstract.sdk1.execution.result import ExecutionResult + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture(autouse=True) +def _clean_registry(): + ExecutorRegistry.clear() + yield + ExecutorRegistry.clear() + + +@pytest.fixture +def eager_app(): + """Configure executor Celery app for eager-mode testing.""" + from executor.worker import app + + original = { + "task_always_eager": app.conf.task_always_eager, + "task_eager_propagates": app.conf.task_eager_propagates, + "result_backend": app.conf.result_backend, + } + app.conf.update( + task_always_eager=True, + task_eager_propagates=False, + result_backend="cache+memory://", + ) + yield app + app.conf.update(original) + + +def _register_legacy(): + from executor.executors.legacy_executor import LegacyExecutor + ExecutorRegistry.register(LegacyExecutor) + + +# Mock cloud executors for multi-executor tests +def _register_mock_cloud_executors(): + """Register mock cloud executors alongside LegacyExecutor.""" + + @ExecutorRegistry.register + class MockTableExecutor(BaseExecutor): + @property + def name(self) -> str: + return "table" + + def execute(self, context): + if context.operation != "table_extract": + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={"output": "table_data", "metadata": {}}, + ) + + @ExecutorRegistry.register + class MockSmartTableExecutor(BaseExecutor): + @property + def name(self) -> str: + return "smart_table" + + def execute(self, context): + if context.operation != "smart_table_extract": + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={"output": "smart_table_data", "metadata": {}}, + ) + + @ExecutorRegistry.register + class MockSPSExecutor(BaseExecutor): + @property + def name(self) -> str: + return "simple_prompt_studio" + + def execute(self, context): + if context.operation not in ("sps_answer_prompt", "sps_index"): + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={"output": f"sps_{context.operation}", "metadata": {}}, + ) + + @ExecutorRegistry.register + class MockAgenticExecutor(BaseExecutor): + _OPS = { + "agentic_extract", "agentic_summarize", "agentic_uniformize", + "agentic_finalize", "agentic_generate_prompt", + "agentic_generate_prompt_pipeline", "agentic_compare", + "agentic_tune_field", + } + + @property + def name(self) -> str: + return "agentic" + + def execute(self, context): + if context.operation not in self._OPS: + return ExecutionResult.failure( + error=f"Unsupported: {context.operation}" + ) + return ExecutionResult( + success=True, + data={"output": f"agentic_{context.operation}", "metadata": {}}, + ) + + +# --------------------------------------------------------------------------- +# 1. Full Operation enum coverage — every operation has exactly one executor +# --------------------------------------------------------------------------- + +# Map of every Operation value to the executor that handles it +OPERATION_TO_EXECUTOR = { + # LegacyExecutor (OSS) + "extract": "legacy", + "index": "legacy", + "answer_prompt": "legacy", + "single_pass_extraction": "legacy", + "summarize": "legacy", + "ide_index": "legacy", + "structure_pipeline": "legacy", + # Cloud executors + "table_extract": "table", + "smart_table_extract": "smart_table", + "sps_answer_prompt": "simple_prompt_studio", + "sps_index": "simple_prompt_studio", + "agentic_extract": "agentic", + "agentic_summarize": "agentic", + "agentic_uniformize": "agentic", + "agentic_finalize": "agentic", + "agentic_generate_prompt": "agentic", + "agentic_generate_prompt_pipeline": "agentic", + "agentic_compare": "agentic", + "agentic_tune_field": "agentic", +} + + +class TestOperationEnumCoverage: + def test_every_operation_is_mapped(self): + """Every Operation enum value has an assigned executor.""" + for op in Operation: + assert op.value in OPERATION_TO_EXECUTOR, ( + f"Operation {op.value} not mapped to any executor" + ) + + def test_no_extra_mappings(self): + """No stale mappings for removed operations.""" + valid_ops = {op.value for op in Operation} + for mapped_op in OPERATION_TO_EXECUTOR: + assert mapped_op in valid_ops, ( + f"Mapped operation '{mapped_op}' not in Operation enum" + ) + + def test_operation_count(self): + """Verify total operation count matches expectations.""" + assert len(Operation) == 19 # 7 legacy + 2 table + 2 sps + 8 agentic + + def test_legacy_operations_in_operation_map(self): + """All legacy operations are in LegacyExecutor._OPERATION_MAP.""" + from executor.executors.legacy_executor import LegacyExecutor + + for op_value, executor_name in OPERATION_TO_EXECUTOR.items(): + if executor_name == "legacy": + assert op_value in LegacyExecutor._OPERATION_MAP, ( + f"Legacy operation {op_value} missing from _OPERATION_MAP" + ) + + def test_cloud_operations_not_in_legacy_map(self): + """Cloud operations are NOT in LegacyExecutor._OPERATION_MAP.""" + from executor.executors.legacy_executor import LegacyExecutor + + for op_value, executor_name in OPERATION_TO_EXECUTOR.items(): + if executor_name != "legacy": + assert op_value not in LegacyExecutor._OPERATION_MAP, ( + f"Cloud operation {op_value} should NOT be in legacy map" + ) + + +# --------------------------------------------------------------------------- +# 2. Multi-executor coexistence in registry +# --------------------------------------------------------------------------- + +class TestMultiExecutorCoexistence: + def test_all_five_executors_registered(self): + """Legacy + 4 cloud executors all coexist in registry.""" + _register_legacy() + _register_mock_cloud_executors() + + executors = ExecutorRegistry.list_executors() + assert "legacy" in executors + assert "table" in executors + assert "smart_table" in executors + assert "simple_prompt_studio" in executors + assert "agentic" in executors + assert len(executors) == 5 + + def test_each_executor_has_correct_name(self): + _register_legacy() + _register_mock_cloud_executors() + + for name in ["legacy", "table", "smart_table", "simple_prompt_studio", "agentic"]: + executor = ExecutorRegistry.get(name) + assert executor.name == name + + def test_wrong_executor_rejects_operation(self): + """Dispatching a table operation to legacy returns failure.""" + _register_legacy() + _register_mock_cloud_executors() + + legacy = ExecutorRegistry.get("legacy") + ctx = ExecutionContext( + executor_name="legacy", + operation="table_extract", + run_id="run-1", + execution_source="tool", + ) + result = legacy.execute(ctx) + assert not result.success + assert "does not support" in result.error + + def test_correct_executor_handles_operation(self): + """Each operation routes to the right executor.""" + _register_legacy() + _register_mock_cloud_executors() + + test_cases = [ + ("table", "table_extract"), + ("smart_table", "smart_table_extract"), + ("simple_prompt_studio", "sps_answer_prompt"), + ("simple_prompt_studio", "sps_index"), + ("agentic", "agentic_extract"), + ("agentic", "agentic_compare"), + ] + for executor_name, operation in test_cases: + executor = ExecutorRegistry.get(executor_name) + ctx = ExecutionContext( + executor_name=executor_name, + operation=operation, + run_id=f"run-{operation}", + execution_source="tool", + ) + result = executor.execute(ctx) + assert result.success, f"{executor_name}/{operation} failed" + + +# --------------------------------------------------------------------------- +# 3. End-to-end Celery chain for cloud executors +# --------------------------------------------------------------------------- + +class TestCeleryChainCloudExecutors: + def test_table_extract_celery_chain(self, eager_app): + """TABLE extraction through full Celery task chain.""" + _register_legacy() + _register_mock_cloud_executors() + + ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id="run-celery-table", + execution_source="tool", + ) + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success + assert result.data["output"] == "table_data" + + def test_smart_table_extract_celery_chain(self, eager_app): + """SMART TABLE extraction through full Celery task chain.""" + _register_legacy() + _register_mock_cloud_executors() + + ctx = ExecutionContext( + executor_name="smart_table", + operation="smart_table_extract", + run_id="run-celery-smart-table", + execution_source="tool", + ) + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success + assert result.data["output"] == "smart_table_data" + + def test_sps_answer_prompt_celery_chain(self, eager_app): + """SPS answer_prompt through full Celery task chain.""" + _register_legacy() + _register_mock_cloud_executors() + + ctx = ExecutionContext( + executor_name="simple_prompt_studio", + operation="sps_answer_prompt", + run_id="run-celery-sps", + execution_source="tool", + ) + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success + + def test_agentic_extract_celery_chain(self, eager_app): + """Agentic extraction through full Celery task chain.""" + _register_legacy() + _register_mock_cloud_executors() + + ctx = ExecutionContext( + executor_name="agentic", + operation="agentic_extract", + run_id="run-celery-agentic", + execution_source="tool", + ) + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert result.success + + def test_unregistered_executor_returns_failure(self, eager_app): + """Dispatching to unregistered executor returns failure.""" + _register_legacy() + # Don't register cloud executors + + ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id="run-missing", + execution_source="tool", + ) + task = eager_app.tasks["execute_extraction"] + result_dict = task.apply(args=[ctx.to_dict()]).get() + result = ExecutionResult.from_dict(result_dict) + + assert not result.success + assert "table" in result.error.lower() + + +# --------------------------------------------------------------------------- +# 4. Cross-cutting highlight plugin across executors +# --------------------------------------------------------------------------- + +class TestCrossCuttingHighlight: + def test_highlight_plugin_not_installed_no_error(self): + """When highlight plugin not installed, extraction still works.""" + from executor.executors.plugins.loader import ExecutorPluginLoader + + assert ExecutorPluginLoader.get("highlight-data") is None + # No error — graceful degradation + + def test_mock_highlight_plugin_shared_across_executors(self): + """Multiple executors can use the same highlight plugin instance.""" + from executor.executors.plugins.loader import ExecutorPluginLoader + + class FakeHighlight: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def run(self, response, **kwargs): + return {"highlighted": True} + + def get_highlight_data(self): + return {"lines": [1, 2, 3]} + + def get_confidence_data(self): + return {"confidence": 0.95} + + fake_ep = MagicMock() + fake_ep.name = "highlight-data" + fake_ep.load.return_value = FakeHighlight + + with patch( + "importlib.metadata.entry_points", + return_value=[fake_ep], + ): + ExecutorPluginLoader.clear() + cls = ExecutorPluginLoader.get("highlight-data") + assert cls is FakeHighlight + + # Both legacy and agentic contexts can create instances + legacy_hl = cls(file_path="/tmp/doc.txt", execution_source="ide") + agentic_hl = cls(file_path="/tmp/other.txt", execution_source="tool") + + assert legacy_hl.get_highlight_data() == {"lines": [1, 2, 3]} + assert agentic_hl.get_confidence_data() == {"confidence": 0.95} + + +# --------------------------------------------------------------------------- +# 5. Plugin loader → registration → dispatch → result flow +# --------------------------------------------------------------------------- + +class TestPluginDiscoveryToDispatchFlow: + def test_full_discovery_to_dispatch_flow(self): + """Simulate: entry point discovery → register → dispatch → result.""" + # Step 1: "Discover" a cloud executor via entry point + @ExecutorRegistry.register + class DiscoveredExecutor(BaseExecutor): + @property + def name(self): + return "discovered" + + def execute(self, context): + return ExecutionResult( + success=True, + data={"output": "discovered_result"}, + ) + + # Step 2: Verify registration + assert "discovered" in ExecutorRegistry.list_executors() + + # Step 3: Dispatch via mock Celery + mock_app = MagicMock() + mock_result = MagicMock() + mock_result.get.return_value = ExecutionResult( + success=True, data={"output": "discovered_result"} + ).to_dict() + mock_app.send_task.return_value = mock_result + + dispatcher = ExecutionDispatcher(celery_app=mock_app) + ctx = ExecutionContext( + executor_name="discovered", + operation="custom_op", + run_id="run-flow", + execution_source="tool", + ) + result = dispatcher.dispatch(ctx) + + # Step 4: Verify result + assert result.success + assert result.data["output"] == "discovered_result" + + # Step 5: Verify queue routing + call_kwargs = mock_app.send_task.call_args + assert call_kwargs.kwargs["queue"] == "celery_executor_discovered" + + +# --------------------------------------------------------------------------- +# 6. Queue routing for all executor names +# --------------------------------------------------------------------------- + +EXECUTOR_QUEUE_MAP = { + "legacy": "celery_executor_legacy", + "table": "celery_executor_table", + "smart_table": "celery_executor_smart_table", + "simple_prompt_studio": "celery_executor_simple_prompt_studio", + "agentic": "celery_executor_agentic", +} + + +class TestQueueRoutingAllExecutors: + @pytest.mark.parametrize( + "executor_name,expected_queue", + list(EXECUTOR_QUEUE_MAP.items()), + ) + def test_queue_name_for_executor(self, executor_name, expected_queue): + assert ExecutionDispatcher._get_queue(executor_name) == expected_queue + + +# --------------------------------------------------------------------------- +# 7. Graceful degradation when cloud plugins missing +# --------------------------------------------------------------------------- + +class TestGracefulDegradation: + def test_legacy_works_without_cloud_executors(self, eager_app): + """Legacy operations work even when no cloud executors installed.""" + _register_legacy() + + # Only legacy should be in registry + assert ExecutorRegistry.list_executors() == ["legacy"] + + # Legacy operations still work + ctx = ExecutionContext( + executor_name="legacy", + operation="extract", + run_id="run-degrade", + execution_source="tool", + executor_params={ + "tool_id": "t-1", + "file_name": "test.pdf", + "file_hash": "abc", + "PLATFORM_SERVICE_API_KEY": "key", + }, + ) + # This will fail at the handler level (no mocks), but it should + # route correctly and NOT fail at registry/dispatch level + executor = ExecutorRegistry.get("legacy") + assert executor is not None + assert executor.name == "legacy" + + def test_cloud_op_on_legacy_returns_meaningful_error(self): + """Attempting a cloud operation on legacy gives clear error.""" + _register_legacy() + executor = ExecutorRegistry.get("legacy") + + for cloud_op in ["table_extract", "smart_table_extract", + "sps_answer_prompt", "agentic_extract"]: + ctx = ExecutionContext( + executor_name="legacy", + operation=cloud_op, + run_id=f"run-{cloud_op}", + execution_source="tool", + ) + result = executor.execute(ctx) + assert not result.success + assert "does not support" in result.error + + def test_missing_executor_via_orchestrator(self): + """Orchestrator returns failure for unregistered executor.""" + _register_legacy() + orchestrator = ExecutionOrchestrator() + + ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id="run-no-table", + execution_source="tool", + ) + result = orchestrator.execute(ctx) + assert not result.success + assert "table" in result.error.lower() + + +# --------------------------------------------------------------------------- +# 8. tasks.py log_component for all operation types +# --------------------------------------------------------------------------- + +class TestLogComponentAllOperations: + """Verify tasks.py log_component builder handles all operation types.""" + + def _build_log_component(self, operation, executor_params=None): + """Simulate the tasks.py log_component logic.""" + params = executor_params or { + "tool_id": "t-1", + "file_name": "doc.pdf", + } + ctx = ExecutionContext.from_dict({ + "executor_name": "legacy", + "operation": operation, + "run_id": "run-log", + "execution_source": "tool", + "executor_params": params, + "request_id": "req-1", + "log_events_id": "evt-1", + }) + + # Replicate tasks.py logic + if ctx.operation == "ide_index": + extract_params = params.get("extract_params", {}) + return { + "tool_id": extract_params.get("tool_id", ""), + "run_id": ctx.run_id, + "doc_name": str(extract_params.get("file_name", "")), + "operation": ctx.operation, + } + elif ctx.operation == "structure_pipeline": + answer_params = params.get("answer_params", {}) + pipeline_opts = params.get("pipeline_options", {}) + return { + "tool_id": answer_params.get("tool_id", ""), + "run_id": ctx.run_id, + "doc_name": str(pipeline_opts.get("source_file_name", "")), + "operation": ctx.operation, + } + elif ctx.operation in ("table_extract", "smart_table_extract"): + return { + "tool_id": params.get("tool_id", ""), + "run_id": ctx.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": ctx.operation, + } + else: + return { + "tool_id": params.get("tool_id", ""), + "run_id": ctx.run_id, + "doc_name": str(params.get("file_name", "")), + "operation": ctx.operation, + } + + def test_ide_index_extracts_nested_params(self): + comp = self._build_log_component("ide_index", { + "extract_params": {"tool_id": "t-nested", "file_name": "nested.pdf"}, + }) + assert comp["tool_id"] == "t-nested" + assert comp["doc_name"] == "nested.pdf" + + def test_structure_pipeline_extracts_nested_params(self): + comp = self._build_log_component("structure_pipeline", { + "answer_params": {"tool_id": "t-pipe"}, + "pipeline_options": {"source_file_name": "pipe.pdf"}, + }) + assert comp["tool_id"] == "t-pipe" + assert comp["doc_name"] == "pipe.pdf" + + def test_table_extract_uses_direct_params(self): + comp = self._build_log_component("table_extract") + assert comp["tool_id"] == "t-1" + assert comp["operation"] == "table_extract" + + def test_smart_table_extract_uses_direct_params(self): + comp = self._build_log_component("smart_table_extract") + assert comp["operation"] == "smart_table_extract" + + @pytest.mark.parametrize("op", [ + "extract", "index", "answer_prompt", "single_pass_extraction", + "summarize", "sps_answer_prompt", "sps_index", + "agentic_extract", "agentic_summarize", "agentic_compare", + ]) + def test_default_branch_for_standard_ops(self, op): + comp = self._build_log_component(op) + assert comp["tool_id"] == "t-1" + assert comp["doc_name"] == "doc.pdf" + assert comp["operation"] == op + + +# --------------------------------------------------------------------------- +# 9. ExecutionResult serialization round-trip +# --------------------------------------------------------------------------- + +class TestResultRoundTrip: + def test_success_result_round_trip(self): + original = ExecutionResult( + success=True, + data={"output": {"field": "value"}, "metadata": {"tokens": 100}}, + ) + restored = ExecutionResult.from_dict(original.to_dict()) + assert restored.success == original.success + assert restored.data == original.data + + def test_failure_result_round_trip(self): + original = ExecutionResult.failure(error="Something went wrong") + restored = ExecutionResult.from_dict(original.to_dict()) + assert not restored.success + assert restored.error == "Something went wrong" + + def test_context_round_trip(self): + original = ExecutionContext( + executor_name="agentic", + operation="agentic_extract", + run_id="run-rt", + execution_source="tool", + organization_id="org-1", + executor_params={"key": "value"}, + log_events_id="evt-1", + ) + restored = ExecutionContext.from_dict(original.to_dict()) + assert restored.executor_name == "agentic" + assert restored.operation == "agentic_extract" + assert restored.organization_id == "org-1" + assert restored.executor_params == {"key": "value"} + assert restored.log_events_id == "evt-1" From 5c2295675fc201ca6c39d5b69deaffb5a8c6e1f8 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Mon, 2 Mar 2026 19:57:10 +0530 Subject: [PATCH 07/64] Added executors for agentic prompt studio --- docker/dockerfiles/worker-unified.Dockerfile | 11 +++++++++++ workers/run-worker-docker.sh | 2 +- workers/run-worker.sh | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/docker/dockerfiles/worker-unified.Dockerfile b/docker/dockerfiles/worker-unified.Dockerfile index 202f71b699..4b7e88198a 100644 --- a/docker/dockerfiles/worker-unified.Dockerfile +++ b/docker/dockerfiles/worker-unified.Dockerfile @@ -83,6 +83,17 @@ RUN uv sync --group deploy --locked && \ touch requirements.txt && \ { chown -R worker:worker ./run-worker.sh ./run-worker-docker.sh 2>/dev/null || true; } +# Install executor plugins from prompt-service (cloud-only, no-op for OSS). +# Each plugin with an "unstract.executor.executors" entry point gets installed +# so that importlib.metadata.entry_points() can discover it at worker startup. +RUN for plugin_dir in /unstract/prompt-service/src/unstract/prompt_service/plugins/*/; do \ + if [ -f "$plugin_dir/pyproject.toml" ] && \ + grep -q 'unstract.executor.executors' "$plugin_dir/pyproject.toml" 2>/dev/null; then \ + echo "Installing executor plugin: $(basename $plugin_dir)" && \ + uv pip install "$plugin_dir" || true; \ + fi; \ + done + # Switch to worker user USER worker diff --git a/workers/run-worker-docker.sh b/workers/run-worker-docker.sh index 42afe9c91c..16668a919e 100755 --- a/workers/run-worker-docker.sh +++ b/workers/run-worker-docker.sh @@ -52,7 +52,7 @@ declare -A WORKER_QUEUES=( ["notification"]="notifications,notifications_webhook,notifications_email,notifications_sms,notifications_priority" ["log_consumer"]="celery_log_task_queue" ["scheduler"]="scheduler" - ["executor"]="executor" + ["executor"]="celery_executor_legacy" ) # Worker health ports diff --git a/workers/run-worker.sh b/workers/run-worker.sh index d974be3955..abd6931534 100755 --- a/workers/run-worker.sh +++ b/workers/run-worker.sh @@ -53,7 +53,7 @@ declare -A WORKER_QUEUES=( ["log_consumer"]="celery_log_task_queue" ["notification"]="notifications,notifications_webhook,notifications_email,notifications_sms,notifications_priority" ["scheduler"]="scheduler" - ["executor"]="executor" + ["executor"]="celery_executor_legacy" ) # Worker health ports From 3cc3213a0165285807856905262b81b922738836 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Mon, 2 Mar 2026 22:09:55 +0530 Subject: [PATCH 08/64] Removed redundant envs --- backend/backend/worker_celery.py | 33 ++++++++++++-------------------- workers/executor/worker.py | 2 +- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/backend/backend/worker_celery.py b/backend/backend/worker_celery.py index c22cb9e6da..86ed85ca35 100644 --- a/backend/backend/worker_celery.py +++ b/backend/backend/worker_celery.py @@ -1,17 +1,15 @@ """Lightweight Celery app for dispatching tasks to worker-v2 workers. -The Django backend uses Redis as its Celery broker for internal tasks -(beat, periodic tasks, etc.). The worker-v2 workers (executor, -file_processing, etc.) use a separate broker configured via -``WORKER_CELERY_BROKER_URL``. - -This module provides a Celery app connected to that worker-v2 broker -for dispatching tasks (via ExecutionDispatcher) to worker-v2 workers. +The Django backend already has a Celery app for internal tasks (beat, +periodic tasks, etc.) whose broker URL is set via CELERY_BROKER_URL. +Workers use the same broker. This module provides a second Celery app +instance that reuses the same broker URL (from Django settings) but +bypasses Celery's env-var-takes-priority behaviour so it can coexist +with the main Django Celery app in the same process. Problem: Celery reads the ``CELERY_BROKER_URL`` environment variable with highest priority — overriding constructor args, ``conf.update()``, -and ``config_from_object()``. Since Django sets that env var to Redis, -every Celery app created in this process inherits Redis as broker. +and ``config_from_object()``. Solution: Subclass Celery and override ``connection_for_write`` / ``connection_for_read`` so they always use our explicit broker URL, @@ -19,7 +17,6 @@ """ import logging -import os from urllib.parse import quote_plus from celery import Celery @@ -58,26 +55,20 @@ def get_worker_celery_app() -> Celery: """Get or create a Celery app for dispatching to worker-v2 workers. The app uses: - - Worker-v2 broker (WORKER_CELERY_BROKER_URL env var) + - Same broker as the workers (built from CELERY_BROKER_BASE_URL, + CELERY_BROKER_USER, CELERY_BROKER_PASS via Django settings) - Same PostgreSQL result backend as the Django Celery app Returns: Celery app configured for worker-v2 dispatch. - - Raises: - ValueError: If WORKER_CELERY_BROKER_URL is not set. """ global _worker_app if _worker_app is not None: return _worker_app - broker_url = os.environ.get("WORKER_CELERY_BROKER_URL") - if not broker_url: - raise ValueError( - "WORKER_CELERY_BROKER_URL is not set. " - "This should point to the broker used by worker-v2 " - "workers (e.g., redis://unstract-redis:6379)." - ) + # Reuse the broker URL already built by Django settings (base.py) + # from CELERY_BROKER_BASE_URL + CELERY_BROKER_USER + CELERY_BROKER_PASS + broker_url = settings.CELERY_BROKER_URL # Reuse the same PostgreSQL result backend as Django's Celery app result_backend = ( diff --git a/workers/executor/worker.py b/workers/executor/worker.py index a9ec204e2a..65c30db98a 100644 --- a/workers/executor/worker.py +++ b/workers/executor/worker.py @@ -36,7 +36,7 @@ def check_executor_health(): "worker_type": "executor", "registered_executors": executors, "executor_count": len(executors), - "queues": ["executor"], + "queues": ["celery_executor_legacy"], }, ) From d0532f8b77f339f338c3aaf9b9b3821fb99aae96 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Mon, 2 Mar 2026 23:03:16 +0530 Subject: [PATCH 09/64] Removed redundant envs --- .../src/components/helpers/socket-messages/SocketMessages.js | 1 - 1 file changed, 1 deletion(-) diff --git a/frontend/src/components/helpers/socket-messages/SocketMessages.js b/frontend/src/components/helpers/socket-messages/SocketMessages.js index a22e6ce6e2..36cb9b9d60 100644 --- a/frontend/src/components/helpers/socket-messages/SocketMessages.js +++ b/frontend/src/components/helpers/socket-messages/SocketMessages.js @@ -13,7 +13,6 @@ import { useExceptionHandler } from "../../../hooks/useExceptionHandler"; import { useAlertStore } from "../../../store/alert-store"; import { useSessionStore } from "../../../store/session-store"; import { useSocketCustomToolStore } from "../../../store/socket-custom-tool"; -import { useSessionStore } from "../../../store/session-store"; import { useSocketLogsStore } from "../../../store/socket-logs-store"; import { useSocketMessagesStore } from "../../../store/socket-messages-store"; import { useUsageStore } from "../../../store/usage-store"; From 6173df5f9da2198d0dc6bcaeb05c6eb5b635b28c Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 3 Mar 2026 14:00:22 +0530 Subject: [PATCH 10/64] Removed redundant envs --- frontend/src/components/custom-tools/prompt-card/PromptRun.jsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/frontend/src/components/custom-tools/prompt-card/PromptRun.jsx b/frontend/src/components/custom-tools/prompt-card/PromptRun.jsx index c3a5726a42..48a8369140 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptRun.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptRun.jsx @@ -1,11 +1,9 @@ import Cookies from "js-cookie"; import { useEffect } from "react"; -import Cookies from "js-cookie"; import { usePromptRunQueueStore } from "../../../store/prompt-run-queue-store"; import usePromptRun from "../../../hooks/usePromptRun"; import usePromptStudioSocket from "../../../hooks/usePromptStudioSocket"; import { useCustomToolStore } from "../../../store/custom-tool-store"; -import { usePromptRunQueueStore } from "../../../store/prompt-run-queue-store"; import { usePromptRunStatusStore } from "../../../store/prompt-run-status-store"; const MAX_ACTIVE_APIS = 5; From bbe6f5834ac059b82e4e9fbedb0438c3dc4164bd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Mar 2026 08:31:04 +0000 Subject: [PATCH 11/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- backend/backend/worker_celery.py | 4 +- .../prompt_studio_helper.py | 39 +-- .../prompt_studio_core_v2/tasks.py | 31 +- .../prompt_studio_core_v2/test_tasks.py | 108 ++----- .../sdk1/adapters/vectordb/exceptions.py | 1 - .../src/unstract/sdk1/execution/dispatcher.py | 22 +- .../src/unstract/sdk1/execution/executor.py | 4 +- .../unstract/sdk1/execution/orchestrator.py | 10 +- .../src/unstract/sdk1/execution/registry.py | 12 +- .../src/unstract/sdk1/execution/result.py | 4 +- unstract/sdk1/tests/test_execution.py | 92 ++---- workers/executor/executor_tool_shim.py | 12 +- workers/executor/executors/answer_prompt.py | 6 +- workers/executor/executors/file_utils.py | 1 + workers/executor/executors/index.py | 1 + workers/executor/executors/legacy_executor.py | 283 ++++++------------ workers/executor/executors/plugins/loader.py | 1 - .../executor/executors/plugins/protocols.py | 4 +- .../executors/plugins/text_processor.py | 4 +- .../executors/retrievers/automerging.py | 5 +- .../executor/executors/retrievers/fusion.py | 5 +- .../executors/retrievers/keyword_table.py | 5 +- .../executors/retrievers/recursive.py | 5 +- .../executor/executors/retrievers/router.py | 5 +- .../executor/executors/retrievers/simple.py | 3 +- .../executors/retrievers/subquestion.py | 5 +- .../executors/variable_replacement.py | 8 +- workers/executor/tasks.py | 18 +- workers/executor/worker.py | 7 +- .../file_processing/structure_tool_task.py | 96 ++---- workers/tests/test_answer_prompt.py | 3 - workers/tests/test_legacy_executor_extract.py | 3 +- .../tests/test_legacy_executor_scaffold.py | 1 - workers/tests/test_phase1_log_streaming.py | 1 - workers/tests/test_phase2h.py | 2 +- workers/tests/test_phase5d.py | 2 - workers/tests/test_sanity_phase3.py | 4 - workers/tests/test_sanity_phase4.py | 4 +- workers/tests/test_sanity_phase5.py | 1 - workers/tests/test_sanity_phase6c.py | 2 +- workers/tests/test_sanity_phase6d.py | 2 +- workers/tests/test_sanity_phase6e.py | 5 +- workers/tests/test_sanity_phase6f.py | 1 - workers/tests/test_sanity_phase6g.py | 1 - workers/tests/test_sanity_phase6h.py | 1 - workers/tests/test_usage.py | 1 - 46 files changed, 249 insertions(+), 586 deletions(-) diff --git a/backend/backend/worker_celery.py b/backend/backend/worker_celery.py index 86ed85ca35..018f3d485b 100644 --- a/backend/backend/worker_celery.py +++ b/backend/backend/worker_celery.py @@ -98,7 +98,9 @@ def get_worker_celery_app() -> Celery: _worker_app = app # Log broker host only (mask credentials) safe_broker = broker_url.split("@")[-1] if "@" in broker_url else broker_url - safe_backend = result_backend.split("@")[-1] if "@" in result_backend else result_backend + safe_backend = ( + result_backend.split("@")[-1] if "@" in result_backend else result_backend + ) logger.info( "Created worker dispatch Celery app (broker=%s, result_backend=%s)", safe_broker, diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py index fd09a4b99c..f11231415f 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py +++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py @@ -312,7 +312,10 @@ def build_index_payload( """ tool: CustomTool = CustomTool.objects.get(pk=tool_id) file_path = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( - org_id, is_create=False, user_id=user_id, tool_id=tool_id, + org_id, + is_create=False, + user_id=user_id, + tool_id=tool_id, ) file_path = str(Path(file_path) / file_name) @@ -339,9 +342,7 @@ def build_index_payload( if summary_profile != default_profile: PromptStudioHelper.validate_adapter_status(summary_profile) - PromptStudioHelper.validate_profile_manager_owner_access( - summary_profile - ) + PromptStudioHelper.validate_profile_manager_owner_access(summary_profile) summarize_file_path = PromptStudioHelper.summarize( file_name, org_id, run_id, tool @@ -597,9 +598,7 @@ def build_fetch_response_payload( prompt_grammer = tool.prompt_grammer if prompt_grammer: for word, synonyms in prompt_grammer.items(): - grammar_list.append( - {TSPKeys.WORD: word, TSPKeys.SYNONYMS: synonyms} - ) + grammar_list.append({TSPKeys.WORD: word, TSPKeys.SYNONYMS: synonyms}) output[TSPKeys.PROMPT] = prompt.prompt output[TSPKeys.ACTIVE] = prompt.active @@ -625,12 +624,8 @@ def build_fetch_response_payload( output[TSPKeys.POSTPROCESSING_WEBHOOK_URL] = webhook_url output[TSPKeys.EVAL_SETTINGS] = {} - output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EVALUATE] = ( - prompt.evaluate - ) - output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_MONITOR_LLM] = [ - monitor_llm - ] + output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EVALUATE] = prompt.evaluate + output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_MONITOR_LLM] = [monitor_llm] output[TSPKeys.EVAL_SETTINGS][TSPKeys.EVAL_SETTINGS_EXCLUDE_FAILED] = ( tool.exclude_failed ) @@ -641,10 +636,8 @@ def build_fetch_response_payload( output = PromptStudioHelper.fetch_table_settings_if_enabled( doc_name, prompt, org_id, user_id, tool_id, output ) - variable_map = ( - PromptStudioVariableService.frame_variable_replacement_map( - doc_id=document_id, prompt_object=prompt - ) + variable_map = PromptStudioVariableService.frame_variable_replacement_map( + doc_id=document_id, prompt_object=prompt ) if variable_map: output[TSPKeys.VARIABLE_MAP] = variable_map @@ -661,9 +654,7 @@ def build_fetch_response_payload( tool_settings[TSPKeys.POSTAMBLE] = tool.postamble tool_settings[TSPKeys.GRAMMAR] = grammar_list tool_settings[TSPKeys.ENABLE_HIGHLIGHT] = tool.enable_highlight - tool_settings[TSPKeys.ENABLE_WORD_CONFIDENCE] = ( - tool.enable_word_confidence - ) + tool_settings[TSPKeys.ENABLE_WORD_CONFIDENCE] = tool.enable_word_confidence tool_settings[TSPKeys.PLATFORM_POSTAMBLE] = getattr( settings, TSPKeys.PLATFORM_POSTAMBLE.upper(), "" ) @@ -756,9 +747,7 @@ def build_single_pass_payload( if prompt_grammar: for word, synonyms in prompt_grammar.items(): - grammar.append( - {TSPKeys.WORD: word, TSPKeys.SYNONYMS: synonyms} - ) + grammar.append({TSPKeys.WORD: word, TSPKeys.SYNONYMS: synonyms}) fs_instance = EnvHelper.get_storage( storage_type=StorageType.PERMANENT, @@ -822,9 +811,7 @@ def build_single_pass_payload( if tool.summarize_as_source: path_obj = Path(file_path) file_path = str( - path_obj.parent.parent - / TSPKeys.SUMMARIZE - / (path_obj.stem + ".txt") + path_obj.parent.parent / TSPKeys.SUMMARIZE / (path_obj.stem + ".txt") ) file_hash = fs_instance.get_hash_from_file(path=file_path) diff --git a/backend/prompt_studio/prompt_studio_core_v2/tasks.py b/backend/prompt_studio/prompt_studio_core_v2/tasks.py index 30d13e3b54..1f9d917359 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/tasks.py +++ b/backend/prompt_studio/prompt_studio_core_v2/tasks.py @@ -3,9 +3,8 @@ import uuid from typing import Any -from celery import shared_task - from account_v2.constants import Common +from celery import shared_task from utils.constants import Account from utils.local_context import StateStore from utils.log_events import _emit_websocket_event @@ -34,9 +33,7 @@ def _json_safe(data: Any) -> Any: return json.loads(json.dumps(data, cls=_UUIDEncoder)) -def _setup_state_store( - log_events_id: str, request_id: str, org_id: str = "" -) -> None: +def _setup_state_store(log_events_id: str, request_id: str, org_id: str = "") -> None: """Restore thread-local context that was captured in the Django view.""" StateStore.set(Common.LOG_EVENTS_ID, log_events_id) StateStore.set(Common.REQUEST_ID, request_id) @@ -61,12 +58,14 @@ def _emit_result( _emit_websocket_event( room=log_events_id, event=PROMPT_STUDIO_RESULT_EVENT, - data=_json_safe({ - "task_id": task_id, - "status": "completed", - "operation": operation, - "result": result, - }), + data=_json_safe( + { + "task_id": task_id, + "status": "completed", + "operation": operation, + "result": result, + } + ), ) @@ -113,13 +112,13 @@ def ide_index_complete( Performs post-indexing ORM bookkeeping and pushes a socket event to the frontend. """ + from prompt_studio.prompt_profile_manager_v2.models import ProfileManager from prompt_studio.prompt_studio_core_v2.document_indexing_service import ( DocumentIndexingService, ) from prompt_studio.prompt_studio_index_manager_v2.prompt_studio_index_helper import ( PromptStudioIndexHelper, ) - from prompt_studio.prompt_profile_manager_v2.models import ProfileManager cb = callback_kwargs or {} log_events_id = cb.get("log_events_id", "") @@ -461,15 +460,11 @@ def run_single_pass_extraction( document_id=document_id, run_id=run_id, ) - _emit_result( - log_events_id, self.request.id, "single_pass_extraction", response - ) + _emit_result(log_events_id, self.request.id, "single_pass_extraction", response) return response except Exception as e: logger.exception("run_single_pass_extraction failed") - _emit_error( - log_events_id, self.request.id, "single_pass_extraction", str(e) - ) + _emit_error(log_events_id, self.request.id, "single_pass_extraction", str(e)) raise finally: _clear_state_store() diff --git a/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py b/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py index d8e2731144..4efef90987 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py +++ b/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py @@ -9,7 +9,7 @@ """ import os -from unittest.mock import MagicMock, patch +from unittest.mock import patch import django @@ -17,16 +17,16 @@ django.setup() import pytest # noqa: E402 +from account_v2.constants import Common # noqa: E402 from celery import Celery # noqa: E402 +from utils.local_context import StateStore # noqa: E402 -from account_v2.constants import Common # noqa: E402 from prompt_studio.prompt_studio_core_v2.tasks import ( # noqa: E402 PROMPT_STUDIO_RESULT_EVENT, run_fetch_response, run_index_document, run_single_pass_extraction, ) -from utils.local_context import StateStore # noqa: E402 # --------------------------------------------------------------------------- # Celery eager-mode app for testing @@ -72,9 +72,7 @@ def test_single_pass_task_name(self): class TestRunIndexDocument: @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_success_returns_result(self, mock_helper, mock_emit): mock_helper.index_document.return_value = "unique-id-123" result = run_index_document.apply( @@ -92,14 +90,10 @@ def test_success_returns_result(self, mock_helper, mock_emit): ) @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_success_emits_completed_event(self, mock_helper, mock_emit): mock_helper.index_document.return_value = "unique-id-123" - run_index_document.apply( - kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} - ).get() + run_index_document.apply(kwargs={**COMMON_KWARGS, "file_name": "test.pdf"}).get() mock_emit.assert_called_once() kwargs = mock_emit.call_args.kwargs @@ -107,15 +101,11 @@ def test_success_emits_completed_event(self, mock_helper, mock_emit): assert kwargs["event"] == PROMPT_STUDIO_RESULT_EVENT assert kwargs["data"]["status"] == "completed" assert kwargs["data"]["operation"] == "index_document" - assert kwargs["data"]["result"] == { - "message": "Document indexed successfully." - } + assert kwargs["data"]["result"] == {"message": "Document indexed successfully."} assert "task_id" in kwargs["data"] @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_failure_emits_error_and_reraises(self, mock_helper, mock_emit): mock_helper.index_document.side_effect = RuntimeError("index boom") @@ -129,22 +119,16 @@ def test_failure_emits_error_and_reraises(self, mock_helper, mock_emit): assert "index boom" in mock_emit.call_args.kwargs["data"]["error"] @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_state_store_cleared_on_success(self, mock_helper, mock_emit): mock_helper.index_document.return_value = "ok" - run_index_document.apply( - kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} - ).get() + run_index_document.apply(kwargs={**COMMON_KWARGS, "file_name": "test.pdf"}).get() assert StateStore.get(Common.LOG_EVENTS_ID) is None assert StateStore.get(Common.REQUEST_ID) is None @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_state_store_cleared_on_failure(self, mock_helper, mock_emit): mock_helper.index_document.side_effect = RuntimeError("fail") with pytest.raises(RuntimeError): @@ -156,9 +140,7 @@ def test_state_store_cleared_on_failure(self, mock_helper, mock_emit): assert StateStore.get(Common.REQUEST_ID) is None @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_state_store_set_during_execution(self, mock_helper, mock_emit): """Verify StateStore has the right values while the helper runs.""" captured = {} @@ -169,9 +151,7 @@ def capture_state(**kwargs): return "ok" mock_helper.index_document.side_effect = capture_state - run_index_document.apply( - kwargs={**COMMON_KWARGS, "file_name": "test.pdf"} - ).get() + run_index_document.apply(kwargs={**COMMON_KWARGS, "file_name": "test.pdf"}).get() assert captured["log_events_id"] == "session-room-xyz" assert captured["request_id"] == "req-001" @@ -181,9 +161,7 @@ def capture_state(**kwargs): class TestRunFetchResponse: @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_success_returns_response(self, mock_helper, mock_emit): expected = {"output": {"field": "value"}, "metadata": {"tokens": 42}} mock_helper.prompt_responder.return_value = expected @@ -208,9 +186,7 @@ def test_success_returns_response(self, mock_helper, mock_emit): ) @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_success_emits_fetch_response_event(self, mock_helper, mock_emit): mock_helper.prompt_responder.return_value = {"output": "data"} run_fetch_response.apply( @@ -222,9 +198,7 @@ def test_success_emits_fetch_response_event(self, mock_helper, mock_emit): assert data["operation"] == "fetch_response" @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_failure_emits_error(self, mock_helper, mock_emit): mock_helper.prompt_responder.side_effect = ValueError("prompt fail") @@ -236,9 +210,7 @@ def test_failure_emits_error(self, mock_helper, mock_emit): assert "prompt fail" in data["error"] @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_optional_params_default_none(self, mock_helper, mock_emit): mock_helper.prompt_responder.return_value = {} run_fetch_response.apply(kwargs=COMMON_KWARGS).get() @@ -254,9 +226,7 @@ def test_optional_params_default_none(self, mock_helper, mock_emit): ) @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_state_store_cleared(self, mock_helper, mock_emit): mock_helper.prompt_responder.return_value = {} run_fetch_response.apply(kwargs=COMMON_KWARGS).get() @@ -265,9 +235,7 @@ def test_state_store_cleared(self, mock_helper, mock_emit): class TestRunSinglePassExtraction: @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_success_returns_response(self, mock_helper, mock_emit): expected = {"output": {"key": "val"}} mock_helper.prompt_responder.return_value = expected @@ -284,9 +252,7 @@ def test_success_returns_response(self, mock_helper, mock_emit): ) @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_success_emits_single_pass_event(self, mock_helper, mock_emit): mock_helper.prompt_responder.return_value = {"data": "ok"} run_single_pass_extraction.apply(kwargs=COMMON_KWARGS).get() @@ -296,9 +262,7 @@ def test_success_emits_single_pass_event(self, mock_helper, mock_emit): assert data["operation"] == "single_pass_extraction" @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_failure_emits_error(self, mock_helper, mock_emit): mock_helper.prompt_responder.side_effect = TypeError("single pass fail") @@ -309,9 +273,7 @@ def test_failure_emits_error(self, mock_helper, mock_emit): assert data["status"] == "failed" @patch("prompt_studio.prompt_studio_core_v2.tasks._emit_websocket_event") - @patch( - "prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper" - ) + @patch("prompt_studio.prompt_studio_core_v2.prompt_studio_helper.PromptStudioHelper") def test_state_store_cleared(self, mock_helper, mock_emit): mock_helper.prompt_responder.return_value = {} run_single_pass_extraction.apply(kwargs=COMMON_KWARGS).get() @@ -365,12 +327,12 @@ def test_views_capture_state_store_context(self): "single_pass_extraction", ]: source = inspect.getsource(getattr(PromptStudioCoreView, method_name)) - assert "StateStore.get(Common.LOG_EVENTS_ID)" in source, ( - f"{method_name} missing LOG_EVENTS_ID capture" - ) - assert "StateStore.get(Common.REQUEST_ID)" in source, ( - f"{method_name} missing REQUEST_ID capture" - ) + assert ( + "StateStore.get(Common.LOG_EVENTS_ID)" in source + ), f"{method_name} missing LOG_EVENTS_ID capture" + assert ( + "StateStore.get(Common.REQUEST_ID)" in source + ), f"{method_name} missing REQUEST_ID capture" # =================================================================== @@ -381,7 +343,7 @@ def test_task_status_method_exists(self): from prompt_studio.prompt_studio_core_v2.views import PromptStudioCoreView assert hasattr(PromptStudioCoreView, "task_status") - assert callable(getattr(PromptStudioCoreView, "task_status")) + assert callable(PromptStudioCoreView.task_status) def test_task_status_url_registered(self): from prompt_studio.prompt_studio_core_v2.urls import urlpatterns @@ -444,15 +406,9 @@ def test_all_three_tasks_routed(self): from backend.celery_config import CeleryConfig routes = CeleryConfig.task_routes - assert routes["prompt_studio_index_document"] == { - "queue": "celery_prompt_studio" - } - assert routes["prompt_studio_fetch_response"] == { - "queue": "celery_prompt_studio" - } - assert routes["prompt_studio_single_pass"] == { - "queue": "celery_prompt_studio" - } + assert routes["prompt_studio_index_document"] == {"queue": "celery_prompt_studio"} + assert routes["prompt_studio_fetch_response"] == {"queue": "celery_prompt_studio"} + assert routes["prompt_studio_single_pass"] == {"queue": "celery_prompt_studio"} def test_celery_app_loads_routes(self): from backend.celery_service import app diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py b/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py index 82aee35d18..e44784671e 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/vectordb/exceptions.py @@ -23,7 +23,6 @@ def parse_vector_db_err(e: Exception, vector_db: VectorDBAdapter) -> VectorDBErr # protobuf runtime version (KeyError: '_POINTID'). try: from qdrant_client.http.exceptions import ApiException as QdrantAPIException - from unstract.sdk1.adapters.vectordb.qdrant.src import Qdrant if isinstance(e, QdrantAPIException): diff --git a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py index 4afe708a1d..7fc9c5f720 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py @@ -64,6 +64,7 @@ class ExecutionDispatcher: Fire-and-forget with callbacks:: from celery import signature + task = dispatcher.dispatch_with_callback( context, on_success=signature("my_success_task", args=[...], queue="q"), @@ -111,16 +112,10 @@ def dispatch( ValueError: If no Celery app is configured. """ if self._app is None: - raise ValueError( - "No Celery app configured on ExecutionDispatcher" - ) + raise ValueError("No Celery app configured on ExecutionDispatcher") if timeout is None: - timeout = int( - os.environ.get( - _DEFAULT_TIMEOUT_ENV, _DEFAULT_TIMEOUT - ) - ) + timeout = int(os.environ.get(_DEFAULT_TIMEOUT_ENV, _DEFAULT_TIMEOUT)) queue = self._get_queue(context.executor_name) logger.info( @@ -155,8 +150,7 @@ def dispatch( ) except Exception as exc: logger.error( - "Dispatch failed: executor=%s operation=%s " - "run_id=%s error=%s", + "Dispatch failed: executor=%s operation=%s " "run_id=%s error=%s", context.executor_name, context.operation, context.run_id, @@ -184,9 +178,7 @@ def dispatch_async( ValueError: If no Celery app is configured. """ if self._app is None: - raise ValueError( - "No Celery app configured on ExecutionDispatcher" - ) + raise ValueError("No Celery app configured on ExecutionDispatcher") queue = self._get_queue(context.executor_name) logger.info( @@ -242,9 +234,7 @@ def dispatch_with_callback( ValueError: If no Celery app is configured. """ if self._app is None: - raise ValueError( - "No Celery app configured on ExecutionDispatcher" - ) + raise ValueError("No Celery app configured on ExecutionDispatcher") queue = self._get_queue(context.executor_name) logger.info( diff --git a/unstract/sdk1/src/unstract/sdk1/execution/executor.py b/unstract/sdk1/src/unstract/sdk1/execution/executor.py index 8d3bda8407..142109945d 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/executor.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/executor.py @@ -31,9 +31,7 @@ def name(self) -> str: """ @abstractmethod - def execute( - self, context: ExecutionContext - ) -> ExecutionResult: + def execute(self, context: ExecutionContext) -> ExecutionResult: """Run the extraction strategy described by *context*. Args: diff --git a/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py b/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py index fe066c7265..02693a0509 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py @@ -26,9 +26,7 @@ class ExecutionOrchestrator: result = orchestrator.execute(context) """ - def execute( - self, context: ExecutionContext - ) -> ExecutionResult: + def execute(self, context: ExecutionContext) -> ExecutionResult: """Resolve the executor and run it. Args: @@ -59,8 +57,7 @@ def execute( except Exception as exc: elapsed = time.monotonic() - start logger.exception( - "Executor %r raised an unhandled exception " - "after %.2fs", + "Executor %r raised an unhandled exception " "after %.2fs", context.executor_name, elapsed, ) @@ -71,8 +68,7 @@ def execute( elapsed = time.monotonic() - start logger.info( - "Execution completed: executor=%s operation=%s " - "success=%s elapsed=%.2fs", + "Execution completed: executor=%s operation=%s " "success=%s elapsed=%.2fs", context.executor_name, context.operation, result.success, diff --git a/unstract/sdk1/src/unstract/sdk1/execution/registry.py b/unstract/sdk1/src/unstract/sdk1/execution/registry.py index 10cfeecaf6..999487a2e5 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/registry.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/registry.py @@ -26,8 +26,10 @@ class LegacyExecutor(BaseExecutor): @property def name(self) -> str: return "legacy" + ... + executor = ExecutorRegistry.get("legacy") """ @@ -54,12 +56,9 @@ def register(cls, executor_cls: T) -> T: already registered. """ if not ( - isinstance(executor_cls, type) - and issubclass(executor_cls, BaseExecutor) + isinstance(executor_cls, type) and issubclass(executor_cls, BaseExecutor) ): - raise TypeError( - f"{executor_cls!r} is not a BaseExecutor subclass" - ) + raise TypeError(f"{executor_cls!r} is not a BaseExecutor subclass") # Instantiate temporarily to read the name property instance = executor_cls() @@ -98,8 +97,7 @@ def get(cls, name: str) -> BaseExecutor: if executor_cls is None: available = ", ".join(sorted(cls._registry)) or "(none)" raise KeyError( - f"No executor registered with name {name!r}. " - f"Available: {available}" + f"No executor registered with name {name!r}. " f"Available: {available}" ) return executor_cls() diff --git a/unstract/sdk1/src/unstract/sdk1/execution/result.py b/unstract/sdk1/src/unstract/sdk1/execution/result.py index 2660dcb7d2..0088d071f5 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/result.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/result.py @@ -35,9 +35,7 @@ class ExecutionResult: def __post_init__(self) -> None: """Validate result consistency after initialization.""" if not self.success and not self.error: - raise ValueError( - "error message is required when success is False" - ) + raise ValueError("error message is required when success is False") def to_dict(self) -> dict[str, Any]: """Serialize to a JSON-compatible dict for Celery.""" diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index 048cccdc92..3839a01073 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -3,7 +3,7 @@ import json import logging from typing import Any, Self -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock import pytest from unstract.sdk1.constants import LogLevel, ToolEnv @@ -244,7 +244,7 @@ def test_error_in_failure_dict(self: Self) -> None: assert d["error"] == "fail" def test_default_empty_dicts(self: Self) -> None: - """data and metadata default to empty dicts.""" + """Data and metadata default to empty dicts.""" result = ExecutionResult(success=True) assert result.data == {} assert result.metadata == {} @@ -301,9 +301,7 @@ class _Executor(BaseExecutor): def name(self) -> str: return executor_name - def execute( - self, context: ExecutionContext - ) -> ExecutionResult: + def execute(self, context: ExecutionContext) -> ExecutionResult: return ExecutionResult( success=True, data={"echo": context.operation}, @@ -380,9 +378,7 @@ class MyExecutor(BaseExecutor): def name(self) -> str: return "decorated" - def execute( - self, context: ExecutionContext - ) -> ExecutionResult: + def execute(self, context: ExecutionContext) -> ExecutionResult: return ExecutionResult(success=True) executor = ExecutorRegistry.get("decorated") @@ -474,9 +470,7 @@ class _FailExecutor(BaseExecutor): def name(self) -> str: return executor_name - def execute( - self, context: ExecutionContext - ) -> ExecutionResult: + def execute(self, context: ExecutionContext) -> ExecutionResult: raise exc _FailExecutor.__name__ = f"{executor_name.title()}FailExecutor" @@ -524,23 +518,17 @@ def test_dispatches_to_correct_executor(self: Self) -> None: def test_unknown_executor_returns_failure(self: Self) -> None: """Unknown executor_name yields a failure result (not exception).""" orchestrator = ExecutionOrchestrator() - result = orchestrator.execute( - self._make_context(executor_name="nonexistent") - ) + result = orchestrator.execute(self._make_context(executor_name="nonexistent")) assert result.success is False assert "nonexistent" in result.error def test_executor_exception_returns_failure(self: Self) -> None: """Unhandled executor exception is wrapped in failure result.""" ExecutorRegistry.register( - _make_failing_executor_class( - "boom", RuntimeError("kaboom") - ) + _make_failing_executor_class("boom", RuntimeError("kaboom")) ) orchestrator = ExecutionOrchestrator() - result = orchestrator.execute( - self._make_context(executor_name="boom") - ) + result = orchestrator.execute(self._make_context(executor_name="boom")) assert result.success is False assert "RuntimeError" in result.error assert "kaboom" in result.error @@ -548,14 +536,10 @@ def test_executor_exception_returns_failure(self: Self) -> None: def test_exception_result_has_elapsed_metadata(self: Self) -> None: """Failure from exception includes elapsed_seconds metadata.""" ExecutorRegistry.register( - _make_failing_executor_class( - "slow_fail", ValueError("bad input") - ) + _make_failing_executor_class("slow_fail", ValueError("bad input")) ) orchestrator = ExecutionOrchestrator() - result = orchestrator.execute( - self._make_context(executor_name="slow_fail") - ) + result = orchestrator.execute(self._make_context(executor_name="slow_fail")) assert result.success is False assert "elapsed_seconds" in result.metadata assert isinstance(result.metadata["elapsed_seconds"], float) @@ -565,9 +549,7 @@ def test_successful_result_passed_through(self: Self) -> None: ExecutorRegistry.register(_make_executor_class("passthru")) orchestrator = ExecutionOrchestrator() - ctx = self._make_context( - executor_name="passthru", operation="answer_prompt" - ) + ctx = self._make_context(executor_name="passthru", operation="answer_prompt") result = orchestrator.execute(ctx) assert result.success is True @@ -583,19 +565,13 @@ class FailingExecutor(BaseExecutor): def name(self) -> str: return "graceful_fail" - def execute( - self, context: ExecutionContext - ) -> ExecutionResult: - return ExecutionResult.failure( - error="LLM rate limited" - ) + def execute(self, context: ExecutionContext) -> ExecutionResult: + return ExecutionResult.failure(error="LLM rate limited") ExecutorRegistry.register(FailingExecutor) orchestrator = ExecutionOrchestrator() - result = orchestrator.execute( - self._make_context(executor_name="graceful_fail") - ) + result = orchestrator.execute(self._make_context(executor_name="graceful_fail")) assert result.success is False assert result.error == "LLM rate limited" @@ -714,9 +690,7 @@ def test_dispatch_timeout_returns_failure( self: Self, ) -> None: """TimeoutError from AsyncResult.get() is wrapped in failure.""" - mock_app = self._make_mock_app( - side_effect=TimeoutError("Task timed out") - ) + mock_app = self._make_mock_app(side_effect=TimeoutError("Task timed out")) dispatcher = ExecutionDispatcher(celery_app=mock_app) ctx = self._make_context() @@ -729,9 +703,7 @@ def test_dispatch_generic_exception_returns_failure( self: Self, ) -> None: """Any exception from AsyncResult.get() becomes a failure.""" - mock_app = self._make_mock_app( - side_effect=RuntimeError("broker down") - ) + mock_app = self._make_mock_app(side_effect=RuntimeError("broker down")) dispatcher = ExecutionDispatcher(celery_app=mock_app) ctx = self._make_context() @@ -816,9 +788,7 @@ def test_dispatch_context_serialized_correctly( assert context_dict["executor_name"] == "agentic_table" assert context_dict["operation"] == "agentic_extraction" assert context_dict["organization_id"] == "org-42" - assert context_dict["executor_params"] == { - "schema": {"name": "str"} - } + assert context_dict["executor_params"] == {"schema": {"name": "str"}} # ---- Phase 5A: dispatch_with_callback ---- @@ -928,16 +898,12 @@ def test_dispatch_with_callback_context_serialized( executor_params={"prompt_key": "p1"}, ) - dispatcher.dispatch_with_callback( - ctx, on_success=MagicMock() - ) + dispatcher.dispatch_with_callback(ctx, on_success=MagicMock()) sent_args = mock_app.send_task.call_args context_dict = sent_args[1]["args"][0] assert context_dict["operation"] == "answer_prompt" - assert context_dict["executor_params"] == { - "prompt_key": "p1" - } + assert context_dict["executor_params"] == {"prompt_key": "p1"} def test_dispatch_with_callback_custom_task_id( self: Self, @@ -947,9 +913,7 @@ def test_dispatch_with_callback_custom_task_id( dispatcher = ExecutionDispatcher(celery_app=mock_app) ctx = self._make_context() - result = dispatcher.dispatch_with_callback( - ctx, task_id="pre-gen-id-123" - ) + result = dispatcher.dispatch_with_callback(ctx, task_id="pre-gen-id-123") call_kwargs = mock_app.send_task.call_args assert call_kwargs[1]["task_id"] == "pre-gen-id-123" @@ -993,16 +957,12 @@ def get_env_or_die(self, env_key: str) -> str: if env_key == ToolEnv.PLATFORM_API_KEY: if not self.platform_api_key: - raise SdkError( - f"Env variable '{env_key}' is required" - ) + raise SdkError(f"Env variable '{env_key}' is required") return self.platform_api_key env_value = os.environ.get(env_key) if env_value is None or env_value == "": - raise SdkError( - f"Env variable '{env_key}' is required" - ) + raise SdkError(f"Env variable '{env_key}' is required") return env_value def stream_log( @@ -1022,9 +982,7 @@ def stream_log( py_level = _level_map.get(level, logging.INFO) logging.getLogger("executor_tool_shim").log(py_level, log) - def stream_error_and_exit( - self, message: str, err: Exception | None = None - ) -> None: + def stream_error_and_exit(self, message: str, err: Exception | None = None) -> None: raise SdkError(message, actual_err=err) @@ -1058,9 +1016,7 @@ def test_missing_env_var_raises(self: Self) -> None: with pytest.raises(SdkError, match="NONEXISTENT_VAR"): shim.get_env_or_die("NONEXISTENT_VAR") - def test_empty_env_var_raises( - self: Self, monkeypatch: pytest.MonkeyPatch - ) -> None: + def test_empty_env_var_raises(self: Self, monkeypatch: pytest.MonkeyPatch) -> None: """get_env_or_die() raises SdkError for empty env var.""" monkeypatch.setenv("EMPTY_VAR", "") shim = _MockExecutorToolShim(platform_api_key="sk-test") diff --git a/workers/executor/executor_tool_shim.py b/workers/executor/executor_tool_shim.py index 22f44c1ffc..8b7789e201 100644 --- a/workers/executor/executor_tool_shim.py +++ b/workers/executor/executor_tool_shim.py @@ -98,16 +98,12 @@ def get_env_or_die(self, env_key: str) -> str: """ if env_key == ToolEnv.PLATFORM_API_KEY: if not self.platform_api_key: - raise SdkError( - f"Env variable '{env_key}' is required" - ) + raise SdkError(f"Env variable '{env_key}' is required") return self.platform_api_key env_value = os.environ.get(env_key) if env_value is None or env_value == "": - raise SdkError( - f"Env variable '{env_key}' is required" - ) + raise SdkError(f"Env variable '{env_key}' is required") return env_value def stream_log( @@ -155,9 +151,7 @@ def stream_log( exc_info=True, ) - def stream_error_and_exit( - self, message: str, err: Exception | None = None - ) -> None: + def stream_error_and_exit(self, message: str, err: Exception | None = None) -> None: """Log error and raise SdkError. Unlike the base StreamMixin which may call ``sys.exit(1)`` diff --git a/workers/executor/executors/answer_prompt.py b/workers/executor/executors/answer_prompt.py index 902c81a66c..925cf95e5e 100644 --- a/workers/executor/executors/answer_prompt.py +++ b/workers/executor/executors/answer_prompt.py @@ -334,11 +334,7 @@ def handle_json( structured_output[prompt_key] = processed_data - if ( - enable_highlight - and metadata - and updated_highlight_data is not None - ): + if enable_highlight and metadata and updated_highlight_data is not None: metadata.setdefault(PSKeys.HIGHLIGHT_DATA, {})[prompt_key] = ( updated_highlight_data ) diff --git a/workers/executor/executors/file_utils.py b/workers/executor/executors/file_utils.py index 3741aa26d4..92f80d6d76 100644 --- a/workers/executor/executors/file_utils.py +++ b/workers/executor/executors/file_utils.py @@ -5,6 +5,7 @@ """ from executor.executors.constants import ExecutionSource, FileStorageKeys + from unstract.sdk1.file_storage import FileStorage from unstract.sdk1.file_storage.constants import StorageType from unstract.sdk1.file_storage.env_helper import EnvHelper diff --git a/workers/executor/executors/index.py b/workers/executor/executors/index.py index cb4de85a11..da2b68be82 100644 --- a/workers/executor/executors/index.py +++ b/workers/executor/executors/index.py @@ -20,6 +20,7 @@ InstanceIdentifiers, ProcessingOptions, ) + from unstract.sdk1.constants import LogLevel from unstract.sdk1.exceptions import SdkError, parse_litellm_err from unstract.sdk1.file_storage.impl import FileStorage diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 3c87482ee6..18ddbe0490 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -13,7 +13,8 @@ from typing import Any from executor.executor_tool_shim import ExecutorToolShim -from executor.executors.constants import ExecutionSource, IndexingConstants as IKeys +from executor.executors.constants import ExecutionSource +from executor.executors.constants import IndexingConstants as IKeys from executor.executors.dto import ( ChunkingConfig, FileInfo, @@ -22,6 +23,7 @@ ) from executor.executors.exceptions import ExtractionError, LegacyExecutorError from executor.executors.file_utils import FileUtils + from unstract.sdk1.adapters.exceptions import AdapterError from unstract.sdk1.adapters.x2text.constants import X2TextConstants from unstract.sdk1.adapters.x2text.llm_whisperer.src import LLMWhisperer @@ -77,16 +79,13 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: """ # Extract log streaming info (set by tasks.py for IDE sessions). self._log_events_id: str = context.log_events_id or "" - self._log_component: dict[str, str] = getattr( - context, "_log_component", {} - ) + self._log_component: dict[str, str] = getattr(context, "_log_component", {}) handler_name = self._OPERATION_MAP.get(context.operation) if handler_name is None: return ExecutionResult.failure( error=( - f"LegacyExecutor does not support operation " - f"'{context.operation}'" + f"LegacyExecutor does not support operation " f"'{context.operation}'" ) ) @@ -105,8 +104,7 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: result = handler(context) elapsed = time.monotonic() - start logger.info( - "Handler %s completed in %.2fs " - "(run_id=%s success=%s)", + "Handler %s completed in %.2fs " "(run_id=%s success=%s)", handler_name, elapsed, context.run_id, @@ -160,9 +158,7 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: usage_kwargs: dict[Any, Any] = params.get(IKeys.USAGE_KWARGS, {}) tags: list[str] | None = params.get(IKeys.TAGS) execution_source: str = context.execution_source - tool_exec_metadata: dict[str, Any] = params.get( - IKeys.TOOL_EXECUTION_METATADA, {} - ) + tool_exec_metadata: dict[str, Any] = params.get(IKeys.TOOL_EXECUTION_METATADA, {}) execution_data_dir: str | None = params.get(IKeys.EXECUTION_DATA_DIR) # Build adapter shim and X2Text @@ -179,8 +175,7 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: fs = FileUtils.get_fs_instance(execution_source=execution_source) logger.info( - "Starting text extraction: x2text_adapter=%s file=%s " - "run_id=%s", + "Starting text extraction: x2text_adapter=%s file=%s " "run_id=%s", x2text_instance_id, Path(file_path).name, context.run_id, @@ -279,6 +274,7 @@ def _get_indexing_deps(): Wrapped in a method so tests can mock it cleanly. """ from executor.executors.index import Index + from unstract.sdk1.embedding import EmbeddingCompat from unstract.sdk1.vector_db import VectorDB @@ -288,9 +284,7 @@ def _get_indexing_deps(): # Phase 5C — Compound IDE index handler (extract + index) # ------------------------------------------------------------------ - def _handle_ide_index( - self, context: ExecutionContext - ) -> ExecutionResult: + def _handle_ide_index(self, context: ExecutionContext) -> ExecutionResult: """Handle ``Operation.IDE_INDEX`` — compound extract then index. This compound operation combines ``_handle_extract`` and @@ -318,8 +312,7 @@ def _handle_ide_index( if not index_params: missing.append("index_params") return ExecutionResult.failure( - error=f"ide_index missing required params: " - f"{', '.join(missing)}" + error=f"ide_index missing required params: " f"{', '.join(missing)}" ) # Step 1: Extract @@ -366,9 +359,7 @@ def _handle_ide_index( # Phase 5D — Compound structure pipeline handler # ------------------------------------------------------------------ - def _handle_structure_pipeline( - self, context: ExecutionContext - ) -> ExecutionResult: + def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResult: """Handle ``Operation.STRUCTURE_PIPELINE``. Runs the full structure-tool pipeline in a single executor @@ -407,15 +398,9 @@ def _handle_structure_pipeline( pipeline_options = params.get("pipeline_options", {}) summarize_params = params.get("summarize_params") - skip_extraction = pipeline_options.get( - "skip_extraction_and_indexing", False - ) - is_summarization = pipeline_options.get( - "is_summarization_enabled", False - ) - is_single_pass = pipeline_options.get( - "is_single_pass_enabled", False - ) + skip_extraction = pipeline_options.get("skip_extraction_and_indexing", False) + is_summarization = pipeline_options.get("is_summarization_enabled", False) + is_single_pass = pipeline_options.get("is_single_pass_enabled", False) input_file_path = pipeline_options.get("input_file_path", "") source_file_name = pipeline_options.get("source_file_name", "") @@ -437,9 +422,7 @@ def _handle_structure_pipeline( extract_result = self._handle_extract(extract_ctx) if not extract_result.success: return extract_result - extracted_text = extract_result.data.get( - IKeys.EXTRACTED_TEXT, "" - ) + extracted_text = extract_result.data.get(IKeys.EXTRACTED_TEXT, "") # ---- Step 2: Summarize (if enabled) ---- if is_summarization: @@ -466,9 +449,7 @@ def _handle_structure_pipeline( # ---- Step 4: Table settings injection ---- if not is_single_pass: outputs = answer_params.get("outputs", []) - extracted_file_path = index_template.get( - "extracted_file_path", "" - ) + extracted_file_path = index_template.get("extracted_file_path", "") for output in outputs: if "table_settings" in output: table_settings = output["table_settings"] @@ -511,16 +492,12 @@ def _handle_structure_pipeline( # Add extracted text for HITL raw view if extracted_text: - structured_output["metadata"]["extracted_text"] = ( - extracted_text - ) + structured_output["metadata"]["extracted_text"] = extracted_text # Merge index metrics if index_metrics: existing_metrics = structured_output.get("metrics", {}) - merged = self._merge_pipeline_metrics( - existing_metrics, index_metrics - ) + merged = self._merge_pipeline_metrics(existing_metrics, index_metrics) structured_output["metrics"] = merged return ExecutionResult(success=True, data=structured_output) @@ -539,31 +516,19 @@ def _run_pipeline_summarize( file_hash. """ extract_file_path = summarize_params.get("extract_file_path", "") - summarize_file_path = summarize_params.get( - "summarize_file_path", "" - ) + summarize_file_path = summarize_params.get("summarize_file_path", "") platform_api_key = summarize_params.get("platform_api_key", "") - llm_adapter_id = summarize_params.get( - "llm_adapter_instance_id", "" - ) + llm_adapter_id = summarize_params.get("llm_adapter_instance_id", "") summarize_prompt = summarize_params.get("summarize_prompt", "") prompt_keys = summarize_params.get("prompt_keys", []) outputs = answer_params.get("outputs", []) - fs = FileUtils.get_fs_instance( - execution_source=context.execution_source - ) + fs = FileUtils.get_fs_instance(execution_source=context.execution_source) # Set chunk_size=0 for all outputs when summarizing - embedding = answer_params.get("tool_settings", {}).get( - "embedding", "" - ) - vector_db = answer_params.get("tool_settings", {}).get( - "vector-db", "" - ) - x2text = answer_params.get("tool_settings", {}).get( - "x2text_adapter", "" - ) + embedding = answer_params.get("tool_settings", {}).get("embedding", "") + vector_db = answer_params.get("tool_settings", {}).get("vector-db", "") + x2text = answer_params.get("tool_settings", {}).get("x2text_adapter", "") for output in outputs: output["embedding"] = embedding output["vector-db"] = vector_db @@ -574,9 +539,7 @@ def _run_pipeline_summarize( # Check cache summarized_context = "" if fs.exists(summarize_file_path): - summarized_context = fs.read( - path=summarize_file_path, mode="r" - ) + summarized_context = fs.read(path=summarize_file_path, mode="r") if not summarized_context: # Read extracted text @@ -614,9 +577,7 @@ def _run_pipeline_summarize( ) # Update answer_params - summarize_file_hash = fs.get_hash_from_file( - path=summarize_file_path - ) + summarize_file_hash = fs.get_hash_from_file(path=summarize_file_path) answer_params["file_hash"] = summarize_file_hash answer_params["file_path"] = str(summarize_file_path) @@ -642,9 +603,7 @@ def _run_pipeline_index( file_hash = index_template.get("file_hash", "") is_highlight = index_template.get("is_highlight_enabled", False) platform_api_key = index_template.get("platform_api_key", "") - extracted_file_path = index_template.get( - "extracted_file_path", "" - ) + extracted_file_path = index_template.get("extracted_file_path", "") index_metrics: dict = {} seen_params: set = set() @@ -669,8 +628,7 @@ def _run_pipeline_index( indexing_start = datetime.datetime.now() logger.info( - "Pipeline indexing: chunk_size=%s " - "chunk_overlap=%s vector_db=%s", + "Pipeline indexing: chunk_size=%s " "chunk_overlap=%s vector_db=%s", chunk_size, chunk_overlap, vector_db, @@ -707,20 +665,14 @@ def _run_pipeline_index( index_result.error, ) - elapsed = ( - datetime.datetime.now() - indexing_start - ).total_seconds() + elapsed = (datetime.datetime.now() - indexing_start).total_seconds() output_name = output.get("name", "") - index_metrics[output_name] = { - "indexing": {"time_taken(s)": elapsed} - } + index_metrics[output_name] = {"indexing": {"time_taken(s)": elapsed}} return index_metrics @staticmethod - def _merge_pipeline_metrics( - metrics1: dict, metrics2: dict - ) -> dict: + def _merge_pipeline_metrics(metrics1: dict, metrics2: dict) -> dict: """Merge two metrics dicts, combining sub-dicts for shared keys.""" merged: dict = {} all_keys = set(metrics1) | set(metrics2) @@ -782,9 +734,7 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: chunk_overlap: int = params.get(IKeys.CHUNK_OVERLAP, 128) reindex: bool = params.get(IKeys.REINDEX, False) enable_highlight: bool = params.get(IKeys.ENABLE_HIGHLIGHT, False) - enable_word_confidence: bool = params.get( - IKeys.ENABLE_WORD_CONFIDENCE, False - ) + enable_word_confidence: bool = params.get(IKeys.ENABLE_WORD_CONFIDENCE, False) usage_kwargs: dict[Any, Any] = params.get(IKeys.USAGE_KWARGS, {}) tags: list[str] | None = params.get(IKeys.TAGS) execution_source: str = context.execution_source @@ -810,9 +760,7 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: log_events_id=self._log_events_id, component=self._log_component, ) - fs_instance = FileUtils.get_fs_instance( - execution_source=execution_source - ) + fs_instance = FileUtils.get_fs_instance(execution_source=execution_source) logger.info( "Starting indexing: chunk_size=%d chunk_overlap=%d " @@ -842,9 +790,7 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: fs=fs_instance, ) logger.info("Skipping indexing for chunk_size=0. Doc ID: %s", doc_id) - return ExecutionResult( - success=True, data={IKeys.DOC_ID: doc_id} - ) + return ExecutionResult(success=True, data={IKeys.DOC_ID: doc_id}) chunking_config = ChunkingConfig( chunk_size=chunk_size, chunk_overlap=chunk_overlap @@ -862,9 +808,7 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: chunking_config=chunking_config, processing_options=processing_options, ) - doc_id = index.generate_index_key( - file_info=file_info, fs=fs_instance - ) + doc_id = index.generate_index_key(file_info=file_info, fs=fs_instance) logger.debug("Generated index key: doc_id=%s", doc_id) shim.stream_log("Checking document index status...") @@ -901,9 +845,7 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: Path(file_path).name, ) shim.stream_log("Document indexing completed") - return ExecutionResult( - success=True, data={IKeys.DOC_ID: doc_id} - ) + return ExecutionResult(success=True, data={IKeys.DOC_ID: doc_id}) except Exception as e: logger.error( "Indexing failed: file=%s error=%s", @@ -931,6 +873,7 @@ def _get_prompt_deps(): from executor.executors.variable_replacement import ( VariableReplacementService, ) + from unstract.sdk1.embedding import EmbeddingCompat from unstract.sdk1.llm import LLM from unstract.sdk1.vector_db import VectorDB @@ -967,9 +910,7 @@ def _sanitize_null_values( v[k1] = None return structured_output - def _handle_answer_prompt( - self, context: ExecutionContext - ) -> ExecutionResult: + def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: """Handle ``Operation.ANSWER_PROMPT`` — multi-prompt extraction. Migrated from ``prompt_processor()`` in the prompt-service @@ -984,6 +925,8 @@ def _handle_answer_prompt( """ from executor.executors.constants import ( PromptServiceConstants as PSKeys, + ) + from executor.executors.constants import ( RetrievalStrategy, ) @@ -1000,12 +943,8 @@ def _handle_answer_prompt( doc_name = str(params.get(PSKeys.FILE_NAME, "")) log_events_id: str = params.get(PSKeys.LOG_EVENTS_ID, "") custom_data: dict[str, Any] = params.get(PSKeys.CUSTOM_DATA, {}) - execution_source = params.get( - PSKeys.EXECUTION_SOURCE, context.execution_source - ) - platform_api_key: str = params.get( - PSKeys.PLATFORM_SERVICE_API_KEY, "" - ) + execution_source = params.get(PSKeys.EXECUTION_SOURCE, context.execution_source) + platform_api_key: str = params.get(PSKeys.PLATFORM_SERVICE_API_KEY, "") structured_output: dict[str, Any] = {} metadata: dict[str, Any] = { @@ -1019,8 +958,7 @@ def _handle_answer_prompt( context_retrieval_metrics: dict[str, Any] = {} logger.info( - "Starting answer_prompt: tool_id=%s prompt_count=%d " - "file=%s run_id=%s", + "Starting answer_prompt: tool_id=%s prompt_count=%d " "file=%s run_id=%s", tool_id, len(prompts), doc_name, @@ -1048,9 +986,7 @@ def _handle_answer_prompt( if highlight_cls: from executor.executors.file_utils import FileUtils - fs_instance = FileUtils.get_fs_instance( - execution_source=execution_source - ) + fs_instance = FileUtils.get_fs_instance(execution_source=execution_source) highlight_instance = highlight_cls( file_path=file_path, fs_instance=fs_instance, @@ -1095,21 +1031,17 @@ def _handle_answer_prompt( shim.stream_log(f"Processing prompt: {prompt_name}") # {{variable}} template replacement - if VariableReplacementService.is_variables_present( - prompt_text=prompt_text - ): + if VariableReplacementService.is_variables_present(prompt_text=prompt_text): is_ide = execution_source == "ide" - prompt_text = ( - VariableReplacementService.replace_variables_in_prompt( - prompt=output, - structured_output=structured_output, - log_events_id=log_events_id, - tool_id=tool_id, - prompt_name=prompt_name, - doc_name=doc_name, - custom_data=custom_data, - is_ide=is_ide, - ) + prompt_text = VariableReplacementService.replace_variables_in_prompt( + prompt=output, + structured_output=structured_output, + log_events_id=log_events_id, + tool_id=tool_id, + prompt_name=prompt_name, + doc_name=doc_name, + custom_data=custom_data, + is_ide=is_ide, ) logger.info( @@ -1170,9 +1102,7 @@ def _handle_answer_prompt( msg = f"Couldn't fetch adapter. {e}" logger.error(msg) status_code = getattr(e, "status_code", None) or 500 - raise LegacyExecutorError( - message=msg, code=status_code - ) from e + raise LegacyExecutorError(message=msg, code=status_code) from e # TABLE type is handled by TableExtractorExecutor (separate # queue). LINE_ITEM is not supported. The backend dispatcher @@ -1199,24 +1129,19 @@ def _handle_answer_prompt( valid_strategies = {s.value for s in RetrievalStrategy} if retrieval_strategy in valid_strategies: - shim.stream_log( - f"Retrieving context for: {prompt_name}" - ) + shim.stream_log(f"Retrieving context for: {prompt_name}") logger.info( - "Performing retrieval: prompt=%s strategy=%s " - "chunk_size=%d", + "Performing retrieval: prompt=%s strategy=%s " "chunk_size=%d", prompt_name, retrieval_strategy, chunk_size, ) if chunk_size == 0: - context_list = ( - RetrievalService.retrieve_complete_context( - execution_source=execution_source, - file_path=file_path, - context_retrieval_metrics=context_retrieval_metrics, - prompt_key=prompt_name, - ) + context_list = RetrievalService.retrieve_complete_context( + execution_source=execution_source, + file_path=file_path, + context_retrieval_metrics=context_retrieval_metrics, + prompt_key=prompt_name, ) else: context_list = RetrievalService.run_retrieval( @@ -1235,9 +1160,7 @@ def _handle_answer_prompt( ) # Run prompt with retrieved context - shim.stream_log( - f"Running LLM completion for: {prompt_name}" - ) + shim.stream_log(f"Running LLM completion for: {prompt_name}") answer = AnswerPromptService.construct_and_run_prompt( tool_settings=tool_settings, output=output, @@ -1251,8 +1174,7 @@ def _handle_answer_prompt( ) else: logger.warning( - "Skipping retrieval: invalid strategy=%s " - "for prompt=%s", + "Skipping retrieval: invalid strategy=%s " "for prompt=%s", retrieval_strategy, prompt_name, ) @@ -1280,13 +1202,9 @@ def _handle_answer_prompt( challenge_cls = ExecutorPluginLoader.get("challenge") if challenge_cls: - challenge_llm_id = tool_settings.get( - PSKeys.CHALLENGE_LLM - ) + challenge_llm_id = tool_settings.get(PSKeys.CHALLENGE_LLM) if challenge_llm_id: - shim.stream_log( - f"Running challenge for: {prompt_name}" - ) + shim.stream_log(f"Running challenge for: {prompt_name}") challenge_llm = LLM( adapter_instance_id=challenge_llm_id, tool=shim, @@ -1322,16 +1240,12 @@ def _handle_answer_prompt( evaluator_cls = ExecutorPluginLoader.get("evaluation") if evaluator_cls: - shim.stream_log( - f"Running evaluation for: {prompt_name}" - ) + shim.stream_log(f"Running evaluation for: {prompt_name}") evaluator = evaluator_cls( query=output.get(PSKeys.COMBINED_PROMPT, ""), context="\n".join(context_list), response=structured_output.get(prompt_name), - reference_answer=output.get( - "reference_answer", "" - ), + reference_answer=output.get("reference_answer", ""), prompt=output, structured_output=structured_output, platform_key=platform_api_key, @@ -1421,9 +1335,7 @@ def _apply_type_conversion( f"characters. No explanation is required. " f"If you cannot extract the number, output 0." ) - answer = AnswerPromptService.run_completion( - llm=llm, prompt=prompt - ) + answer = AnswerPromptService.run_completion(llm=llm, prompt=prompt) try: structured_output[prompt_name] = float(answer) except Exception: @@ -1440,9 +1352,7 @@ def _apply_type_conversion( f"variable. No explanation is required. If you cannot " f'extract the email, output "NA".' ) - answer = AnswerPromptService.run_completion( - llm=llm, prompt=prompt - ) + answer = AnswerPromptService.run_completion(llm=llm, prompt=prompt) structured_output[prompt_name] = answer elif output_type == PSKeys.DATE: @@ -1455,12 +1365,10 @@ def _apply_type_conversion( f"The date should be in ISO date time format. " f"No explanation is required. The date should be " f"directly assignable to a date variable. " - f'If you cannot convert the string into a date, ' + f"If you cannot convert the string into a date, " f'output "NA".' ) - answer = AnswerPromptService.run_completion( - llm=llm, prompt=prompt - ) + answer = AnswerPromptService.run_completion(llm=llm, prompt=prompt) structured_output[prompt_name] = answer elif output_type == PSKeys.BOOLEAN: @@ -1473,9 +1381,7 @@ def _apply_type_conversion( f"If the context is trying to convey that the answer " f'is true, then return "yes", else return "no".' ) - answer = AnswerPromptService.run_completion( - llm=llm, prompt=prompt - ) + answer = AnswerPromptService.run_completion(llm=llm, prompt=prompt) structured_output[prompt_name] = answer.lower() == "yes" elif output_type == PSKeys.JSON: @@ -1484,9 +1390,7 @@ def _apply_type_conversion( structured_output=structured_output, output=output, llm=llm, - enable_highlight=tool_settings.get( - PSKeys.ENABLE_HIGHLIGHT, False - ), + enable_highlight=tool_settings.get(PSKeys.ENABLE_HIGHLIGHT, False), enable_word_confidence=tool_settings.get( PSKeys.ENABLE_WORD_CONFIDENCE, False ), @@ -1519,15 +1423,12 @@ def _handle_single_pass_extraction( {"output": dict, "metadata": dict, "metrics": dict} """ logger.info( - "single_pass_extraction delegating to answer_prompt " - "(run_id=%s)", + "single_pass_extraction delegating to answer_prompt " "(run_id=%s)", context.run_id, ) return self._handle_answer_prompt(context) - def _handle_summarize( - self, context: ExecutionContext - ) -> ExecutionResult: + def _handle_summarize(self, context: ExecutionContext) -> ExecutionResult: """Handle ``Operation.SUMMARIZE`` — document summarization. Called by the structure tool when ``summarize_as_source`` is @@ -1545,7 +1446,7 @@ def _handle_summarize( Returns: ExecutionResult with ``data`` containing:: - {"data": str} # summarized text + {"data": str} # summarized text """ from executor.executors.constants import PromptServiceConstants as PSKeys @@ -1555,18 +1456,14 @@ def _handle_summarize( summarize_prompt: str = params.get("summarize_prompt", "") doc_context: str = params.get(PSKeys.CONTEXT, "") prompt_keys: list[str] = params.get("prompt_keys", []) - platform_api_key: str = params.get( - PSKeys.PLATFORM_SERVICE_API_KEY, "" - ) + platform_api_key: str = params.get(PSKeys.PLATFORM_SERVICE_API_KEY, "") if not llm_adapter_id: return ExecutionResult.failure( error="Missing required param: llm_adapter_instance_id" ) if not doc_context: - return ExecutionResult.failure( - error="Missing required param: context" - ) + return ExecutionResult.failure(error="Missing required param: context") logger.info( "Starting summarization: prompt_keys=%s run_id=%s", @@ -1577,12 +1474,9 @@ def _handle_summarize( # Build the summarize prompt prompt = f"{summarize_prompt}\n\n" if prompt_keys: - prompt += ( - f"Focus on these fields: {', '.join(prompt_keys)}\n\n" - ) + prompt += f"Focus on these fields: {', '.join(prompt_keys)}\n\n" prompt += ( - f"Context:\n---------------\n{doc_context}\n" - f"-----------------\n\nSummary:" + f"Context:\n---------------\n{doc_context}\n" f"-----------------\n\nSummary:" ) shim = ExecutorToolShim( @@ -1604,24 +1498,17 @@ def _handle_summarize( from executor.executors.answer_prompt import AnswerPromptService shim.stream_log("Running document summarization...") - summary = AnswerPromptService.run_completion( - llm=llm, prompt=prompt - ) - logger.info( - "Summarization completed: run_id=%s", context.run_id - ) + summary = AnswerPromptService.run_completion(llm=llm, prompt=prompt) + logger.info("Summarization completed: run_id=%s", context.run_id) shim.stream_log("Summarization completed") return ExecutionResult( success=True, data={"data": summary}, ) except Exception as e: - logger.error( - "Summarization failed: error=%s", str(e) - ) + logger.error("Summarization failed: error=%s", str(e)) status_code = getattr(e, "status_code", None) or 500 raise LegacyExecutorError( message=f"Error during summarization: {e}", code=status_code, ) from e - diff --git a/workers/executor/executors/plugins/loader.py b/workers/executor/executors/plugins/loader.py index 35c29f0841..7a4ed25da5 100644 --- a/workers/executor/executors/plugins/loader.py +++ b/workers/executor/executors/plugins/loader.py @@ -12,7 +12,6 @@ """ import logging -from typing import Any logger = logging.getLogger(__name__) diff --git a/workers/executor/executors/plugins/protocols.py b/workers/executor/executors/plugins/protocols.py index 0e81edb22a..4a8ebd7d5f 100644 --- a/workers/executor/executors/plugins/protocols.py +++ b/workers/executor/executors/plugins/protocols.py @@ -20,9 +20,7 @@ def __init__( **kwargs: Any, ) -> None: ... - def run( - self, response: str, is_json: bool = False, **kwargs: Any - ) -> dict: ... + def run(self, response: str, is_json: bool = False, **kwargs: Any) -> dict: ... def get_highlight_data(self) -> Any: ... diff --git a/workers/executor/executors/plugins/text_processor.py b/workers/executor/executors/plugins/text_processor.py index 72eb17dd60..472d9dc828 100644 --- a/workers/executor/executors/plugins/text_processor.py +++ b/workers/executor/executors/plugins/text_processor.py @@ -16,6 +16,4 @@ def add_hex_line_numbers(text: str) -> str: """ lines = text.split("\n") hex_width = max(len(hex(len(lines))) - 2, 1) - return "\n".join( - f"0x{i:0{hex_width}X}: {line}" for i, line in enumerate(lines) - ) + return "\n".join(f"0x{i:0{hex_width}X}: {line}" for i, line in enumerate(lines)) diff --git a/workers/executor/executors/retrievers/automerging.py b/workers/executor/executors/retrievers/automerging.py index 09adcc7739..3527458494 100644 --- a/workers/executor/executors/retrievers/automerging.py +++ b/workers/executor/executors/retrievers/automerging.py @@ -1,12 +1,11 @@ import logging +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever from llama_index.core import VectorStoreIndex from llama_index.core.retrievers import AutoMergingRetriever as LlamaAutoMergingRetriever from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters -from executor.executors.exceptions import RetrievalError -from executor.executors.retrievers.base_retriever import BaseRetriever - logger = logging.getLogger(__name__) diff --git a/workers/executor/executors/retrievers/fusion.py b/workers/executor/executors/retrievers/fusion.py index 313f28caba..476e5fa6da 100644 --- a/workers/executor/executors/retrievers/fusion.py +++ b/workers/executor/executors/retrievers/fusion.py @@ -1,12 +1,11 @@ import logging +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever from llama_index.core import VectorStoreIndex from llama_index.core.retrievers import QueryFusionRetriever from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters -from executor.executors.exceptions import RetrievalError -from executor.executors.retrievers.base_retriever import BaseRetriever - logger = logging.getLogger(__name__) diff --git a/workers/executor/executors/retrievers/keyword_table.py b/workers/executor/executors/retrievers/keyword_table.py index d58db0f74a..dfaaff1db3 100644 --- a/workers/executor/executors/retrievers/keyword_table.py +++ b/workers/executor/executors/retrievers/keyword_table.py @@ -1,12 +1,11 @@ import logging +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever from llama_index.core import VectorStoreIndex from llama_index.core.indices.keyword_table import KeywordTableIndex from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters -from executor.executors.exceptions import RetrievalError -from executor.executors.retrievers.base_retriever import BaseRetriever - logger = logging.getLogger(__name__) diff --git a/workers/executor/executors/retrievers/recursive.py b/workers/executor/executors/retrievers/recursive.py index b520d26ea0..d1c8d0e786 100644 --- a/workers/executor/executors/retrievers/recursive.py +++ b/workers/executor/executors/retrievers/recursive.py @@ -1,12 +1,11 @@ import logging +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever from llama_index.core import VectorStoreIndex from llama_index.core.retrievers import RecursiveRetriever from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters -from executor.executors.exceptions import RetrievalError -from executor.executors.retrievers.base_retriever import BaseRetriever - logger = logging.getLogger(__name__) diff --git a/workers/executor/executors/retrievers/router.py b/workers/executor/executors/retrievers/router.py index ae7a99dfdf..b995dc9043 100644 --- a/workers/executor/executors/retrievers/router.py +++ b/workers/executor/executors/retrievers/router.py @@ -1,14 +1,13 @@ import logging +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever from llama_index.core import VectorStoreIndex from llama_index.core.query_engine import RouterQueryEngine from llama_index.core.selectors import LLMSingleSelector from llama_index.core.tools import QueryEngineTool, ToolMetadata from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters -from executor.executors.exceptions import RetrievalError -from executor.executors.retrievers.base_retriever import BaseRetriever - logger = logging.getLogger(__name__) diff --git a/workers/executor/executors/retrievers/simple.py b/workers/executor/executors/retrievers/simple.py index 71ed7e6af5..5e533e72b3 100644 --- a/workers/executor/executors/retrievers/simple.py +++ b/workers/executor/executors/retrievers/simple.py @@ -1,11 +1,10 @@ import logging import time +from executor.executors.retrievers.base_retriever import BaseRetriever from llama_index.core import VectorStoreIndex from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters -from executor.executors.retrievers.base_retriever import BaseRetriever - logger = logging.getLogger(__name__) diff --git a/workers/executor/executors/retrievers/subquestion.py b/workers/executor/executors/retrievers/subquestion.py index 2b4665465b..729abbb2cd 100644 --- a/workers/executor/executors/retrievers/subquestion.py +++ b/workers/executor/executors/retrievers/subquestion.py @@ -1,12 +1,11 @@ import logging +from executor.executors.exceptions import RetrievalError +from executor.executors.retrievers.base_retriever import BaseRetriever from llama_index.core.query_engine import SubQuestionQueryEngine from llama_index.core.schema import QueryBundle from llama_index.core.tools import QueryEngineTool, ToolMetadata -from executor.executors.exceptions import RetrievalError -from executor.executors.retrievers.base_retriever import BaseRetriever - logger = logging.getLogger(__name__) diff --git a/workers/executor/executors/variable_replacement.py b/workers/executor/executors/variable_replacement.py index bd72d42e8c..cca158cba0 100644 --- a/workers/executor/executors/variable_replacement.py +++ b/workers/executor/executors/variable_replacement.py @@ -11,10 +11,9 @@ from typing import Any import requests as pyrequests -from requests.exceptions import RequestException - from executor.executors.constants import VariableConstants, VariableType from executor.executors.exceptions import CustomDataError, LegacyExecutorError +from requests.exceptions import RequestException logger = logging.getLogger(__name__) @@ -23,6 +22,7 @@ # VariableReplacementHelper — low-level replacement logic # --------------------------------------------------------------------------- + class VariableReplacementHelper: @staticmethod def replace_static_variable( @@ -48,8 +48,7 @@ def check_static_variable_run_status( output = structure_output[variable] except KeyError: logger.warning( - "Prompt with %s is not executed yet. " - "Unable to replace the variable", + "Prompt with %s is not executed yet. " "Unable to replace the variable", variable, ) return output @@ -188,6 +187,7 @@ def fetch_dynamic_variable_value(url: str, data: str) -> Any: # VariableReplacementService — high-level orchestration # --------------------------------------------------------------------------- + class VariableReplacementService: @staticmethod def is_variables_present(prompt_text: str) -> bool: diff --git a/workers/executor/tasks.py b/workers/executor/tasks.py index 00ee103751..759a463d67 100644 --- a/workers/executor/tasks.py +++ b/workers/executor/tasks.py @@ -5,10 +5,7 @@ ExecutionOrchestrator, and returns an ExecutionResult dict. """ -import logging - from celery import shared_task - from shared.enums.task_enums import TaskName from shared.infrastructure.logging import WorkerLogger @@ -28,9 +25,7 @@ max_retries=3, retry_jitter=True, ) -def execute_extraction( - self, execution_context_dict: dict -) -> dict: +def execute_extraction(self, execution_context_dict: dict) -> dict: """Execute an extraction operation via the executor framework. This is the single Celery task entry point for all extraction @@ -59,9 +54,7 @@ def execute_extraction( try: context = ExecutionContext.from_dict(execution_context_dict) except (KeyError, ValueError) as exc: - logger.error( - "Invalid execution context: %s", exc, exc_info=True - ) + logger.error("Invalid execution context: %s", exc, exc_info=True) return ExecutionResult.failure( error=f"Invalid execution context: {exc}" ).to_dict() @@ -86,9 +79,7 @@ def execute_extraction( context._log_component = { "tool_id": answer_params.get("tool_id", ""), "run_id": context.run_id, - "doc_name": str( - pipeline_opts.get("source_file_name", "") - ), + "doc_name": str(pipeline_opts.get("source_file_name", "")), "operation": context.operation, } elif context.operation in ("table_extract", "smart_table_extract"): @@ -112,8 +103,7 @@ def execute_extraction( result = orchestrator.execute(context) logger.info( - "execute_extraction complete: " - "celery_task_id=%s request_id=%s success=%s", + "execute_extraction complete: " "celery_task_id=%s request_id=%s success=%s", self.request.id, context.request_id, result.success, diff --git a/workers/executor/worker.py b/workers/executor/worker.py index 65c30db98a..4b80c089da 100644 --- a/workers/executor/worker.py +++ b/workers/executor/worker.py @@ -64,14 +64,11 @@ def healthcheck(self): "status": "healthy", "worker_type": "executor", "task_id": self.request.id, - "worker_name": ( - config.worker_name if config else "executor-worker" - ), + "worker_name": (config.worker_name if config else "executor-worker"), } # Import tasks so shared_task definitions bind to this app. -import executor.tasks # noqa: E402, F401 - # Import executors to trigger @ExecutorRegistry.register at import time. import executor.executors # noqa: E402, F401 +import executor.tasks # noqa: E402, F401 diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index a4176edb2b..0ed3d1f9d7 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -23,7 +23,8 @@ from file_processing.worker import app from shared.enums.task_enums import TaskName -from unstract.sdk1.constants import MetadataKey, ToolEnv, UsageKwargs + +from unstract.sdk1.constants import ToolEnv, UsageKwargs from unstract.sdk1.execution.context import ExecutionContext from unstract.sdk1.execution.dispatcher import ExecutionDispatcher from unstract.sdk1.execution.result import ExecutionResult @@ -40,6 +41,7 @@ # These are the keys used in tool_metadata and payload dicts. # ----------------------------------------------------------------------- + class _SK: """SettingsKeys subset needed by the structure tool task.""" @@ -94,9 +96,7 @@ class _SK: # ----------------------------------------------------------------------- -def _apply_profile_overrides( - tool_metadata: dict, profile_data: dict -) -> list[str]: +def _apply_profile_overrides(tool_metadata: dict, profile_data: dict) -> list[str]: """Apply profile overrides to tool metadata. Standalone version of StructureTool._apply_profile_overrides. @@ -153,9 +153,7 @@ def _override_section( new_value = profile_data[profile_key] if old_value != new_value: section[section_key] = new_value - change_desc = ( - f"{section_name}.{section_key}: {old_value} -> {new_value}" - ) + change_desc = f"{section_name}.{section_key}: {old_value} -> {new_value}" changes.append(change_desc) logger.info("Overrode %s", change_desc) return changes @@ -206,9 +204,7 @@ def execute_structure_tool(self, params: dict) -> dict: return _execute_structure_tool_impl(params) except Exception as e: logger.error("Structure tool task failed: %s", e, exc_info=True) - return ExecutionResult.failure( - error=f"Structure tool failed: {e}" - ).to_dict() + return ExecutionResult.failure(error=f"Structure tool failed: {e}").to_dict() def _execute_structure_tool_impl(params: dict) -> dict: @@ -245,16 +241,10 @@ def _execute_structure_tool_impl(params: dict) -> dict: fs = _get_file_storage() # ---- Step 2: Fetch tool metadata ---- - prompt_registry_id = tool_instance_metadata.get( - _SK.PROMPT_REGISTRY_ID, "" - ) - logger.info( - "Fetching exported tool with UUID '%s'", prompt_registry_id - ) + prompt_registry_id = tool_instance_metadata.get(_SK.PROMPT_REGISTRY_ID, "") + logger.info("Fetching exported tool with UUID '%s'", prompt_registry_id) - tool_metadata, is_agentic = _fetch_tool_metadata( - platform_helper, prompt_registry_id - ) + tool_metadata, is_agentic = _fetch_tool_metadata(platform_helper, prompt_registry_id) # ---- Route agentic vs regular ---- if is_agentic: @@ -273,17 +263,13 @@ def _execute_structure_tool_impl(params: dict) -> dict: ) # ---- Step 3: Profile overrides ---- - _handle_profile_overrides( - exec_metadata, platform_helper, tool_metadata - ) + _handle_profile_overrides(exec_metadata, platform_helper, tool_metadata) # ---- Extract settings from tool_metadata ---- settings = tool_instance_metadata is_challenge_enabled = settings.get(_SK.ENABLE_CHALLENGE, False) is_summarization_enabled = settings.get(_SK.SUMMARIZE_AS_SOURCE, False) - is_single_pass_enabled = settings.get( - _SK.SINGLE_PASS_EXTRACTION_MODE, False - ) + is_single_pass_enabled = settings.get(_SK.SINGLE_PASS_EXTRACTION_MODE, False) challenge_llm = settings.get(_SK.CHALLENGE_LLM_ADAPTER_ID, "") is_highlight_enabled = settings.get(_SK.ENABLE_HIGHLIGHT, False) @@ -311,8 +297,7 @@ def _execute_structure_tool_impl(params: dict) -> dict: ) if skip_extraction_and_indexing: logger.info( - "Skipping extraction and indexing for Excel table " - "with valid JSON schema" + "Skipping extraction and indexing for Excel table " "with valid JSON schema" ) # ---- Step 5: Build pipeline params ---- @@ -371,15 +356,9 @@ def _execute_structure_tool_impl(params: dict) -> dict: prompt_keys = [o[_SK.NAME] for o in outputs] summarize_params = { "llm_adapter_instance_id": tool_settings[_SK.LLM], - "summarize_prompt": tool_settings.get( - _SK.SUMMARIZE_PROMPT, "" - ), - "extract_file_path": str( - execution_run_data_folder / _SK.EXTRACT - ), - "summarize_file_path": str( - execution_run_data_folder / _SK.SUMMARIZE - ), + "summarize_prompt": tool_settings.get(_SK.SUMMARIZE_PROMPT, ""), + "extract_file_path": str(execution_run_data_folder / _SK.EXTRACT), + "summarize_file_path": str(execution_run_data_folder / _SK.SUMMARIZE), "platform_api_key": platform_service_api_key, "prompt_keys": prompt_keys, } @@ -409,9 +388,7 @@ def _execute_structure_tool_impl(params: dict) -> dict: "summarize_params": summarize_params, }, ) - pipeline_result = dispatcher.dispatch( - pipeline_ctx, timeout=EXECUTOR_TIMEOUT - ) + pipeline_result = dispatcher.dispatch(pipeline_ctx, timeout=EXECUTOR_TIMEOUT) if not pipeline_result.success: return pipeline_result.to_dict() @@ -420,24 +397,17 @@ def _execute_structure_tool_impl(params: dict) -> dict: # ---- Step 7: Write output files ---- # (metadata/metrics merging already done by executor pipeline) try: - output_path = ( - Path(output_dir_path) - / f"{Path(source_file_name).stem}.json" - ) + output_path = Path(output_dir_path) / f"{Path(source_file_name).stem}.json" logger.info("Writing output to %s", output_path) fs.json_dump(path=output_path, data=structured_output) logger.info("Output written successfully to workflow storage") except (OSError, json.JSONDecodeError) as e: - return ExecutionResult.failure( - error=f"Error writing output file: {e}" - ).to_dict() + return ExecutionResult.failure(error=f"Error writing output file: {e}").to_dict() # Write tool result to METADATA.json _write_tool_result(fs, execution_data_dir, structured_output) - return ExecutionResult( - success=True, data=structured_output - ).to_dict() + return ExecutionResult(success=True, data=structured_output).to_dict() # ----------------------------------------------------------------------- @@ -464,9 +434,7 @@ def _get_file_storage(): return FileSystem(FileStorageType.WORKFLOW_EXECUTION).get_file_storage() -def _fetch_tool_metadata( - platform_helper, prompt_registry_id: str -) -> tuple[dict, bool]: +def _fetch_tool_metadata(platform_helper, prompt_registry_id: str) -> tuple[dict, bool]: """Fetch tool metadata from platform, trying prompt studio then agentic. Returns: @@ -481,9 +449,7 @@ def _fetch_tool_metadata( prompt_registry_id=prompt_registry_id ) except Exception as e: - logger.info( - "Not found as prompt studio project, trying agentic: %s", e - ) + logger.info("Not found as prompt studio project, trying agentic: %s", e) if exported_tool and _SK.TOOL_METADATA in exported_tool: tool_metadata = exported_tool[_SK.TOOL_METADATA] @@ -524,9 +490,7 @@ def _handle_profile_overrides( try: llm_profile = platform_helper.get_llm_profile(llm_profile_id) if llm_profile: - profile_name = llm_profile.get( - "profile_name", llm_profile_id - ) + profile_name = llm_profile.get("profile_name", llm_profile_id) logger.info( "Applying profile overrides from profile: %s", profile_name, @@ -538,13 +502,9 @@ def _handle_profile_overrides( "; ".join(changes), ) else: - logger.info( - "Profile overrides applied - no changes needed" - ) + logger.info("Profile overrides applied - no changes needed") except Exception as e: - raise RuntimeError( - f"Error applying profile overrides: {e}" - ) from e + raise RuntimeError(f"Error applying profile overrides: {e}") from e def _run_agentic_extraction( @@ -578,15 +538,11 @@ def _run_agentic_extraction( "tool_instance_metadata": tool_instance_metadata, }, ) - agentic_result = dispatcher.dispatch( - agentic_ctx, timeout=EXECUTOR_TIMEOUT - ) + agentic_result = dispatcher.dispatch(agentic_ctx, timeout=EXECUTOR_TIMEOUT) return agentic_result.to_dict() -def _write_tool_result( - fs: Any, execution_data_dir: str, data: dict -) -> None: +def _write_tool_result(fs: Any, execution_data_dir: str, data: dict) -> None: """Write tool result to METADATA.json (matches BaseTool.write_tool_result).""" try: metadata_path = Path(execution_data_dir) / "METADATA.json" diff --git a/workers/tests/test_answer_prompt.py b/workers/tests/test_answer_prompt.py index 04934aea1a..53dfd4d79f 100644 --- a/workers/tests/test_answer_prompt.py +++ b/workers/tests/test_answer_prompt.py @@ -11,11 +11,8 @@ from executor.executors.constants import ( PromptServiceConstants as PSKeys, - RetrievalStrategy, ) -from executor.executors.exceptions import LegacyExecutorError from unstract.sdk1.execution.context import ExecutionContext, Operation -from unstract.sdk1.execution.result import ExecutionResult # --------------------------------------------------------------------------- diff --git a/workers/tests/test_legacy_executor_extract.py b/workers/tests/test_legacy_executor_extract.py index a8218f80cb..0711d2255a 100644 --- a/workers/tests/test_legacy_executor_extract.py +++ b/workers/tests/test_legacy_executor_extract.py @@ -20,11 +20,10 @@ import pytest from executor.executors.constants import ( - ExecutionSource, FileStorageKeys, IndexingConstants as IKeys, ) -from executor.executors.exceptions import ExtractionError, LegacyExecutorError +from executor.executors.exceptions import LegacyExecutorError from unstract.sdk1.adapters.x2text.constants import X2TextConstants from unstract.sdk1.adapters.x2text.dto import ( TextExtractionMetadata, diff --git a/workers/tests/test_legacy_executor_scaffold.py b/workers/tests/test_legacy_executor_scaffold.py index 52fa92e127..f2d9935f9b 100644 --- a/workers/tests/test_legacy_executor_scaffold.py +++ b/workers/tests/test_legacy_executor_scaffold.py @@ -16,7 +16,6 @@ import pytest from unstract.sdk1.execution.context import ExecutionContext, Operation -from unstract.sdk1.execution.orchestrator import ExecutionOrchestrator from unstract.sdk1.execution.registry import ExecutorRegistry from unstract.sdk1.execution.result import ExecutionResult diff --git a/workers/tests/test_phase1_log_streaming.py b/workers/tests/test_phase1_log_streaming.py index 37d1e7d327..c20c2c04ef 100644 --- a/workers/tests/test_phase1_log_streaming.py +++ b/workers/tests/test_phase1_log_streaming.py @@ -13,7 +13,6 @@ from unittest.mock import MagicMock, patch -import pytest from unstract.sdk1.constants import LogLevel from unstract.sdk1.execution.context import ExecutionContext diff --git a/workers/tests/test_phase2h.py b/workers/tests/test_phase2h.py index 5d41ee1e15..cf02c767b2 100644 --- a/workers/tests/test_phase2h.py +++ b/workers/tests/test_phase2h.py @@ -10,7 +10,7 @@ import pytest import requests as real_requests -from executor.executors.constants import VariableConstants, VariableType +from executor.executors.constants import VariableType from executor.executors.exceptions import CustomDataError, LegacyExecutorError from executor.executors.postprocessor import ( _validate_structured_output, diff --git a/workers/tests/test_phase5d.py b/workers/tests/test_phase5d.py index a61403ee2e..c5b0a0640a 100644 --- a/workers/tests/test_phase5d.py +++ b/workers/tests/test_phase5d.py @@ -5,8 +5,6 @@ executor invocation. """ -import datetime -from pathlib import Path from unittest.mock import MagicMock, patch import pytest diff --git a/workers/tests/test_sanity_phase3.py b/workers/tests/test_sanity_phase3.py index ec70153185..1bcd61f36e 100644 --- a/workers/tests/test_sanity_phase3.py +++ b/workers/tests/test_sanity_phase3.py @@ -7,14 +7,11 @@ are assembled and the result is written to filesystem. """ -import json -from pathlib import Path from unittest.mock import MagicMock, patch import pytest from shared.enums.task_enums import TaskName -from unstract.sdk1.execution.context import ExecutionContext from unstract.sdk1.execution.result import ExecutionResult # --------------------------------------------------------------------------- @@ -897,4 +894,3 @@ def test_should_skip_extraction_with_json_schema(self): _should_skip_extraction_for_smart_table("file.xlsx", outputs) is True ) - diff --git a/workers/tests/test_sanity_phase4.py b/workers/tests/test_sanity_phase4.py index bce773dec0..f57ef0af28 100644 --- a/workers/tests/test_sanity_phase4.py +++ b/workers/tests/test_sanity_phase4.py @@ -16,16 +16,14 @@ All tests use execution_source="ide" to match the real IDE path. """ -import json from unittest.mock import MagicMock, patch import pytest from executor.executors.constants import ( - IndexingConstants as IKeys, PromptServiceConstants as PSKeys, ) -from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.context import ExecutionContext from unstract.sdk1.execution.dispatcher import ExecutionDispatcher from unstract.sdk1.execution.registry import ExecutorRegistry from unstract.sdk1.execution.result import ExecutionResult diff --git a/workers/tests/test_sanity_phase5.py b/workers/tests/test_sanity_phase5.py index 62d534d04c..a7da15d1fb 100644 --- a/workers/tests/test_sanity_phase5.py +++ b/workers/tests/test_sanity_phase5.py @@ -14,7 +14,6 @@ import pytest from executor.executors.constants import ( - IndexingConstants as IKeys, PromptServiceConstants as PSKeys, ) from unstract.sdk1.execution.context import ExecutionContext, Operation diff --git a/workers/tests/test_sanity_phase6c.py b/workers/tests/test_sanity_phase6c.py index 87a55d309c..32dbb2bb39 100644 --- a/workers/tests/test_sanity_phase6c.py +++ b/workers/tests/test_sanity_phase6c.py @@ -10,7 +10,7 @@ 7. Highlight metadata populated when plugin provides data via process_text """ -from unittest.mock import MagicMock, call, patch +from unittest.mock import MagicMock, patch import pytest from executor.executors.answer_prompt import AnswerPromptService diff --git a/workers/tests/test_sanity_phase6d.py b/workers/tests/test_sanity_phase6d.py index 0465abcd59..7dee6226a8 100644 --- a/workers/tests/test_sanity_phase6d.py +++ b/workers/tests/test_sanity_phase6d.py @@ -14,7 +14,7 @@ 11. Challenge mutates structured_output (via mock) """ -from unittest.mock import MagicMock, patch, call +from unittest.mock import MagicMock, patch import pytest from executor.executors.answer_prompt import AnswerPromptService diff --git a/workers/tests/test_sanity_phase6e.py b/workers/tests/test_sanity_phase6e.py index 5a33de50a5..85d4d60d65 100644 --- a/workers/tests/test_sanity_phase6e.py +++ b/workers/tests/test_sanity_phase6e.py @@ -9,9 +9,8 @@ 6. Cloud executor entry point name matches pyproject.toml """ -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock -import pytest from unstract.sdk1.execution.context import ExecutionContext, Operation from unstract.sdk1.execution.dispatcher import ExecutionDispatcher @@ -40,7 +39,6 @@ def test_table_extract_in_operation_values(self): class TestTasksLogComponent: def test_table_extract_log_component(self): """tasks.py builds correct log_component for table_extract.""" - from executor.tasks import execute_extraction # Build a mock context dict ctx_dict = { @@ -86,7 +84,6 @@ def test_table_extract_log_component(self): class TestTableExtractorExecutorRegistration: def test_mock_table_executor_discovered_via_entry_point(self): """Simulate cloud executor discovery via entry point.""" - from executor.executors.plugins.loader import ExecutorPluginLoader from unstract.sdk1.execution.executor import BaseExecutor # Create a mock TableExtractorExecutor diff --git a/workers/tests/test_sanity_phase6f.py b/workers/tests/test_sanity_phase6f.py index 8eb0636792..4a8432f6ef 100644 --- a/workers/tests/test_sanity_phase6f.py +++ b/workers/tests/test_sanity_phase6f.py @@ -11,7 +11,6 @@ from unittest.mock import MagicMock -import pytest from unstract.sdk1.execution.context import ExecutionContext, Operation from unstract.sdk1.execution.dispatcher import ExecutionDispatcher diff --git a/workers/tests/test_sanity_phase6g.py b/workers/tests/test_sanity_phase6g.py index e93e893a8e..73bb738911 100644 --- a/workers/tests/test_sanity_phase6g.py +++ b/workers/tests/test_sanity_phase6g.py @@ -12,7 +12,6 @@ from unittest.mock import MagicMock -import pytest from unstract.sdk1.execution.context import ExecutionContext, Operation from unstract.sdk1.execution.dispatcher import ExecutionDispatcher diff --git a/workers/tests/test_sanity_phase6h.py b/workers/tests/test_sanity_phase6h.py index 570fba004c..1c43b3d78b 100644 --- a/workers/tests/test_sanity_phase6h.py +++ b/workers/tests/test_sanity_phase6h.py @@ -200,7 +200,6 @@ def test_legacy_returns_failure_for_agentic_summarize(self): class TestStructureToolAgenticRouting: def test_structure_tool_dispatches_agentic_extract(self): """Verify _run_agentic_extraction sends executor_name='agentic'.""" - from unittest.mock import patch from file_processing.structure_tool_task import _run_agentic_extraction diff --git a/workers/tests/test_usage.py b/workers/tests/test_usage.py index 7aaa553e1b..2fecc76713 100644 --- a/workers/tests/test_usage.py +++ b/workers/tests/test_usage.py @@ -12,7 +12,6 @@ from unittest.mock import MagicMock, patch -import pytest from executor.executors.usage import UsageHelper From a3dc91236432dcefc563e275e3bef41bcbab9a9b Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 3 Mar 2026 14:24:40 +0530 Subject: [PATCH 12/64] Removed redundant envs --- .../src/components/custom-tools/prompt-card/PromptCard.jsx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx b/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx index 13bdc08083..8c481d4c9c 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx @@ -76,11 +76,7 @@ const PromptCard = memo( (item?.component?.prompt_id === promptDetailsState?.prompt_id || item?.component?.prompt_key === promptKey || item?.component?.tool_id === details?.tool_id) && -<<<<<<< HEAD - (item?.level === "INFO" || item?.level === "ERROR") -======= (item?.level === "INFO" || item?.level === "ERROR"), ->>>>>>> 2f4f2dc874e8d6e080175c5d3b2cc2f2aa4b87e7 ); // If no matching message is found, return early From 0216b59f054164db80e2569e3afdf75e88c4d3d7 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 3 Mar 2026 18:09:23 +0530 Subject: [PATCH 13/64] Removed redundant envs --- workers/run-worker-docker.sh | 2 +- workers/shared/workflow/execution/service.py | 18 ++++++- workers/tests/test_sanity_phase3.py | 51 ++++++++++++++++++++ 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/workers/run-worker-docker.sh b/workers/run-worker-docker.sh index 16668a919e..fdc4e81315 100755 --- a/workers/run-worker-docker.sh +++ b/workers/run-worker-docker.sh @@ -46,7 +46,7 @@ declare -A PLUGGABLE_WORKERS=() # Worker queue mappings declare -A WORKER_QUEUES=( ["api_deployment"]="celery_api_deployments" - ["general"]="celery" + ["general"]="celery,celery_prompt_studio" ["file_processing"]="file_processing,api_file_processing" ["callback"]="file_processing_callback,api_file_processing_callback" ["notification"]="notifications,notifications_webhook,notifications_email,notifications_sms,notifications_priority" diff --git a/workers/shared/workflow/execution/service.py b/workers/shared/workflow/execution/service.py index 96ea110bc2..e4250ec881 100644 --- a/workers/shared/workflow/execution/service.py +++ b/workers/shared/workflow/execution/service.py @@ -991,12 +991,26 @@ def _build_and_execute_workflow( def _is_structure_tool_workflow( self, execution_service: WorkflowExecutionService ) -> bool: - """Check if workflow uses the structure tool.""" + """Check if workflow uses the structure tool. + + Compares the base image name (last path component without tag) + to handle registry prefixes like gcr.io/project/tool-structure + vs the default unstract/tool-structure. + """ structure_image = os.environ.get( "STRUCTURE_TOOL_IMAGE_NAME", "unstract/tool-structure" ) + structure_base = structure_image.split(":")[0].rsplit("/", 1)[-1] for ti in execution_service.tool_instances: - if ti.image_name == structure_image: + ti_name = str(ti.image_name) if ti.image_name else "" + if not ti_name: + continue + ti_base = ti_name.split(":")[0].rsplit("/", 1)[-1] + if ti_name == structure_image or ti_base == structure_base: + logger.info( + "Detected structure tool workflow " + f"(image={ti_name}, expected={structure_image})" + ) return True return False diff --git a/workers/tests/test_sanity_phase3.py b/workers/tests/test_sanity_phase3.py index 1bcd61f36e..5a40ba98be 100644 --- a/workers/tests/test_sanity_phase3.py +++ b/workers/tests/test_sanity_phase3.py @@ -800,6 +800,57 @@ def test_custom_structure_image_name(self): result = service._is_structure_tool_workflow(mock_exec_service) assert result is True + def test_registry_prefix_match(self): + """Image from backend with registry prefix matches default base name.""" + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + # Worker uses default "unstract/tool-structure", but backend sends + # image with registry prefix (common in K8s deployments) + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "gcr.io/my-project/tool-structure" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is True + + def test_registry_prefix_with_tag_match(self): + """Image with registry prefix and tag still matches.""" + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "us.gcr.io/prod/tool-structure:v1.2.3" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is True + + @patch.dict("os.environ", {"STRUCTURE_TOOL_IMAGE_NAME": "gcr.io/prod/tool-structure"}) + def test_env_has_registry_prefix_instance_has_different_prefix(self): + """Both env and instance have different registry prefixes, same base.""" + from shared.workflow.execution.service import ( + WorkerWorkflowExecutionService, + ) + + service = WorkerWorkflowExecutionService() + + mock_exec_service = MagicMock() + ti = MagicMock() + ti.image_name = "ecr.aws/other/tool-structure" + mock_exec_service.tool_instances = [ti] + + result = service._is_structure_tool_workflow(mock_exec_service) + assert result is True + class TestStructureToolParamsPassthrough: """Task receives correct params from WorkerWorkflowExecutionService.""" From db81b9dcfa6543b1b083b4bcb570e2fa5671f13d Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 3 Mar 2026 19:53:49 +0530 Subject: [PATCH 14/64] Removed redundant envs --- backend/backend/celery_config.py | 17 ++++------------- workers/run-worker-docker.sh | 2 +- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/backend/backend/celery_config.py b/backend/backend/celery_config.py index 9ffe71464f..5bac942d10 100644 --- a/backend/backend/celery_config.py +++ b/backend/backend/celery_config.py @@ -32,16 +32,7 @@ class CeleryConfig: task_acks_late = True - # Route long-running Prompt Studio IDE tasks to a dedicated queue - # so they don't compete with beat/logging/API-deployment tasks. - task_routes = { - "prompt_studio_index_document": {"queue": "celery_prompt_studio"}, - "prompt_studio_fetch_response": {"queue": "celery_prompt_studio"}, - "prompt_studio_single_pass": {"queue": "celery_prompt_studio"}, - # Phase 5B: Fire-and-forget callback tasks (sub-second, run on - # same queue as the old blocking tasks they replace). - "ide_index_complete": {"queue": "celery_prompt_studio"}, - "ide_index_error": {"queue": "celery_prompt_studio"}, - "ide_prompt_complete": {"queue": "celery_prompt_studio"}, - "ide_prompt_error": {"queue": "celery_prompt_studio"}, - } + # Prompt Studio IDE tasks run on the default "celery" queue. + # The callback tasks (ide_*) are sub-second ORM writes + Socket.IO + # emits. The legacy tasks are kept for backward compatibility. + # No explicit routing needed — all go to default "celery" queue. diff --git a/workers/run-worker-docker.sh b/workers/run-worker-docker.sh index fdc4e81315..16668a919e 100755 --- a/workers/run-worker-docker.sh +++ b/workers/run-worker-docker.sh @@ -46,7 +46,7 @@ declare -A PLUGGABLE_WORKERS=() # Worker queue mappings declare -A WORKER_QUEUES=( ["api_deployment"]="celery_api_deployments" - ["general"]="celery,celery_prompt_studio" + ["general"]="celery" ["file_processing"]="file_processing,api_file_processing" ["callback"]="file_processing_callback,api_file_processing_callback" ["notification"]="notifications,notifications_webhook,notifications_email,notifications_sms,notifications_priority" From e1da202db4e0c99ead40312bfea6b72b7f609556 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 3 Mar 2026 20:23:13 +0530 Subject: [PATCH 15/64] Removed redundant envs --- docker/dockerfiles/worker-unified.Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docker/dockerfiles/worker-unified.Dockerfile b/docker/dockerfiles/worker-unified.Dockerfile index 4b7e88198a..5365a0a863 100644 --- a/docker/dockerfiles/worker-unified.Dockerfile +++ b/docker/dockerfiles/worker-unified.Dockerfile @@ -83,14 +83,16 @@ RUN uv sync --group deploy --locked && \ touch requirements.txt && \ { chown -R worker:worker ./run-worker.sh ./run-worker-docker.sh 2>/dev/null || true; } -# Install executor plugins from prompt-service (cloud-only, no-op for OSS). +# Install executor plugins from workers/plugins/ (cloud-only, no-op for OSS). # Each plugin with an "unstract.executor.executors" entry point gets installed # so that importlib.metadata.entry_points() can discover it at worker startup. -RUN for plugin_dir in /unstract/prompt-service/src/unstract/prompt_service/plugins/*/; do \ +# Editable installs (-e) ensure Path(__file__) resolves to the source directory, +# giving plugins access to non-Python assets (.md prompts, .txt templates, etc.). +RUN for plugin_dir in /app/plugins/*/; do \ if [ -f "$plugin_dir/pyproject.toml" ] && \ grep -q 'unstract.executor.executors' "$plugin_dir/pyproject.toml" 2>/dev/null; then \ echo "Installing executor plugin: $(basename $plugin_dir)" && \ - uv pip install "$plugin_dir" || true; \ + uv pip install -e "$plugin_dir" || true; \ fi; \ done From d119797b30aeb346cfed334235f1bdc377a659f2 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 3 Mar 2026 20:40:19 +0530 Subject: [PATCH 16/64] Removed redundant envs --- backend/prompt_studio/prompt_studio_core_v2/views.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/backend/prompt_studio/prompt_studio_core_v2/views.py b/backend/prompt_studio/prompt_studio_core_v2/views.py index e208ca481d..2a9850db3e 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/views.py +++ b/backend/prompt_studio/prompt_studio_core_v2/views.py @@ -396,12 +396,12 @@ def index_document(self, request: HttpRequest, pk: Any = None) -> Response: on_success=signature( "ide_index_complete", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery_prompt_studio", + queue="celery", ), on_error=signature( "ide_index_error", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery_prompt_studio", + queue="celery", ), task_id=executor_task_id, ) @@ -477,12 +477,12 @@ def fetch_response(self, request: HttpRequest, pk: Any = None) -> Response: on_success=signature( "ide_prompt_complete", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery_prompt_studio", + queue="celery", ), on_error=signature( "ide_prompt_error", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery_prompt_studio", + queue="celery", ), task_id=executor_task_id, ) @@ -553,12 +553,12 @@ def single_pass_extraction(self, request: HttpRequest, pk: uuid) -> Response: on_success=signature( "ide_prompt_complete", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery_prompt_studio", + queue="celery", ), on_error=signature( "ide_prompt_error", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery_prompt_studio", + queue="celery", ), task_id=executor_task_id, ) From fbadbf894dbce84383dea67124d839dd637d5216 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 3 Mar 2026 20:46:58 +0530 Subject: [PATCH 17/64] Removed redundant envs --- workers/shared/workflow/execution/service.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/workers/shared/workflow/execution/service.py b/workers/shared/workflow/execution/service.py index e4250ec881..f605c02014 100644 --- a/workers/shared/workflow/execution/service.py +++ b/workers/shared/workflow/execution/service.py @@ -1037,13 +1037,18 @@ def _execute_structure_tool_workflow( except Exception as e: logger.warning(f"Could not read workflow metadata: {e}") + # Get API key from the same source used to create execution_service + platform_api_key = self._get_platform_service_api_key( + execution_service.organization_id + ) + params = { "organization_id": execution_service.organization_id, "workflow_id": execution_service.workflow_id, "execution_id": execution_service.execution_id, "file_execution_id": execution_service.file_execution_id, "tool_instance_metadata": tool_instance.metadata, - "platform_service_api_key": execution_service.platform_service_api_key, + "platform_service_api_key": platform_api_key, "input_file_path": str(file_handler.infile), "output_dir_path": str(file_handler.execution_dir), "source_file_name": str( From 882296e39908fefa8290a66451a5dce1711135da Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 4 Mar 2026 14:38:08 +0530 Subject: [PATCH 18/64] Removed redundant envs --- .../file_processing/structure_tool_task.py | 45 +++++++++++++++++-- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index 0ed3d1f9d7..0c6fed3793 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -18,6 +18,7 @@ import json import logging import os +import time from pathlib import Path from typing import Any @@ -388,7 +389,10 @@ def _execute_structure_tool_impl(params: dict) -> dict: "summarize_params": summarize_params, }, ) + pipeline_start = time.monotonic() pipeline_result = dispatcher.dispatch(pipeline_ctx, timeout=EXECUTOR_TIMEOUT) + pipeline_elapsed = time.monotonic() - pipeline_start + if not pipeline_result.success: return pipeline_result.to_dict() @@ -400,12 +404,20 @@ def _execute_structure_tool_impl(params: dict) -> dict: output_path = Path(output_dir_path) / f"{Path(source_file_name).stem}.json" logger.info("Writing output to %s", output_path) fs.json_dump(path=output_path, data=structured_output) + + # Overwrite INFILE with JSON output (matches Docker-based tool behavior). + # The destination connector reads from INFILE and checks MIME type — + # if we don't overwrite it, INFILE still has the original PDF. + logger.info("Overwriting INFILE with structured output: %s", input_file_path) + fs.json_dump(path=input_file_path, data=structured_output) + logger.info("Output written successfully to workflow storage") except (OSError, json.JSONDecodeError) as e: return ExecutionResult.failure(error=f"Error writing output file: {e}").to_dict() - # Write tool result to METADATA.json - _write_tool_result(fs, execution_data_dir, structured_output) + # Write tool result + tool_metadata to METADATA.json + # (destination connector reads output_type from tool_metadata) + _write_tool_result(fs, execution_data_dir, structured_output, pipeline_elapsed) return ExecutionResult(success=True, data=structured_output).to_dict() @@ -542,8 +554,17 @@ def _run_agentic_extraction( return agentic_result.to_dict() -def _write_tool_result(fs: Any, execution_data_dir: str, data: dict) -> None: - """Write tool result to METADATA.json (matches BaseTool.write_tool_result).""" +def _write_tool_result( + fs: Any, execution_data_dir: str, data: dict, elapsed_time: float = 0.0 +) -> None: + """Write tool result and tool_metadata to METADATA.json. + + Matches BaseTool._update_exec_metadata() + write_tool_result(): + - tool_metadata: list of dicts with tool_name, output_type, elapsed_time + (destination connector reads output_type from here) + - total_elapsed_time: cumulative elapsed time + - tool_result: the structured output data + """ try: metadata_path = Path(execution_data_dir) / "METADATA.json" @@ -557,6 +578,22 @@ def _write_tool_result(fs: Any, execution_data_dir: str, data: dict) -> None: except Exception: pass + # Add tool_metadata (matches BaseTool._update_exec_metadata) + # The destination connector reads output_type from tool_metadata[-1] + tool_meta_entry = { + "tool_name": "structure", + "output_type": "JSON", + "elapsed_time": elapsed_time, + } + if "tool_metadata" not in existing: + existing["tool_metadata"] = [tool_meta_entry] + else: + existing["tool_metadata"].append(tool_meta_entry) + + existing["total_elapsed_time"] = existing.get( + "total_elapsed_time", 0.0 + ) + elapsed_time + # Add tool result existing["tool_result"] = data fs.write( From 6d3bbbf2206538f4703146817c88a8f508f669ec Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 4 Mar 2026 14:38:29 +0530 Subject: [PATCH 19/64] Removed redundant envs --- workers/tests/test_sanity_phase3.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/workers/tests/test_sanity_phase3.py b/workers/tests/test_sanity_phase3.py index 5a40ba98be..30f88565a4 100644 --- a/workers/tests/test_sanity_phase3.py +++ b/workers/tests/test_sanity_phase3.py @@ -198,7 +198,8 @@ def test_structure_tool_single_dispatch( assert result["success"] is True assert result["data"]["output"]["field_a"] == "$1M" assert result["data"]["metadata"]["file_name"] == "test.pdf" - mock_fs.json_dump.assert_called_once() + # json_dump called twice: output file + INFILE overwrite + assert mock_fs.json_dump.call_count == 2 # Single dispatch with structure_pipeline assert dispatcher_instance.dispatch.call_count == 1 @@ -658,15 +659,26 @@ def test_structure_tool_output_written( assert result["success"] is True - # Check json_dump was called with correct path - json_dump_call = mock_fs.json_dump.call_args - output_path = json_dump_call.kwargs.get( - "path", json_dump_call[1].get("path") if len(json_dump_call) > 1 else None + # json_dump called twice: once for output file, once for INFILE overwrite + assert mock_fs.json_dump.call_count == 2 + + # First call: output file (execution_dir/{stem}.json) + first_call = mock_fs.json_dump.call_args_list[0] + first_path = first_call.kwargs.get( + "path", first_call[1].get("path") if len(first_call) > 1 else None ) - if output_path is None: - output_path = json_dump_call[0][0] if json_dump_call[0] else None + if first_path is None: + first_path = first_call[0][0] if first_call[0] else None + assert str(first_path).endswith("test.json") - assert str(output_path).endswith("test.json") + # Second call: INFILE overwrite (so destination connector reads JSON, not PDF) + second_call = mock_fs.json_dump.call_args_list[1] + second_path = second_call.kwargs.get( + "path", second_call[1].get("path") if len(second_call) > 1 else None + ) + if second_path is None: + second_path = second_call[0][0] if second_call[0] else None + assert str(second_path) == base_params["input_file_path"] class TestStructureToolMetadataFileName: From 292460bd7811adab1c83c655801276ae20b772c3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Mar 2026 09:08:41 +0000 Subject: [PATCH 20/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- workers/file_processing/structure_tool_task.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index 0c6fed3793..eb75088a5f 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -590,9 +590,9 @@ def _write_tool_result( else: existing["tool_metadata"].append(tool_meta_entry) - existing["total_elapsed_time"] = existing.get( - "total_elapsed_time", 0.0 - ) + elapsed_time + existing["total_elapsed_time"] = ( + existing.get("total_elapsed_time", 0.0) + elapsed_time + ) # Add tool result existing["tool_result"] = data From f35c0e610234e5dc05d7c49346f6908570bee125 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 4 Mar 2026 14:39:38 +0530 Subject: [PATCH 21/64] Removed redundant envs --- workers/executor/tasks.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/workers/executor/tasks.py b/workers/executor/tasks.py index 759a463d67..32928b40a5 100644 --- a/workers/executor/tasks.py +++ b/workers/executor/tasks.py @@ -108,4 +108,12 @@ def execute_extraction(self, execution_context_dict: dict) -> dict: context.request_id, result.success, ) - return result.to_dict() + + # Strip sensitive/bulky fields before returning via Celery result + # backend. Celery's trace logger prints the full return value, so + # customer document content must not appear in logs. + result_dict = result.to_dict() + metadata = result_dict.get("data", {}).get("metadata", {}) + metadata.pop("extracted_text", None) + metadata.pop("context", None) + return result_dict From 0cbd10a6bcab1e80c476026e4b61c8f47a53a4ea Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 4 Mar 2026 21:19:10 +0530 Subject: [PATCH 22/64] adding worker for callbacks --- backend/backend/celery_config.py | 8 +++---- .../prompt_studio_core_v2/tasks.py | 9 ++++++- .../prompt_studio_core_v2/views.py | 12 +++++----- docker/docker-compose.yaml | 24 +++++++++++++++++++ docker/sample.compose.override.yaml | 17 +++++++++++++ 5 files changed, 59 insertions(+), 11 deletions(-) diff --git a/backend/backend/celery_config.py b/backend/backend/celery_config.py index 5bac942d10..563fe9126f 100644 --- a/backend/backend/celery_config.py +++ b/backend/backend/celery_config.py @@ -32,7 +32,7 @@ class CeleryConfig: task_acks_late = True - # Prompt Studio IDE tasks run on the default "celery" queue. - # The callback tasks (ide_*) are sub-second ORM writes + Socket.IO - # emits. The legacy tasks are kept for backward compatibility. - # No explicit routing needed — all go to default "celery" queue. + # Prompt Studio IDE callback tasks (ide_index_complete, ide_prompt_complete, etc.) + # run on the "prompt_studio_callback" queue, processed by a dedicated Django + # backend Celery worker (worker-prompt-studio-callback in docker-compose). + # These are sub-second ORM writes + Socket.IO emits after executor completion. diff --git a/backend/prompt_studio/prompt_studio_core_v2/tasks.py b/backend/prompt_studio/prompt_studio_core_v2/tasks.py index 1f9d917359..be0715b74c 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/tasks.py +++ b/backend/prompt_studio/prompt_studio_core_v2/tasks.py @@ -97,7 +97,7 @@ def _emit_error( # # These are lightweight callbacks invoked by Celery `link` / `link_error` # after the executor worker finishes. They run on the backend -# (celery_prompt_studio queue) and do only post-ORM writes + socket +# (prompt_studio_callback queue) and do only post-ORM writes + socket # emission — no heavy computation. # ------------------------------------------------------------------ @@ -171,6 +171,13 @@ def ide_index_complete( return result except Exception as e: logger.exception("ide_index_complete callback failed") + # Clear the indexing flag so subsequent requests are not blocked + try: + DocumentIndexingService.remove_document_indexing( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key + ) + except Exception: + logger.exception("Failed to clear indexing flag for %s", doc_id_key) _emit_error( log_events_id, executor_task_id, diff --git a/backend/prompt_studio/prompt_studio_core_v2/views.py b/backend/prompt_studio/prompt_studio_core_v2/views.py index 2a9850db3e..5cfa30b30e 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/views.py +++ b/backend/prompt_studio/prompt_studio_core_v2/views.py @@ -396,12 +396,12 @@ def index_document(self, request: HttpRequest, pk: Any = None) -> Response: on_success=signature( "ide_index_complete", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery", + queue="prompt_studio_callback", ), on_error=signature( "ide_index_error", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery", + queue="prompt_studio_callback", ), task_id=executor_task_id, ) @@ -477,12 +477,12 @@ def fetch_response(self, request: HttpRequest, pk: Any = None) -> Response: on_success=signature( "ide_prompt_complete", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery", + queue="prompt_studio_callback", ), on_error=signature( "ide_prompt_error", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery", + queue="prompt_studio_callback", ), task_id=executor_task_id, ) @@ -553,12 +553,12 @@ def single_pass_extraction(self, request: HttpRequest, pk: uuid) -> Response: on_success=signature( "ide_prompt_complete", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery", + queue="prompt_studio_callback", ), on_error=signature( "ide_prompt_error", kwargs={"callback_kwargs": cb_kwargs}, - queue="celery", + queue="prompt_studio_callback", ), task_id=executor_task_id, ) diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index dfe0fad479..1318d515d6 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -57,6 +57,30 @@ services: labels: - traefik.enable=false + # Celery worker for Prompt Studio IDE callbacks. + # Processes post-execution ORM updates and Socket.IO notifications + # after executor workers complete tasks (ide_index_complete, ide_prompt_complete, etc.). + worker-prompt-studio-callback: + image: unstract/backend:${VERSION} + container_name: unstract-worker-prompt-studio-callback + restart: unless-stopped + entrypoint: .venv/bin/celery + command: "-A backend worker --loglevel=info -Q prompt_studio_callback --autoscale=${WORKER_PROMPT_STUDIO_CALLBACK_AUTOSCALE:-4,1}" + env_file: + - ../backend/.env + - ./essentials.env + depends_on: + - db + - redis + - rabbitmq + environment: + - ENVIRONMENT=development + - APPLICATION_NAME=unstract-worker-prompt-studio-callback + labels: + - traefik.enable=false + volumes: + - prompt_studio_data:/app/prompt-studio-data + # Celery Flower celery-flower: image: unstract/backend:${VERSION} diff --git a/docker/sample.compose.override.yaml b/docker/sample.compose.override.yaml index eeb728c822..32f5d3573d 100644 --- a/docker/sample.compose.override.yaml +++ b/docker/sample.compose.override.yaml @@ -319,6 +319,23 @@ services: - action: rebuild path: ../workers/uv.lock + ######################################################################################################### + # Prompt Studio callback worker (Django backend, processes prompt_studio_callback queue) + worker-prompt-studio-callback: + build: + dockerfile: docker/dockerfiles/backend.Dockerfile + context: .. + develop: + watch: + - action: sync+restart + path: ../backend/ + target: /app + ignore: [.venv/, __pycache__/, "*.pyc", .pytest_cache/, .mypy_cache/] + - action: sync+restart + path: ../unstract/ + target: /unstract + ignore: [.venv/, __pycache__/, "*.pyc", .pytest_cache/, .mypy_cache/] + # V1 workers disabled by default (use workers-v2 profile instead) worker: profiles: From 2b1ab1e4982ba01cf5e45ee2f90d07260d378d45 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 5 Mar 2026 16:27:45 +0530 Subject: [PATCH 23/64] adding worker for callbacks --- docker/dockerfiles/worker-unified.Dockerfile | 7 +++-- workers/executor/executors/answer_prompt.py | 2 +- workers/executor/executors/legacy_executor.py | 26 ++++++++++++++++- .../executor/executors/plugins/protocols.py | 28 +++++++++++++------ workers/executor/tasks.py | 10 +++++-- workers/tests/test_sanity_phase6c.py | 4 +-- 6 files changed, 58 insertions(+), 19 deletions(-) diff --git a/docker/dockerfiles/worker-unified.Dockerfile b/docker/dockerfiles/worker-unified.Dockerfile index 5365a0a863..0ea425b623 100644 --- a/docker/dockerfiles/worker-unified.Dockerfile +++ b/docker/dockerfiles/worker-unified.Dockerfile @@ -84,13 +84,14 @@ RUN uv sync --group deploy --locked && \ { chown -R worker:worker ./run-worker.sh ./run-worker-docker.sh 2>/dev/null || true; } # Install executor plugins from workers/plugins/ (cloud-only, no-op for OSS). -# Each plugin with an "unstract.executor.executors" entry point gets installed -# so that importlib.metadata.entry_points() can discover it at worker startup. +# Plugins register via setuptools entry points in two groups: +# - unstract.executor.executors (executor classes, e.g. table_extractor) +# - unstract.executor.plugins (utility plugins, e.g. highlight-data, challenge) # Editable installs (-e) ensure Path(__file__) resolves to the source directory, # giving plugins access to non-Python assets (.md prompts, .txt templates, etc.). RUN for plugin_dir in /app/plugins/*/; do \ if [ -f "$plugin_dir/pyproject.toml" ] && \ - grep -q 'unstract.executor.executors' "$plugin_dir/pyproject.toml" 2>/dev/null; then \ + grep -qE 'unstract\.executor\.(executors|plugins)' "$plugin_dir/pyproject.toml" 2>/dev/null; then \ echo "Installing executor plugin: $(basename $plugin_dir)" && \ uv pip install -e "$plugin_dir" || true; \ fi; \ diff --git a/workers/executor/executors/answer_prompt.py b/workers/executor/executors/answer_prompt.py index 925cf95e5e..0cb1635332 100644 --- a/workers/executor/executors/answer_prompt.py +++ b/workers/executor/executors/answer_prompt.py @@ -92,7 +92,7 @@ def extract_variable( ) if promptx != output[PSKeys.PROMPT]: - logger.info("Prompt after variable replacement: %s", promptx) + logger.debug("Prompt modified by variable replacement for: %s", output.get(PSKeys.NAME, "")) return promptx @staticmethod diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 18ddbe0490..324878970f 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -979,6 +979,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: # ---- Initialize highlight plugin (if enabled + installed) ---------- process_text_fn = None enable_highlight = tool_settings.get(PSKeys.ENABLE_HIGHLIGHT, False) + enable_word_confidence = tool_settings.get(PSKeys.ENABLE_WORD_CONFIDENCE, False) if enable_highlight: from executor.executors.plugins import ExecutorPluginLoader @@ -990,7 +991,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: highlight_instance = highlight_cls( file_path=file_path, fs_instance=fs_instance, - execution_source=execution_source, + enable_word_confidence=enable_word_confidence, ) process_text_fn = highlight_instance.run logger.info( @@ -998,6 +999,29 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: doc_name, ) + # ---- Merge tool_settings as defaults into each prompt output -------- + # Single-pass payloads carry adapter IDs and chunk config in + # tool_settings only (not per-prompt), while answer_prompt payloads + # carry them per-prompt. Merging tool_settings as a base ensures + # both paths work. + _ts_defaults = { + k: v + for k, v in tool_settings.items() + if k + in { + PSKeys.CHUNK_SIZE, + PSKeys.CHUNK_OVERLAP, + PSKeys.LLM, + PSKeys.VECTOR_DB, + PSKeys.EMBEDDING, + PSKeys.X2TEXT_ADAPTER, + PSKeys.RETRIEVAL_STRATEGY, + PSKeys.SIMILARITY_TOP_K, + } + } + if _ts_defaults: + prompts = [{**_ts_defaults, **p} for p in prompts] + # ---- First pass: collect variable names + required fields ---------- for output in prompts: variable_names.append(output[PSKeys.NAME]) diff --git a/workers/executor/executors/plugins/protocols.py b/workers/executor/executors/plugins/protocols.py index 4a8ebd7d5f..bdb656a206 100644 --- a/workers/executor/executors/plugins/protocols.py +++ b/workers/executor/executors/plugins/protocols.py @@ -10,23 +10,33 @@ @runtime_checkable class HighlightDataProtocol(Protocol): - """Cross-cutting: source attribution from LLMWhisperer metadata.""" + """Cross-cutting: source attribution from LLMWhisperer metadata. + + Matches the cloud ``HighlightData`` plugin constructor which + accepts ``enable_word_confidence`` (not ``execution_source``). + The filesystem instance is determined by the caller and passed in. + """ def __init__( self, file_path: str, - fs_instance: Any, - execution_source: str = "", + fs_instance: Any = None, + enable_word_confidence: bool = False, **kwargs: Any, ) -> None: ... - def run(self, response: str, is_json: bool = False, **kwargs: Any) -> dict: ... - - def get_highlight_data(self) -> Any: ... - - def get_confidence_data(self) -> Any: ... + def run( + self, + response: Any = None, + is_json: bool = False, + original_text: str = "", + **kwargs: Any, + ) -> dict: ... - def extract_word_confidence(self, **kwargs: Any) -> dict: ... + @staticmethod + def extract_word_confidence( + original_text: str, is_json: bool = False + ) -> dict: ... @runtime_checkable diff --git a/workers/executor/tasks.py b/workers/executor/tasks.py index 32928b40a5..88cf68b15b 100644 --- a/workers/executor/tasks.py +++ b/workers/executor/tasks.py @@ -110,10 +110,14 @@ def execute_extraction(self, execution_context_dict: dict) -> dict: ) # Strip sensitive/bulky fields before returning via Celery result - # backend. Celery's trace logger prints the full return value, so - # customer document content must not appear in logs. + # backend. The trace logger is already suppressed (worker.py sets + # celery.app.trace to WARNING), but we still remove raw extracted + # text to avoid bloating the result backend storage. + # + # NOTE: Do NOT strip "context" or "highlight_data" — the backend + # (PromptStudioHelper / OutputManagerHelper) reads these from the + # result to persist in the database and return to the IDE. result_dict = result.to_dict() metadata = result_dict.get("data", {}).get("metadata", {}) metadata.pop("extracted_text", None) - metadata.pop("context", None) return result_dict diff --git a/workers/tests/test_sanity_phase6c.py b/workers/tests/test_sanity_phase6c.py index 32dbb2bb39..54388f6fee 100644 --- a/workers/tests/test_sanity_phase6c.py +++ b/workers/tests/test_sanity_phase6c.py @@ -316,11 +316,11 @@ def test_highlight_plugin_initialized_when_enabled( result = executor._handle_answer_prompt(ctx) assert result.success - # Verify highlight plugin was instantiated + # Verify highlight plugin was instantiated with correct args mock_highlight_cls.assert_called_once_with( file_path="/data/doc.txt", fs_instance=mock_fs, - execution_source="ide", + enable_word_confidence=False, ) # Verify process_text was the highlight instance's run method llm_complete_call = mock_llm.complete.call_args From 4122f08512bda0afbf1fcb411edaf7235d780c6b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Mar 2026 10:59:18 +0000 Subject: [PATCH 24/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- workers/executor/executors/answer_prompt.py | 5 ++++- workers/executor/executors/plugins/protocols.py | 4 +--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/workers/executor/executors/answer_prompt.py b/workers/executor/executors/answer_prompt.py index 0cb1635332..859f205bba 100644 --- a/workers/executor/executors/answer_prompt.py +++ b/workers/executor/executors/answer_prompt.py @@ -92,7 +92,10 @@ def extract_variable( ) if promptx != output[PSKeys.PROMPT]: - logger.debug("Prompt modified by variable replacement for: %s", output.get(PSKeys.NAME, "")) + logger.debug( + "Prompt modified by variable replacement for: %s", + output.get(PSKeys.NAME, ""), + ) return promptx @staticmethod diff --git a/workers/executor/executors/plugins/protocols.py b/workers/executor/executors/plugins/protocols.py index bdb656a206..fb4d676b37 100644 --- a/workers/executor/executors/plugins/protocols.py +++ b/workers/executor/executors/plugins/protocols.py @@ -34,9 +34,7 @@ def run( ) -> dict: ... @staticmethod - def extract_word_confidence( - original_text: str, is_json: bool = False - ) -> dict: ... + def extract_word_confidence(original_text: str, is_json: bool = False) -> dict: ... @runtime_checkable From 1ceb352da07388eb2686f7592242a741a89070f6 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 5 Mar 2026 19:25:34 +0530 Subject: [PATCH 25/64] adding worker for callbacks --- .../prompt_studio_helper.py | 29 +++++++++---------- .../prompt_studio_core_v2/views.py | 17 ++++++++++- workers/executor/executors/legacy_executor.py | 6 ++++ 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py index f11231415f..fcc146b347 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py +++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py @@ -546,14 +546,6 @@ def build_fetch_response_payload( tool=util, ) - if DocumentIndexingService.is_document_indexing( - org_id=org_id, user_id=user_id, doc_id_key=doc_id - ): - return None, { - "status": IndexingStatus.PENDING_STATUS.value, - "message": IndexingStatus.DOCUMENT_BEING_INDEXED.value, - } - # Extract (blocking, usually cached) extracted_text = PromptStudioHelper.dynamic_extractor( profile_manager=profile_manager, @@ -794,6 +786,9 @@ def build_single_pass_payload( settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), "" ), TSPKeys.SUMMARIZE_AS_SOURCE: tool.summarize_as_source, + TSPKeys.RETRIEVAL_STRATEGY: default_profile.retrieval_strategy + or TSPKeys.SIMPLE, + TSPKeys.SIMILARITY_TOP_K: default_profile.similarity_top_k, } for p in prompts: @@ -1450,13 +1445,6 @@ def _fetch_response( fs=fs_instance, tool=util, ) - if DocumentIndexingService.is_document_indexing( - org_id=org_id, user_id=user_id, doc_id_key=doc_id - ): - return { - "status": IndexingStatus.PENDING_STATUS.value, - "output": IndexingStatus.DOCUMENT_BEING_INDEXED.value, - } logger.info(f"Extracting text from {file_path} for {doc_id}") extracted_text = PromptStudioHelper.dynamic_extractor( profile_manager=profile_manager, @@ -1772,6 +1760,13 @@ def dynamic_indexer( ) return {"status": IndexingStatus.COMPLETED_STATUS.value, "output": doc_id} except (IndexingError, IndexingAPIError, SdkError) as e: + # Clear the indexing flag so subsequent requests are not blocked + try: + DocumentIndexingService.remove_document_indexing( + org_id=org_id, user_id=user_id, doc_id_key=doc_id_key + ) + except Exception: + logger.exception("Failed to clear indexing flag for %s", doc_id_key) msg = str(e) if isinstance(e, SdkError) and hasattr(e.actual_err, "response"): msg = e.actual_err.response.json().get("error", str(e)) @@ -1862,6 +1857,10 @@ def _fetch_single_pass_response( settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), "" ) tool_settings[TSPKeys.SUMMARIZE_AS_SOURCE] = tool.summarize_as_source + tool_settings[TSPKeys.RETRIEVAL_STRATEGY] = ( + default_profile.retrieval_strategy or TSPKeys.SIMPLE + ) + tool_settings[TSPKeys.SIMILARITY_TOP_K] = default_profile.similarity_top_k for prompt in prompts: if not prompt.prompt: raise EmptyPromptError() diff --git a/backend/prompt_studio/prompt_studio_core_v2/views.py b/backend/prompt_studio/prompt_studio_core_v2/views.py index 5cfa30b30e..b28d9159cf 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/views.py +++ b/backend/prompt_studio/prompt_studio_core_v2/views.py @@ -524,12 +524,27 @@ def single_pass_extraction(self, request: HttpRequest, pk: uuid) -> Response: document: DocumentManager = DocumentManager.objects.get(pk=document_id) doc_path = str(Path(doc_path) / document.document_name) - # Fetch all active prompts + # Fetch prompts eligible for single-pass extraction. + # Mirrors the filtering in _execute_prompts_in_single_pass: + # only active, non-NOTES, non-TABLE/RECORD prompts. prompts = list( ToolStudioPrompt.objects.filter(tool_id=custom_tool.tool_id).order_by( "sequence_number" ) ) + prompts = [ + p + for p in prompts + if p.prompt_type != ToolStudioPromptKeys.NOTES + and p.active + and p.enforce_type != ToolStudioPromptKeys.TABLE + and p.enforce_type != ToolStudioPromptKeys.RECORD + ] + if not prompts: + return Response( + {"error": "No active prompts found for single pass extraction."}, + status=status.HTTP_400_BAD_REQUEST, + ) context, cb_kwargs = PromptStudioHelper.build_single_pass_payload( tool=custom_tool, diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 324878970f..4e078e43bd 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -998,6 +998,12 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: "Highlight plugin initialized for file=%s", doc_name, ) + else: + logger.warning( + "Highlight is enabled but highlight-data plugin is not " + "installed. Coordinates will not be produced. Install " + "the plugin via: pip install -e " + ) # ---- Merge tool_settings as defaults into each prompt output -------- # Single-pass payloads carry adapter IDs and chunk config in From 7c1266bc09500ddec3fd561d1a462bbe913c66e5 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 5 Mar 2026 20:10:37 +0530 Subject: [PATCH 26/64] adding worker for callbacks --- .../file_processing/structure_tool_task.py | 2 +- workers/tests/test_sanity_phase4.py | 35 +++++++++++----- workers/tests/test_sanity_phase6a.py | 42 +++++++++++++++---- workers/tests/test_sanity_phase6j.py | 4 +- 4 files changed, 63 insertions(+), 20 deletions(-) diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index eb75088a5f..32b3a6bcbc 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -581,7 +581,7 @@ def _write_tool_result( # Add tool_metadata (matches BaseTool._update_exec_metadata) # The destination connector reads output_type from tool_metadata[-1] tool_meta_entry = { - "tool_name": "structure", + "tool_name": "structure_tool", "output_type": "JSON", "elapsed_time": elapsed_time, } diff --git a/workers/tests/test_sanity_phase4.py b/workers/tests/test_sanity_phase4.py index f57ef0af28..2d5e72715c 100644 --- a/workers/tests/test_sanity_phase4.py +++ b/workers/tests/test_sanity_phase4.py @@ -47,6 +47,9 @@ _PATCH_INDEX_UTILS = ( "unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key" ) +_PATCH_PLUGIN_LOADER = ( + "executor.executors.plugins.loader.ExecutorPluginLoader.get" +) # --------------------------------------------------------------------------- # Fixtures @@ -497,10 +500,13 @@ def test_ide_index_failure(self, mock_deps, mock_get_fs, eager_app): class TestIDEAnswerPrompt: """IDE answer_prompt payload → executor → {output, metadata, metrics}.""" + @patch(_PATCH_PLUGIN_LOADER, return_value=None) @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) - def test_ide_answer_prompt_text(self, mock_shim_cls, mock_deps, _mock_idx, eager_app): + def test_ide_answer_prompt_text( + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app + ): """IDE text prompt → output dict with prompt_key → answer.""" llm = _mock_llm("INV-2024-001") mock_deps.return_value = _mock_prompt_deps(llm) @@ -517,11 +523,12 @@ def test_ide_answer_prompt_text(self, mock_shim_cls, mock_deps, _mock_idx, eager assert "metrics" in result.data assert result.data["output"]["invoice_number"] == "INV-2024-001" + @patch(_PATCH_PLUGIN_LOADER, return_value=None) @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_answer_prompt_metadata_has_run_id( - self, mock_shim_cls, mock_deps, _mock_idx, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app ): """IDE response metadata contains run_id and file_name.""" llm = _mock_llm("answer") @@ -536,11 +543,12 @@ def test_ide_answer_prompt_metadata_has_run_id( assert metadata["run_id"] == "run-ide-ap" assert metadata["file_name"] == "invoice.pdf" + @patch(_PATCH_PLUGIN_LOADER, return_value=None) @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_answer_prompt_with_eval_settings( - self, mock_shim_cls, mock_deps, _mock_idx, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app ): """Prompt with eval_settings passes through to executor cleanly.""" llm = _mock_llm("answer") @@ -560,11 +568,12 @@ def test_ide_answer_prompt_with_eval_settings( assert result.success is True + @patch(_PATCH_PLUGIN_LOADER, return_value=None) @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_answer_prompt_platform_key_reaches_shim( - self, mock_shim_cls, mock_deps, _mock_idx, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app ): """PLATFORM_SERVICE_API_KEY in payload reaches ExecutorToolShim.""" llm = _mock_llm("answer") @@ -579,11 +588,12 @@ def test_ide_answer_prompt_platform_key_reaches_shim( call_kwargs = mock_shim_cls.call_args assert call_kwargs.kwargs.get("platform_api_key") == "pk-ide-test" + @patch(_PATCH_PLUGIN_LOADER, return_value=None) @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_answer_prompt_webhook_settings( - self, mock_shim_cls, mock_deps, _mock_idx, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app ): """Prompt with webhook settings passes through cleanly.""" llm = _mock_llm("answer") @@ -604,11 +614,12 @@ def test_ide_answer_prompt_webhook_settings( class TestIDESinglePass: """IDE single_pass_extraction → executor → same shape as answer_prompt.""" + @patch(_PATCH_PLUGIN_LOADER, return_value=None) @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_single_pass_multi_prompt( - self, mock_shim_cls, mock_deps, _mock_idx, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app ): """Single pass with multiple prompts → all fields in output.""" llm = _mock_llm("single pass value") @@ -624,11 +635,12 @@ def test_ide_single_pass_multi_prompt( assert "revenue" in result.data["output"] assert "date" in result.data["output"] + @patch(_PATCH_PLUGIN_LOADER, return_value=None) @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_single_pass_has_metadata( - self, mock_shim_cls, mock_deps, _mock_idx, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app ): """Single pass returns metadata with run_id.""" llm = _mock_llm("value") @@ -688,11 +700,12 @@ def test_dispatcher_extract_round_trip( assert result.success is True assert result.data["extracted_text"] == "dispatcher extracted" + @patch(_PATCH_PLUGIN_LOADER, return_value=None) @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_dispatcher_answer_prompt_round_trip( - self, mock_shim_cls, mock_deps, _mock_idx, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app ): """ExecutionDispatcher.dispatch() → answer_prompt → ExecutionResult.""" llm = _mock_llm("dispatcher answer") @@ -711,11 +724,12 @@ def test_dispatcher_answer_prompt_round_trip( assert result.data["output"]["invoice_number"] == "dispatcher answer" assert "metadata" in result.data + @patch(_PATCH_PLUGIN_LOADER, return_value=None) @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_dispatcher_single_pass_round_trip( - self, mock_shim_cls, mock_deps, _mock_idx, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app ): """ExecutionDispatcher.dispatch() → single_pass → ExecutionResult.""" llm = _mock_llm("sp dispatch") @@ -789,11 +803,12 @@ def test_ide_source_reaches_extract_handler( # This is verified by the fact that no dump_json was called # on the fs mock. In IDE mode, whisper_hash metadata is skipped. + @patch(_PATCH_PLUGIN_LOADER, return_value=None) @patch(_PATCH_INDEX_UTILS, return_value="doc-id-ide") @patch(_PATCH_PROMPT_DEPS) @patch(_PATCH_SHIM) def test_ide_source_in_answer_prompt_enables_variable_replacement( - self, mock_shim_cls, mock_deps, _mock_idx, eager_app + self, mock_shim_cls, mock_deps, _mock_idx, _mock_plugin, eager_app ): """execution_source='ide' in payload sets is_ide=True for variable replacement.""" llm = _mock_llm("var answer") diff --git a/workers/tests/test_sanity_phase6a.py b/workers/tests/test_sanity_phase6a.py index eb7e2c6c2d..d35833fc2c 100644 --- a/workers/tests/test_sanity_phase6a.py +++ b/workers/tests/test_sanity_phase6a.py @@ -32,26 +32,50 @@ def _reset_plugin_loader(): class TestPluginLoaderNoPlugins: - """When no cloud plugins are installed, loader returns None / empty.""" + """When no cloud plugins are installed, loader returns None / empty. - def test_get_returns_none_for_unknown_plugin(self): + Mocks entry_points to simulate a clean OSS environment where + no cloud executor plugins are pip-installed. + """ + + @patch( + "importlib.metadata.entry_points", + return_value=[], + ) + def test_get_returns_none_for_unknown_plugin(self, _mock_eps): result = ExecutorPluginLoader.get("nonexistent-plugin") assert result is None - def test_get_returns_none_for_highlight_data(self): + @patch( + "importlib.metadata.entry_points", + return_value=[], + ) + def test_get_returns_none_for_highlight_data(self, _mock_eps): """highlight-data is a cloud plugin, not installed in OSS.""" result = ExecutorPluginLoader.get("highlight-data") assert result is None - def test_get_returns_none_for_challenge(self): + @patch( + "importlib.metadata.entry_points", + return_value=[], + ) + def test_get_returns_none_for_challenge(self, _mock_eps): result = ExecutorPluginLoader.get("challenge") assert result is None - def test_get_returns_none_for_evaluation(self): + @patch( + "importlib.metadata.entry_points", + return_value=[], + ) + def test_get_returns_none_for_evaluation(self, _mock_eps): result = ExecutorPluginLoader.get("evaluation") assert result is None - def test_discover_executors_returns_empty(self): + @patch( + "importlib.metadata.entry_points", + return_value=[], + ) + def test_discover_executors_returns_empty(self, _mock_eps): discovered = ExecutorPluginLoader.discover_executors() assert discovered == [] @@ -60,7 +84,8 @@ def test_discover_executors_returns_empty(self): class TestPluginLoaderClear: - def test_clear_resets_plugins(self): + @patch("importlib.metadata.entry_points", return_value=[]) + def test_clear_resets_plugins(self, _mock_eps): # Force discovery (caches empty dict) ExecutorPluginLoader.get("anything") assert ExecutorPluginLoader._plugins is not None @@ -68,7 +93,8 @@ def test_clear_resets_plugins(self): ExecutorPluginLoader.clear() assert ExecutorPluginLoader._plugins is None - def test_get_after_clear_re_discovers(self): + @patch("importlib.metadata.entry_points", return_value=[]) + def test_get_after_clear_re_discovers(self, _mock_eps): """After clear(), next get() re-runs discovery.""" ExecutorPluginLoader.get("x") assert ExecutorPluginLoader._plugins == {} diff --git a/workers/tests/test_sanity_phase6j.py b/workers/tests/test_sanity_phase6j.py index 7e900652d9..2336b65d05 100644 --- a/workers/tests/test_sanity_phase6j.py +++ b/workers/tests/test_sanity_phase6j.py @@ -373,10 +373,12 @@ def test_unregistered_executor_returns_failure(self, eager_app): # --------------------------------------------------------------------------- class TestCrossCuttingHighlight: - def test_highlight_plugin_not_installed_no_error(self): + @patch("importlib.metadata.entry_points", return_value=[]) + def test_highlight_plugin_not_installed_no_error(self, _mock_eps): """When highlight plugin not installed, extraction still works.""" from executor.executors.plugins.loader import ExecutorPluginLoader + ExecutorPluginLoader.clear() assert ExecutorPluginLoader.get("highlight-data") is None # No error — graceful degradation From 0b84d9e430503fb505083b5b729f040e80d64b49 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 5 Mar 2026 21:59:54 +0530 Subject: [PATCH 27/64] adding worker for callbacks --- backend/workflow_manager/workflow_v2/dto.py | 4 +- workers/executor/executors/legacy_executor.py | 84 +++++++++++++++---- workers/tests/test_sanity_phase6d.py | 46 +++++++++- 3 files changed, 112 insertions(+), 22 deletions(-) diff --git a/backend/workflow_manager/workflow_v2/dto.py b/backend/workflow_manager/workflow_v2/dto.py index b2398e883e..aa6d6039a5 100644 --- a/backend/workflow_manager/workflow_v2/dto.py +++ b/backend/workflow_manager/workflow_v2/dto.py @@ -61,11 +61,11 @@ def remove_result_metadata_keys(self, keys_to_remove: list[str] = []) -> None: for item in self.result: if not isinstance(item, dict): - break + continue result = item.get("result") if not isinstance(result, dict): - break + continue self._remove_specific_keys(result=result, keys_to_remove=keys_to_remove) diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 4e078e43bd..db4c77dba9 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -1100,6 +1100,73 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: file_path=file_path, ) + # TABLE/RECORD: delegate to TableExtractorExecutor in-process. + # The table executor plugin handles PDF table detection, + # header extraction, and CSV-to-JSON post-processing. + if output.get(PSKeys.TYPE) in (PSKeys.TABLE, PSKeys.RECORD): + from unstract.sdk1.execution.registry import ExecutorRegistry + + try: + table_executor = ExecutorRegistry.get("table") + except KeyError: + raise LegacyExecutorError( + message=( + "TABLE extraction requires the table executor " + "plugin. Install the table_extractor plugin." + ) + ) + + table_ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id=run_id, + execution_source=execution_source, + organization_id=context.organization_id, + request_id=context.request_id, + executor_params={ + "llm_adapter_instance_id": output.get(PSKeys.LLM, ""), + "table_settings": output.get(PSKeys.TABLE_SETTINGS, {}), + "prompt": output.get(PSKeys.PROMPT, ""), + "PLATFORM_SERVICE_API_KEY": platform_api_key, + "execution_id": execution_id, + "tool_id": tool_id, + "file_name": doc_name, + }, + ) + table_ctx._log_component = self._log_component + table_ctx.log_events_id = self._log_events_id + + shim.stream_log(f"Running table extraction for: {prompt_name}") + table_result = table_executor.execute(table_ctx) + + if table_result.success: + structured_output[prompt_name] = table_result.data.get( + "output", "" + ) + table_metrics = ( + table_result.data.get("metadata", {}).get("metrics", {}) + ) + metrics.setdefault(prompt_name, {}).update( + {"table_extraction": table_metrics} + ) + logger.info( + "TABLE extraction completed: prompt=%s", prompt_name + ) + else: + structured_output[prompt_name] = "" + logger.error( + "TABLE extraction failed for prompt=%s: %s", + prompt_name, + table_result.error, + ) + shim.stream_log(f"Completed prompt: {prompt_name}") + continue + + if output.get(PSKeys.TYPE) == PSKeys.LINE_ITEM: + raise LegacyExecutorError( + message="LINE_ITEM extraction is not supported." + ) + # Create adapters try: usage_kwargs = { @@ -1134,23 +1201,6 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: status_code = getattr(e, "status_code", None) or 500 raise LegacyExecutorError(message=msg, code=status_code) from e - # TABLE type is handled by TableExtractorExecutor (separate - # queue). LINE_ITEM is not supported. The backend dispatcher - # must route these types to the correct executor; if they - # reach LegacyExecutor it's a mis-route. - if output[PSKeys.TYPE] == PSKeys.TABLE: - raise LegacyExecutorError( - message=( - "TABLE extraction is handled by " - "TableExtractorExecutor. Route TABLE prompts " - "with executor_name='table'." - ) - ) - if output[PSKeys.TYPE] == PSKeys.LINE_ITEM: - raise LegacyExecutorError( - message="LINE_ITEM extraction is not supported." - ) - # ---- Retrieval + Answer ---------------------------------------- context_list: list[str] = [] try: diff --git a/workers/tests/test_sanity_phase6d.py b/workers/tests/test_sanity_phase6d.py index 7dee6226a8..91cc8cf72c 100644 --- a/workers/tests/test_sanity_phase6d.py +++ b/workers/tests/test_sanity_phase6d.py @@ -20,6 +20,7 @@ from executor.executors.answer_prompt import AnswerPromptService from executor.executors.constants import PromptServiceConstants as PSKeys from executor.executors.exceptions import LegacyExecutorError +from unstract.sdk1.execution.result import ExecutionResult # --------------------------------------------------------------------------- @@ -145,18 +146,57 @@ class TestTableLineItemGuard: @patch("executor.executors.legacy_executor.ExecutorToolShim") @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", return_value="doc-id-1") - def test_table_type_raises_with_routing_guidance( + def test_table_type_delegates_to_table_executor( self, mock_key, mock_shim_cls ): + """TABLE prompts are delegated to TableExtractorExecutor in-process.""" mock_shim_cls.return_value = MagicMock() executor = _get_executor() ctx = _make_context(output_type=PSKeys.TABLE) # "table" llm = _mock_llm() patches = _standard_patches(executor, llm) + mock_table_executor = MagicMock() + mock_table_executor.execute.return_value = ExecutionResult( + success=True, + data={"output": {"table_data": "extracted"}, "metadata": {"metrics": {}}}, + ) + with patches["_get_prompt_deps"], patches["shim"], patches["index_key"]: - with pytest.raises(LegacyExecutorError, match="TableExtractorExecutor"): - executor._handle_answer_prompt(ctx) + with patch( + "unstract.sdk1.execution.registry.ExecutorRegistry.get", + return_value=mock_table_executor, + ): + result = executor._handle_answer_prompt(ctx) + + assert result.success + assert result.data["output"]["field1"] == {"table_data": "extracted"} + mock_table_executor.execute.assert_called_once() + # Verify the sub-context was built with table executor params + sub_ctx = mock_table_executor.execute.call_args[0][0] + assert sub_ctx.executor_name == "table" + assert sub_ctx.operation == "table_extract" + + @patch("executor.executors.legacy_executor.ExecutorToolShim") + @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", + return_value="doc-id-1") + def test_table_type_raises_when_plugin_missing( + self, mock_key, mock_shim_cls + ): + """TABLE prompts raise error when table executor plugin is not installed.""" + mock_shim_cls.return_value = MagicMock() + executor = _get_executor() + ctx = _make_context(output_type=PSKeys.TABLE) # "table" + llm = _mock_llm() + patches = _standard_patches(executor, llm) + + with patches["_get_prompt_deps"], patches["shim"], patches["index_key"]: + with patch( + "unstract.sdk1.execution.registry.ExecutorRegistry.get", + side_effect=KeyError("No executor registered with name 'table'"), + ): + with pytest.raises(LegacyExecutorError, match="table executor plugin"): + executor._handle_answer_prompt(ctx) @patch("executor.executors.legacy_executor.ExecutorToolShim") @patch("unstract.sdk1.utils.indexing.IndexingUtils.generate_index_key", From 5b0629d41d555e813f8d6d514a1bb71bf6e77a38 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Mar 2026 16:30:45 +0000 Subject: [PATCH 28/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- workers/executor/executors/legacy_executor.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index db4c77dba9..728e1cf4ac 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -1140,18 +1140,14 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: table_result = table_executor.execute(table_ctx) if table_result.success: - structured_output[prompt_name] = table_result.data.get( - "output", "" - ) - table_metrics = ( - table_result.data.get("metadata", {}).get("metrics", {}) + structured_output[prompt_name] = table_result.data.get("output", "") + table_metrics = table_result.data.get("metadata", {}).get( + "metrics", {} ) metrics.setdefault(prompt_name, {}).update( {"table_extraction": table_metrics} ) - logger.info( - "TABLE extraction completed: prompt=%s", prompt_name - ) + logger.info("TABLE extraction completed: prompt=%s", prompt_name) else: structured_output[prompt_name] = "" logger.error( From 98ee4b94adf33599cdc62e9f7302a593ce7c6eb5 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Fri, 6 Mar 2026 18:15:21 +0530 Subject: [PATCH 29/64] Pluggable apps and plugins to fit the new async prompt execution architecture --- backend/api_v2/api_deployment_views.py | 4 +- backend/api_v2/deployment_helper.py | 33 +++++++++- .../prompt_studio_core_v2/tasks.py | 33 +++++----- backend/usage_v2/helper.py | 62 +++++++++++++++++++ .../manage-docs-modal/ManageDocsModal.jsx | 8 ++- .../x2text/llm_whisperer_v2/src/helper.py | 8 ++- .../llm_whisperer_v2/src/llm_whisperer_v2.py | 4 ++ unstract/sdk1/src/unstract/sdk1/platform.py | 6 +- unstract/sdk1/src/unstract/sdk1/vector_db.py | 2 +- workers/executor/executor_tool_shim.py | 7 +++ workers/executor/executors/legacy_executor.py | 23 +++++++ workers/executor/worker.py | 7 +++ .../file_processing/structure_tool_task.py | 6 ++ workers/tests/test_phase1_log_streaming.py | 13 +++- 14 files changed, 188 insertions(+), 28 deletions(-) diff --git a/backend/api_v2/api_deployment_views.py b/backend/api_v2/api_deployment_views.py index 7462864f04..b232fdfdc9 100644 --- a/backend/api_v2/api_deployment_views.py +++ b/backend/api_v2/api_deployment_views.py @@ -231,7 +231,9 @@ def get( if not enable_highlight: response.remove_result_metadata_keys(["highlight_data"]) response.remove_result_metadata_keys(["extracted_text"]) - if not include_metadata: + if include_metadata: + DeploymentHelper._enrich_result_with_usage_metadata(response) + else: response.remove_result_metadata_keys() if not include_metrics: response.remove_result_metrics() diff --git a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py index bfbff58b7b..55c38ef4a1 100644 --- a/backend/api_v2/deployment_helper.py +++ b/backend/api_v2/deployment_helper.py @@ -273,7 +273,9 @@ def execute_workflow( if not enable_highlight: result.remove_result_metadata_keys(["highlight_data"]) result.remove_result_metadata_keys(["extracted_text"]) - if not include_metadata: + if include_metadata: + cls._enrich_result_with_usage_metadata(result) + else: result.remove_result_metadata_keys() if not include_metrics: result.remove_result_metrics() @@ -293,6 +295,35 @@ def execute_workflow( ) return APIExecutionResponseSerializer(result).data + @staticmethod + def _enrich_result_with_usage_metadata(result: ExecutionResponse) -> None: + """Enrich each file result's metadata with per-model usage breakdown. + + Queries the Usage table for each file_execution_id and injects + cost arrays (extraction_llm, challenge_llm, embedding) into the + result metadata, matching the legacy prompt-service response format. + """ + if not isinstance(result.result, list): + return + + from usage_v2.helper import UsageHelper + + for item in result.result: + if not isinstance(item, dict): + continue + file_exec_id = item.get("file_execution_id") + if not file_exec_id: + continue + inner_result = item.get("result") + if not isinstance(inner_result, dict): + continue + metadata = inner_result.get("metadata") + if not isinstance(metadata, dict): + continue + usage_by_model = UsageHelper.get_usage_by_model(file_exec_id) + if usage_by_model: + metadata.update(usage_by_model) + @staticmethod def get_execution_status(execution_id: str) -> ExecutionResponse: """Current status of api execution. diff --git a/backend/prompt_studio/prompt_studio_core_v2/tasks.py b/backend/prompt_studio/prompt_studio_core_v2/tasks.py index be0715b74c..1a4c0ca712 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/tasks.py +++ b/backend/prompt_studio/prompt_studio_core_v2/tasks.py @@ -1,6 +1,7 @@ import json import logging import uuid +from datetime import date, datetime from typing import Any from account_v2.constants import Common @@ -14,23 +15,24 @@ PROMPT_STUDIO_RESULT_EVENT = "prompt_studio_result" -class _UUIDEncoder(json.JSONEncoder): - """JSON encoder that converts uuid.UUID objects to strings.""" +class _SafeEncoder(json.JSONEncoder): + """JSON encoder that converts uuid.UUID and datetime objects to strings.""" def default(self, obj: Any) -> Any: if isinstance(obj, uuid.UUID): return str(obj) + if isinstance(obj, (datetime, date)): + return obj.isoformat() return super().default(obj) def _json_safe(data: Any) -> Any: - """Round-trip through JSON to convert non-serializable types (UUID → str). + """Round-trip through JSON to convert non-serializable types. - DRF serializers return uuid.UUID objects for PrimaryKeyRelatedField - and UUIDField. Socket.IO's pubsub uses stdlib json.dumps which - cannot handle them, so we sanitize here before emitting. + Handles uuid.UUID (from DRF serializers) and datetime/date objects + (from plugins or ORM fields) that stdlib json.dumps cannot handle. """ - return json.loads(json.dumps(data, cls=_UUIDEncoder)) + return json.loads(json.dumps(data, cls=_SafeEncoder)) def _setup_state_store(log_events_id: str, request_id: str, org_id: str = "") -> None: @@ -171,13 +173,6 @@ def ide_index_complete( return result except Exception as e: logger.exception("ide_index_complete callback failed") - # Clear the indexing flag so subsequent requests are not blocked - try: - DocumentIndexingService.remove_document_indexing( - org_id=org_id, user_id=user_id, doc_id_key=doc_id_key - ) - except Exception: - logger.exception("Failed to clear indexing flag for %s", doc_id_key) _emit_error( log_events_id, executor_task_id, @@ -288,6 +283,12 @@ def ide_prompt_complete( data = result_dict.get("data", {}) + # Sanitize outputs and metadata so that any non-JSON-safe + # values (e.g. datetime from plugins) are converted before + # they reach Django JSONField saves. + outputs = _json_safe(data.get("output", {})) + metadata = _json_safe(data.get("metadata", {})) + # Re-fetch prompt ORM objects for OutputManagerHelper prompts = list( ToolStudioPrompt.objects.filter(prompt_id__in=prompt_ids).order_by( @@ -298,11 +299,11 @@ def ide_prompt_complete( response = OutputManagerHelper.handle_prompt_output_update( run_id=run_id, prompts=prompts, - outputs=data.get("output", []), + outputs=outputs, document_id=document_id, is_single_pass_extract=is_single_pass, profile_manager_id=profile_manager_id, - metadata=data.get("metadata", {}), + metadata=metadata, ) _emit_result(log_events_id, executor_task_id, operation, response) diff --git a/backend/usage_v2/helper.py b/backend/usage_v2/helper.py index 8cefb3b403..2e707e711f 100644 --- a/backend/usage_v2/helper.py +++ b/backend/usage_v2/helper.py @@ -74,6 +74,68 @@ def get_aggregated_token_count(run_id: str) -> dict: logger.error(f"An unexpected error occurred for run_id {run_id}: {str(e)}") raise APIException("Error while aggregating token counts") + @staticmethod + def get_usage_by_model(run_id: str) -> dict[str, list[dict[str, Any]]]: + """Get per-model usage breakdown matching prompt-service format. + + Groups usage data by (usage_type, llm_usage_reason, model_name) and + returns cost arrays keyed as 'extraction_llm', 'challenge_llm', + 'embedding', etc. — matching the legacy prompt-service response. + + Args: + run_id: The file_execution_id / run_id to query. + + Returns: + Dict with keys like 'extraction_llm', 'embedding' mapping to + lists of per-model cost entries. Empty dict on error. + """ + try: + rows = ( + Usage.objects.filter(run_id=run_id) + .values("usage_type", "llm_usage_reason", "model_name") + .annotate( + sum_input_tokens=Sum("prompt_tokens"), + sum_output_tokens=Sum("completion_tokens"), + sum_total_tokens=Sum("total_tokens"), + sum_embedding_tokens=Sum("embedding_tokens"), + sum_cost=Sum("cost_in_dollars"), + ) + ) + result: dict[str, list[dict[str, Any]]] = {} + for row in rows: + usage_type = row["usage_type"] + llm_reason = row["llm_usage_reason"] + cost_str = UsageHelper._format_float_positional( + row["sum_cost"] or 0.0 + ) + + key = usage_type + item: dict[str, Any] = { + "model_name": row["model_name"], + "cost_in_dollars": cost_str, + } + if llm_reason: + key = f"{llm_reason}_{usage_type}" + item["input_tokens"] = row["sum_input_tokens"] or 0 + item["output_tokens"] = row["sum_output_tokens"] or 0 + item["total_tokens"] = row["sum_total_tokens"] or 0 + else: + item["embedding_tokens"] = row["sum_embedding_tokens"] or 0 + + result.setdefault(key, []).append(item) + return result + except Exception as e: + logger.error( + "Error querying per-model usage for run_id %s: %s", run_id, e + ) + return {} + + @staticmethod + def _format_float_positional(value: float, precision: int = 10) -> str: + """Format float without scientific notation, stripping trailing zeros.""" + formatted: str = f"{value:.{precision}f}" + return formatted.rstrip("0").rstrip(".") if "." in formatted else formatted + @staticmethod def aggregate_usage_metrics(queryset: QuerySet) -> dict[str, Any]: """Aggregate usage metrics from a queryset of Usage objects. diff --git a/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx b/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx index 29d7d533e2..c1ccfb898c 100644 --- a/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx +++ b/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx @@ -219,9 +219,13 @@ function ManageDocsModal({ newMessages = newMessages.slice(0, lastIndex); } - // Filter only INFO and ERROR logs + // Filter only INFO and ERROR logs that are NOT from answer_prompt. + // Answer prompt messages carry a prompt_key in their component; + // indexing messages do not. newMessages = newMessages.filter( - (item) => item?.level === "INFO" || item?.level === "ERROR", + (item) => + (item?.level === "INFO" || item?.level === "ERROR") && + !item?.component?.prompt_key, ); // If there are no new INFO or ERROR messages, return early diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py index 8fda907903..0ab4b88429 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py @@ -203,6 +203,12 @@ def get_whisperer_params( ), WhispererConfig.ADD_LINE_NOS: extra_params.enable_highlight, WhispererConfig.INCLUDE_LINE_CONFIDENCE: extra_params.enable_highlight, + } + logger.info( + "HIGHLIGHT_DEBUG whisper params: ADD_LINE_NOS=%s", + params.get(WhispererConfig.ADD_LINE_NOS), + ) + params.update({ # Not providing default value to maintain legacy compatablity # these are optional params and identifiers for audit WhispererConfig.TAG: extra_params.tag @@ -219,7 +225,7 @@ def get_whisperer_params( WhispererDefaults.WAIT_TIMEOUT, ), WhispererConfig.WAIT_FOR_COMPLETION: WhispererDefaults.WAIT_FOR_COMPLETION, - } + }) if params[WhispererConfig.MODE] == Modes.LOW_COST.value: params.update( { diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py index 892339a9be..3a48a57647 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py @@ -82,6 +82,10 @@ def process( if fs is None: fs = FileStorage(provider=FileStorageProvider.LOCAL) enable_highlight = kwargs.get(X2TextConstants.ENABLE_HIGHLIGHT, False) + logger.info( + "HIGHLIGHT_DEBUG LLMWhispererV2.process: enable_highlight=%s", + enable_highlight, + ) extra_params = WhispererRequestParams( tag=kwargs.get(X2TextConstants.TAGS), enable_highlight=enable_highlight, diff --git a/unstract/sdk1/src/unstract/sdk1/platform.py b/unstract/sdk1/src/unstract/sdk1/platform.py index a7995164c4..e5ce7fc172 100644 --- a/unstract/sdk1/src/unstract/sdk1/platform.py +++ b/unstract/sdk1/src/unstract/sdk1/platform.py @@ -140,8 +140,8 @@ def _get_adapter_configuration( provider = adapter_data.get("adapter_id", "").split("|")[0] # TODO: Print metadata after redacting sensitive information tool.stream_log( - f"Retrieved config for '{adapter_instance_id}', type: " - f"'{adapter_type}', provider: '{provider}', name: '{adapter_name}'", + f"Retrieved adapter config — name: '{adapter_name}', " + f"type: '{adapter_type}', provider: '{provider}'", level=LogLevel.DEBUG, ) except HTTPError as e: @@ -188,7 +188,7 @@ def get_adapter_config( return adapter_metadata tool.stream_log( - f"Retrieving config from DB for '{adapter_instance_id}'", + "Retrieving adapter configuration from platform service", level=LogLevel.DEBUG, ) diff --git a/unstract/sdk1/src/unstract/sdk1/vector_db.py b/unstract/sdk1/src/unstract/sdk1/vector_db.py index 9638faf358..c46b1c0cb0 100644 --- a/unstract/sdk1/src/unstract/sdk1/vector_db.py +++ b/unstract/sdk1/src/unstract/sdk1/vector_db.py @@ -110,7 +110,7 @@ def _get_vector_db(self) -> BasePydanticVectorStore | VectorStore: return self.vector_db_adapter_class.get_vector_db_instance() except Exception as e: self._tool.stream_log( - log=f"Unable to get vector_db {self._adapter_instance_id}: {e}", + log=f"Unable to get vector database: {e}", level=LogLevel.ERROR, ) raise VectorDBError(f"Error getting vectorDB instance: {e}") from e diff --git a/workers/executor/executor_tool_shim.py b/workers/executor/executor_tool_shim.py index 8b7789e201..f62d5ef7bc 100644 --- a/workers/executor/executor_tool_shim.py +++ b/workers/executor/executor_tool_shim.py @@ -131,6 +131,13 @@ def stream_log( py_level = _LEVEL_MAP.get(level, logging.INFO) logger.log(py_level, log) + # Respect log level threshold for frontend publishing (matches + # StreamMixin.stream_log behaviour). Python logging above still + # captures everything for debugging. + _levels = [LogLevel.DEBUG, LogLevel.INFO, LogLevel.WARN, LogLevel.ERROR, LogLevel.FATAL] + if _levels.index(level) < _levels.index(self.log_level): + return + # Publish progress to frontend via the log consumer queue. if self.log_events_id: try: diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index db4c77dba9..61cdb0fe8b 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -180,6 +180,14 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: Path(file_path).name, context.run_id, ) + logger.info( + "HIGHLIGHT_DEBUG _handle_extract: enable_highlight=%s " + "x2text_type=%s file=%s run_id=%s", + enable_highlight, + type(x2text.x2text_instance).__name__, + Path(file_path).name, + context.run_id, + ) shim.stream_log("Initializing text extractor...") try: @@ -209,6 +217,21 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: fs=fs, ) + has_metadata = bool( + process_response.extraction_metadata + and process_response.extraction_metadata.line_metadata + ) + logger.info( + "HIGHLIGHT_DEBUG extraction result: has_line_metadata=%s " + "whisper_hash=%s run_id=%s", + has_metadata, + getattr( + process_response.extraction_metadata, "whisper_hash", None + ) + if process_response.extraction_metadata + else None, + context.run_id, + ) logger.info( "Text extraction completed: file=%s run_id=%s", Path(file_path).name, diff --git a/workers/executor/worker.py b/workers/executor/worker.py index 4b80c089da..ecef4e6873 100644 --- a/workers/executor/worker.py +++ b/workers/executor/worker.py @@ -4,6 +4,8 @@ Routes execute_extraction tasks to registered executors. """ +import logging + from shared.enums.worker_enums import WorkerType from shared.infrastructure.config.builder import WorkerBuilder from shared.infrastructure.config.registry import WorkerRegistry @@ -13,6 +15,11 @@ logger = WorkerLogger.setup(WorkerType.EXECUTOR) app, config = WorkerBuilder.build_celery_app(WorkerType.EXECUTOR) +# Suppress Celery trace logging of task return values. +# The trace logger prints the full result dict on task success, which +# can contain sensitive customer data (extracted text, summaries, etc.). +logging.getLogger("celery.app.trace").setLevel(logging.WARNING) + def check_executor_health(): """Custom health check for executor worker.""" diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index 32b3a6bcbc..d89e86bb16 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -273,6 +273,12 @@ def _execute_structure_tool_impl(params: dict) -> dict: is_single_pass_enabled = settings.get(_SK.SINGLE_PASS_EXTRACTION_MODE, False) challenge_llm = settings.get(_SK.CHALLENGE_LLM_ADAPTER_ID, "") is_highlight_enabled = settings.get(_SK.ENABLE_HIGHLIGHT, False) + logger.info( + "HIGHLIGHT_DEBUG structure_tool: is_highlight_enabled=%s " + "from settings keys=%s", + is_highlight_enabled, + list(settings.keys()), + ) tool_id = tool_metadata[_SK.TOOL_ID] tool_settings = tool_metadata[_SK.TOOL_SETTINGS] diff --git a/workers/tests/test_phase1_log_streaming.py b/workers/tests/test_phase1_log_streaming.py index c20c2c04ef..903449d75a 100644 --- a/workers/tests/test_phase1_log_streaming.py +++ b/workers/tests/test_phase1_log_streaming.py @@ -198,14 +198,21 @@ def test_level_mapping(self, mock_lp): component={}, ) - cases = [ - (LogLevel.DEBUG, "INFO"), + # DEBUG is below the shim's log_level (INFO) so it should NOT + # be published to the frontend. + shim.stream_log("msg", level=LogLevel.DEBUG) + assert not mock_lp.log_progress.called, ( + "DEBUG should be filtered out (below INFO threshold)" + ) + + # INFO and above should be published with the correct mapped level. + published_cases = [ (LogLevel.INFO, "INFO"), (LogLevel.WARN, "WARN"), (LogLevel.ERROR, "ERROR"), (LogLevel.FATAL, "ERROR"), ] - for sdk_level, expected_wf_level in cases: + for sdk_level, expected_wf_level in published_cases: mock_lp.reset_mock() shim.stream_log("msg", level=sdk_level) call_kwargs = mock_lp.log_progress.call_args From 3b35fb2a02cd73ba5945f94b7aa56bf7a321f6d0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Mar 2026 12:46:22 +0000 Subject: [PATCH 30/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- backend/usage_v2/helper.py | 8 +--- .../x2text/llm_whisperer_v2/src/helper.py | 38 ++++++++++--------- workers/executor/executor_tool_shim.py | 8 +++- workers/executor/executors/legacy_executor.py | 4 +- 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/backend/usage_v2/helper.py b/backend/usage_v2/helper.py index 2e707e711f..04f256ff9b 100644 --- a/backend/usage_v2/helper.py +++ b/backend/usage_v2/helper.py @@ -105,9 +105,7 @@ def get_usage_by_model(run_id: str) -> dict[str, list[dict[str, Any]]]: for row in rows: usage_type = row["usage_type"] llm_reason = row["llm_usage_reason"] - cost_str = UsageHelper._format_float_positional( - row["sum_cost"] or 0.0 - ) + cost_str = UsageHelper._format_float_positional(row["sum_cost"] or 0.0) key = usage_type item: dict[str, Any] = { @@ -125,9 +123,7 @@ def get_usage_by_model(run_id: str) -> dict[str, list[dict[str, Any]]]: result.setdefault(key, []).append(item) return result except Exception as e: - logger.error( - "Error querying per-model usage for run_id %s: %s", run_id, e - ) + logger.error("Error querying per-model usage for run_id %s: %s", run_id, e) return {} @staticmethod diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py index 0ab4b88429..14790065ae 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py @@ -208,24 +208,26 @@ def get_whisperer_params( "HIGHLIGHT_DEBUG whisper params: ADD_LINE_NOS=%s", params.get(WhispererConfig.ADD_LINE_NOS), ) - params.update({ - # Not providing default value to maintain legacy compatablity - # these are optional params and identifiers for audit - WhispererConfig.TAG: extra_params.tag - or config.get( - WhispererConfig.TAG, - WhispererDefaults.TAG, - ), - WhispererConfig.USE_WEBHOOK: config.get(WhispererConfig.USE_WEBHOOK, ""), - WhispererConfig.WEBHOOK_METADATA: config.get( - WhispererConfig.WEBHOOK_METADATA - ), - WhispererConfig.WAIT_TIMEOUT: config.get( - WhispererConfig.WAIT_TIMEOUT, - WhispererDefaults.WAIT_TIMEOUT, - ), - WhispererConfig.WAIT_FOR_COMPLETION: WhispererDefaults.WAIT_FOR_COMPLETION, - }) + params.update( + { + # Not providing default value to maintain legacy compatablity + # these are optional params and identifiers for audit + WhispererConfig.TAG: extra_params.tag + or config.get( + WhispererConfig.TAG, + WhispererDefaults.TAG, + ), + WhispererConfig.USE_WEBHOOK: config.get(WhispererConfig.USE_WEBHOOK, ""), + WhispererConfig.WEBHOOK_METADATA: config.get( + WhispererConfig.WEBHOOK_METADATA + ), + WhispererConfig.WAIT_TIMEOUT: config.get( + WhispererConfig.WAIT_TIMEOUT, + WhispererDefaults.WAIT_TIMEOUT, + ), + WhispererConfig.WAIT_FOR_COMPLETION: WhispererDefaults.WAIT_FOR_COMPLETION, + } + ) if params[WhispererConfig.MODE] == Modes.LOW_COST.value: params.update( { diff --git a/workers/executor/executor_tool_shim.py b/workers/executor/executor_tool_shim.py index f62d5ef7bc..63f48dd253 100644 --- a/workers/executor/executor_tool_shim.py +++ b/workers/executor/executor_tool_shim.py @@ -134,7 +134,13 @@ def stream_log( # Respect log level threshold for frontend publishing (matches # StreamMixin.stream_log behaviour). Python logging above still # captures everything for debugging. - _levels = [LogLevel.DEBUG, LogLevel.INFO, LogLevel.WARN, LogLevel.ERROR, LogLevel.FATAL] + _levels = [ + LogLevel.DEBUG, + LogLevel.INFO, + LogLevel.WARN, + LogLevel.ERROR, + LogLevel.FATAL, + ] if _levels.index(level) < _levels.index(self.log_level): return diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index d740b306b4..3d8623d020 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -225,9 +225,7 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: "HIGHLIGHT_DEBUG extraction result: has_line_metadata=%s " "whisper_hash=%s run_id=%s", has_metadata, - getattr( - process_response.extraction_metadata, "whisper_hash", None - ) + getattr(process_response.extraction_metadata, "whisper_hash", None) if process_response.extraction_metadata else None, context.run_id, From 1ab6031b64cf8fffffca80fc0ff4a74340654c9d Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Fri, 6 Mar 2026 21:07:35 +0530 Subject: [PATCH 31/64] Pluggable apps and plugins to fit the new async prompt execution architecture --- .../prompt_studio_core_v2/tasks.py | 11 ++- backend/workflow_manager/workflow_v2/dto.py | 19 ++++-- workers/executor/executors/legacy_executor.py | 67 +++++++++++++++++++ workers/executor/tasks.py | 13 +--- .../file_processing/structure_tool_task.py | 6 +- 5 files changed, 96 insertions(+), 20 deletions(-) diff --git a/backend/prompt_studio/prompt_studio_core_v2/tasks.py b/backend/prompt_studio/prompt_studio_core_v2/tasks.py index 1a4c0ca712..325ae28a0f 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/tasks.py +++ b/backend/prompt_studio/prompt_studio_core_v2/tasks.py @@ -307,7 +307,10 @@ def ide_prompt_complete( ) _emit_result(log_events_id, executor_task_id, operation, response) - return response + # Return minimal status — full data is sent via websocket above. + # Returning the full response would cause Celery to log sensitive + # extracted data in its "Task succeeded" message. + return {"status": "completed", "operation": operation} except Exception as e: logger.exception("ide_prompt_complete callback failed") _emit_error(log_events_id, executor_task_id, operation, str(e)) @@ -435,7 +438,8 @@ def run_fetch_response( profile_manager_id=profile_manager_id, ) _emit_result(log_events_id, self.request.id, "fetch_response", response) - return response + # Return minimal status to avoid logging sensitive extracted data + return {"status": "completed", "operation": "fetch_response"} except Exception as e: logger.exception("run_fetch_response failed") _emit_error(log_events_id, self.request.id, "fetch_response", str(e)) @@ -469,7 +473,8 @@ def run_single_pass_extraction( run_id=run_id, ) _emit_result(log_events_id, self.request.id, "single_pass_extraction", response) - return response + # Return minimal status to avoid logging sensitive extracted data + return {"status": "completed", "operation": "single_pass_extraction"} except Exception as e: logger.exception("run_single_pass_extraction failed") _emit_error(log_events_id, self.request.id, "single_pass_extraction", str(e)) diff --git a/backend/workflow_manager/workflow_v2/dto.py b/backend/workflow_manager/workflow_v2/dto.py index aa6d6039a5..cb59d30dfc 100644 --- a/backend/workflow_manager/workflow_v2/dto.py +++ b/backend/workflow_manager/workflow_v2/dto.py @@ -63,11 +63,22 @@ def remove_result_metadata_keys(self, keys_to_remove: list[str] = []) -> None: if not isinstance(item, dict): continue + # Handle metadata nested inside item["result"]["metadata"] result = item.get("result") - if not isinstance(result, dict): - continue - - self._remove_specific_keys(result=result, keys_to_remove=keys_to_remove) + if isinstance(result, dict): + self._remove_specific_keys( + result=result, keys_to_remove=keys_to_remove + ) + + # Handle top-level item["metadata"] (workers cache path) + if "metadata" in item: + if keys_to_remove: + item_metadata = item["metadata"] + if isinstance(item_metadata, dict): + for key in keys_to_remove: + item_metadata.pop(key, None) + else: + item.pop("metadata", None) def remove_result_metrics(self) -> None: """Removes the 'metrics' key from the 'result' dictionary within each diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index d740b306b4..49c814f73e 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -189,12 +189,16 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: context.run_id, ) shim.stream_log("Initializing text extractor...") + shim.stream_log( + f"Using text extractor: {type(x2text.x2text_instance).__name__}" + ) try: shim.stream_log("Extracting text from document...") if enable_highlight and isinstance( x2text.x2text_instance, (LLMWhisperer, LLMWhispererV2) ): + shim.stream_log("Extracting text with highlight support enabled...") process_response: TextExtractionResult = x2text.process( input_file_path=file_path, output_file_path=output_file_path, @@ -247,6 +251,7 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: process_response.extraction_metadata and process_response.extraction_metadata.line_metadata ): + shim.stream_log("Saving extraction metadata...") result_data["highlight_metadata"] = ( process_response.extraction_metadata.line_metadata ) @@ -430,8 +435,19 @@ def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResu extracted_text = "" index_metrics: dict = {} + shim = ExecutorToolShim( + platform_api_key=extract_params.get("platform_api_key", ""), + log_events_id=self._log_events_id, + component=self._log_component, + ) + step = 1 + # ---- Step 1: Extract ---- if not skip_extraction: + shim.stream_log( + f"Pipeline step {step}: Extracting text from document..." + ) + step += 1 extract_ctx = ExecutionContext( executor_name=context.executor_name, operation=Operation.EXTRACT.value, @@ -449,6 +465,10 @@ def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResu # ---- Step 2: Summarize (if enabled) ---- if is_summarization: + shim.stream_log( + f"Pipeline step {step}: Summarizing extracted text..." + ) + step += 1 summarize_result = self._run_pipeline_summarize( context=context, summarize_params=summarize_params or {}, @@ -462,6 +482,10 @@ def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResu answer_params["file_path"] = input_file_path elif not is_single_pass: # ---- Step 3: Index per output with dedup ---- + shim.stream_log( + f"Pipeline step {step}: Indexing document into vector store..." + ) + step += 1 index_metrics = self._run_pipeline_index( context=context, index_template=index_template, @@ -486,6 +510,10 @@ def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResu output["table_settings"] = table_settings # ---- Step 5: Answer prompt / Single pass ---- + mode_label = "single pass" if is_single_pass else "prompt" + shim.stream_log( + f"Pipeline step {step}: Running {mode_label} execution..." + ) operation = ( Operation.SINGLE_PASS_EXTRACTION.value if is_single_pass @@ -523,6 +551,7 @@ def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResu merged = self._merge_pipeline_metrics(existing_metrics, index_metrics) structured_output["metrics"] = merged + shim.stream_log("Pipeline completed successfully") return ExecutionResult(success=True, data=structured_output) def _run_pipeline_summarize( @@ -818,6 +847,9 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: chunking_config = ChunkingConfig( chunk_size=chunk_size, chunk_overlap=chunk_overlap ) + shim.stream_log( + f"Configured chunking: size={chunk_size}, overlap={chunk_overlap}" + ) Index, EmbeddingCompat, VectorDB = self._get_indexing_deps() @@ -845,6 +877,7 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: adapter_instance_id=vector_db_instance_id, embedding=embedding, ) + shim.stream_log("Initialized embedding and vector DB adapters") doc_id_found = index.is_document_indexed( doc_id=doc_id, embedding=embedding, vector_db=vector_db @@ -855,6 +888,10 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: doc_id_found, reindex, ) + if doc_id_found and reindex: + shim.stream_log("Document already indexed, re-indexing...") + elif not doc_id_found: + shim.stream_log("Indexing document for the first time...") shim.stream_log("Indexing document into vector store...") index.perform_indexing( vector_db=vector_db, @@ -1003,6 +1040,11 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: process_text_fn = None enable_highlight = tool_settings.get(PSKeys.ENABLE_HIGHLIGHT, False) enable_word_confidence = tool_settings.get(PSKeys.ENABLE_WORD_CONFIDENCE, False) + pipeline_shim = ExecutorToolShim( + platform_api_key=platform_api_key, + log_events_id=self._log_events_id, + component=self._log_component, + ) if enable_highlight: from executor.executors.plugins import ExecutorPluginLoader @@ -1021,12 +1063,14 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: "Highlight plugin initialized for file=%s", doc_name, ) + pipeline_shim.stream_log("Highlight data plugin ready") else: logger.warning( "Highlight is enabled but highlight-data plugin is not " "installed. Coordinates will not be produced. Install " "the plugin via: pip install -e " ) + pipeline_shim.stream_log("Highlight data plugin not available") # ---- Merge tool_settings as defaults into each prompt output -------- # Single-pass payloads carry adapter IDs and chunk config in @@ -1096,6 +1140,9 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: custom_data=custom_data, is_ide=is_ide, ) + shim.stream_log( + f"Resolved template variables for: {prompt_name}" + ) logger.info( "Executing prompt: tool_id=%s name=%s run_id=%s", @@ -1170,6 +1217,9 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: metrics.setdefault(prompt_name, {}).update( {"table_extraction": table_metrics} ) + shim.stream_log( + f"Table extraction completed for: {prompt_name}" + ) logger.info("TABLE extraction completed: prompt=%s", prompt_name) else: structured_output[prompt_name] = "" @@ -1214,6 +1264,9 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: adapter_instance_id=output[PSKeys.VECTOR_DB], embedding=embedding, ) + shim.stream_log( + f"Initialized LLM and retrieval adapters for: {prompt_name}" + ) except Exception as e: msg = f"Couldn't fetch adapter. {e}" logger.error(msg) @@ -1252,6 +1305,10 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: context_retrieval_metrics=context_retrieval_metrics, ) metadata[PSKeys.CONTEXT][prompt_name] = context_list + shim.stream_log( + f"Retrieved {len(context_list)} context chunks" + f" for: {prompt_name}" + ) logger.debug( "Retrieved %d context chunks for prompt: %s", len(context_list), @@ -1292,6 +1349,9 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: tool_id=tool_id, doc_name=doc_name, ) + shim.stream_log( + f"Applied type conversion for: {prompt_name}" + ) # ---- Challenge (quality verification) ---------------------- if tool_settings.get(PSKeys.ENABLE_CHALLENGE): @@ -1325,6 +1385,10 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: metadata=metadata, ) challenger.run() + shim.stream_log( + f"Challenge verification completed" + f" for: {prompt_name}" + ) logger.info( "Challenge completed: prompt=%s", prompt_name, @@ -1375,6 +1439,9 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: if vector_db: vector_db.close() + pipeline_shim.stream_log( + f"All {len(prompts)} prompts processed successfully" + ) logger.info( "All prompts processed: tool_id=%s prompt_count=%d file=%s", tool_id, diff --git a/workers/executor/tasks.py b/workers/executor/tasks.py index 88cf68b15b..a729870e1c 100644 --- a/workers/executor/tasks.py +++ b/workers/executor/tasks.py @@ -109,15 +109,4 @@ def execute_extraction(self, execution_context_dict: dict) -> dict: result.success, ) - # Strip sensitive/bulky fields before returning via Celery result - # backend. The trace logger is already suppressed (worker.py sets - # celery.app.trace to WARNING), but we still remove raw extracted - # text to avoid bloating the result backend storage. - # - # NOTE: Do NOT strip "context" or "highlight_data" — the backend - # (PromptStudioHelper / OutputManagerHelper) reads these from the - # result to persist in the database and return to the IDE. - result_dict = result.to_dict() - metadata = result_dict.get("data", {}).get("metadata", {}) - metadata.pop("extracted_text", None) - return result_dict + return result.to_dict() diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index d89e86bb16..e3c5a9b0ea 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -65,6 +65,7 @@ class _SK: ENABLE_SINGLE_PASS_EXTRACTION = "enable_single_pass_extraction" SUMMARIZE_AS_SOURCE = "summarize_as_source" ENABLE_HIGHLIGHT = "enable_highlight" + ENABLE_WORD_CONFIDENCE = "enable_word_confidence" SUMMARIZE_PROMPT = "summarize_prompt" TABLE_SETTINGS = "table_settings" INPUT_FILE = "input_file" @@ -273,10 +274,12 @@ def _execute_structure_tool_impl(params: dict) -> dict: is_single_pass_enabled = settings.get(_SK.SINGLE_PASS_EXTRACTION_MODE, False) challenge_llm = settings.get(_SK.CHALLENGE_LLM_ADAPTER_ID, "") is_highlight_enabled = settings.get(_SK.ENABLE_HIGHLIGHT, False) + is_word_confidence_enabled = settings.get(_SK.ENABLE_WORD_CONFIDENCE, False) logger.info( "HIGHLIGHT_DEBUG structure_tool: is_highlight_enabled=%s " - "from settings keys=%s", + "is_word_confidence_enabled=%s from settings keys=%s", is_highlight_enabled, + is_word_confidence_enabled, list(settings.keys()), ) @@ -290,6 +293,7 @@ def _execute_structure_tool_impl(params: dict) -> dict: tool_settings[_SK.ENABLE_SINGLE_PASS_EXTRACTION] = is_single_pass_enabled tool_settings[_SK.SUMMARIZE_AS_SOURCE] = is_summarization_enabled tool_settings[_SK.ENABLE_HIGHLIGHT] = is_highlight_enabled + tool_settings[_SK.ENABLE_WORD_CONFIDENCE] = is_word_confidence_enabled _, file_name = os.path.split(input_file_path) if is_summarization_enabled: From 7ae1a7432ff43795172b71294a58c424153f1d58 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Mar 2026 15:39:17 +0000 Subject: [PATCH 32/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- backend/workflow_manager/workflow_v2/dto.py | 4 +-- workers/executor/executors/legacy_executor.py | 35 +++++-------------- 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/backend/workflow_manager/workflow_v2/dto.py b/backend/workflow_manager/workflow_v2/dto.py index cb59d30dfc..0dc83d0097 100644 --- a/backend/workflow_manager/workflow_v2/dto.py +++ b/backend/workflow_manager/workflow_v2/dto.py @@ -66,9 +66,7 @@ def remove_result_metadata_keys(self, keys_to_remove: list[str] = []) -> None: # Handle metadata nested inside item["result"]["metadata"] result = item.get("result") if isinstance(result, dict): - self._remove_specific_keys( - result=result, keys_to_remove=keys_to_remove - ) + self._remove_specific_keys(result=result, keys_to_remove=keys_to_remove) # Handle top-level item["metadata"] (workers cache path) if "metadata" in item: diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index a1b1fdc37c..419bc9dc11 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -189,9 +189,7 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: context.run_id, ) shim.stream_log("Initializing text extractor...") - shim.stream_log( - f"Using text extractor: {type(x2text.x2text_instance).__name__}" - ) + shim.stream_log(f"Using text extractor: {type(x2text.x2text_instance).__name__}") try: shim.stream_log("Extracting text from document...") @@ -442,9 +440,7 @@ def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResu # ---- Step 1: Extract ---- if not skip_extraction: - shim.stream_log( - f"Pipeline step {step}: Extracting text from document..." - ) + shim.stream_log(f"Pipeline step {step}: Extracting text from document...") step += 1 extract_ctx = ExecutionContext( executor_name=context.executor_name, @@ -463,9 +459,7 @@ def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResu # ---- Step 2: Summarize (if enabled) ---- if is_summarization: - shim.stream_log( - f"Pipeline step {step}: Summarizing extracted text..." - ) + shim.stream_log(f"Pipeline step {step}: Summarizing extracted text...") step += 1 summarize_result = self._run_pipeline_summarize( context=context, @@ -509,9 +503,7 @@ def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResu # ---- Step 5: Answer prompt / Single pass ---- mode_label = "single pass" if is_single_pass else "prompt" - shim.stream_log( - f"Pipeline step {step}: Running {mode_label} execution..." - ) + shim.stream_log(f"Pipeline step {step}: Running {mode_label} execution...") operation = ( Operation.SINGLE_PASS_EXTRACTION.value if is_single_pass @@ -1138,9 +1130,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: custom_data=custom_data, is_ide=is_ide, ) - shim.stream_log( - f"Resolved template variables for: {prompt_name}" - ) + shim.stream_log(f"Resolved template variables for: {prompt_name}") logger.info( "Executing prompt: tool_id=%s name=%s run_id=%s", @@ -1215,9 +1205,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: metrics.setdefault(prompt_name, {}).update( {"table_extraction": table_metrics} ) - shim.stream_log( - f"Table extraction completed for: {prompt_name}" - ) + shim.stream_log(f"Table extraction completed for: {prompt_name}") logger.info("TABLE extraction completed: prompt=%s", prompt_name) else: structured_output[prompt_name] = "" @@ -1347,9 +1335,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: tool_id=tool_id, doc_name=doc_name, ) - shim.stream_log( - f"Applied type conversion for: {prompt_name}" - ) + shim.stream_log(f"Applied type conversion for: {prompt_name}") # ---- Challenge (quality verification) ---------------------- if tool_settings.get(PSKeys.ENABLE_CHALLENGE): @@ -1384,8 +1370,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: ) challenger.run() shim.stream_log( - f"Challenge verification completed" - f" for: {prompt_name}" + f"Challenge verification completed" f" for: {prompt_name}" ) logger.info( "Challenge completed: prompt=%s", @@ -1437,9 +1422,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: if vector_db: vector_db.close() - pipeline_shim.stream_log( - f"All {len(prompts)} prompts processed successfully" - ) + pipeline_shim.stream_log(f"All {len(prompts)} prompts processed successfully") logger.info( "All prompts processed: tool_id=%s prompt_count=%d file=%s", tool_id, From fbf9c29cf613c45d9b394ff19b4336b26007847a Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Mon, 9 Mar 2026 14:59:44 +0530 Subject: [PATCH 33/64] Pluggable apps and plugins to fit the new async prompt execution architecture --- backend/api_v2/api_deployment_views.py | 2 +- backend/api_v2/deployment_helper.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/api_v2/api_deployment_views.py b/backend/api_v2/api_deployment_views.py index b232fdfdc9..d31fb21678 100644 --- a/backend/api_v2/api_deployment_views.py +++ b/backend/api_v2/api_deployment_views.py @@ -233,7 +233,7 @@ def get( response.remove_result_metadata_keys(["extracted_text"]) if include_metadata: DeploymentHelper._enrich_result_with_usage_metadata(response) - else: + if not include_metadata and not include_metrics: response.remove_result_metadata_keys() if not include_metrics: response.remove_result_metrics() diff --git a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py index 55c38ef4a1..8e4a655e6e 100644 --- a/backend/api_v2/deployment_helper.py +++ b/backend/api_v2/deployment_helper.py @@ -275,7 +275,7 @@ def execute_workflow( result.remove_result_metadata_keys(["extracted_text"]) if include_metadata: cls._enrich_result_with_usage_metadata(result) - else: + if not include_metadata and not include_metrics: result.remove_result_metadata_keys() if not include_metrics: result.remove_result_metrics() From d6a3c5ea37ea4feb30dad6c65113907dfc1f838a Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Mon, 9 Mar 2026 15:18:59 +0530 Subject: [PATCH 34/64] adding worker for callbacks --- backend/api_v2/api_deployment_views.py | 2 +- backend/api_v2/deployment_helper.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/api_v2/api_deployment_views.py b/backend/api_v2/api_deployment_views.py index d31fb21678..b3afe68bad 100644 --- a/backend/api_v2/api_deployment_views.py +++ b/backend/api_v2/api_deployment_views.py @@ -231,7 +231,7 @@ def get( if not enable_highlight: response.remove_result_metadata_keys(["highlight_data"]) response.remove_result_metadata_keys(["extracted_text"]) - if include_metadata: + if include_metadata or include_metrics: DeploymentHelper._enrich_result_with_usage_metadata(response) if not include_metadata and not include_metrics: response.remove_result_metadata_keys() diff --git a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py index 8e4a655e6e..307a05bbf6 100644 --- a/backend/api_v2/deployment_helper.py +++ b/backend/api_v2/deployment_helper.py @@ -273,7 +273,7 @@ def execute_workflow( if not enable_highlight: result.remove_result_metadata_keys(["highlight_data"]) result.remove_result_metadata_keys(["extracted_text"]) - if include_metadata: + if include_metadata or include_metrics: cls._enrich_result_with_usage_metadata(result) if not include_metadata and not include_metrics: result.remove_result_metadata_keys() From 5c23ab0a38159cf24a4e5134bfc98e5e161246de Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Mon, 9 Mar 2026 17:29:49 +0530 Subject: [PATCH 35/64] adding worker for callbacks --- backend/api_v2/api_deployment_views.py | 2 +- backend/api_v2/deployment_helper.py | 36 +++++++++++++-------- backend/workflow_manager/workflow_v2/dto.py | 19 +++++++++++ 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/backend/api_v2/api_deployment_views.py b/backend/api_v2/api_deployment_views.py index b3afe68bad..76a1cffce6 100644 --- a/backend/api_v2/api_deployment_views.py +++ b/backend/api_v2/api_deployment_views.py @@ -234,7 +234,7 @@ def get( if include_metadata or include_metrics: DeploymentHelper._enrich_result_with_usage_metadata(response) if not include_metadata and not include_metrics: - response.remove_result_metadata_keys() + response.remove_inner_result_metadata() if not include_metrics: response.remove_result_metrics() return Response( diff --git a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py index 307a05bbf6..a001473301 100644 --- a/backend/api_v2/deployment_helper.py +++ b/backend/api_v2/deployment_helper.py @@ -276,7 +276,7 @@ def execute_workflow( if include_metadata or include_metrics: cls._enrich_result_with_usage_metadata(result) if not include_metadata and not include_metrics: - result.remove_result_metadata_keys() + result.remove_inner_result_metadata() if not include_metrics: result.remove_result_metrics() except Exception as error: @@ -297,11 +297,13 @@ def execute_workflow( @staticmethod def _enrich_result_with_usage_metadata(result: ExecutionResponse) -> None: - """Enrich each file result's metadata with per-model usage breakdown. + """Enrich each file result's metadata with usage data. - Queries the Usage table for each file_execution_id and injects - cost arrays (extraction_llm, challenge_llm, embedding) into the - result metadata, matching the legacy prompt-service response format. + For each file_execution_id: + 1. Injects per-model cost arrays (extraction_llm, challenge_llm, + embedding) into item["result"]["metadata"]. + 2. Injects aggregated usage totals into item["metadata"]["usage"], + matching the legacy response format. """ if not isinstance(result.result, list): return @@ -314,15 +316,23 @@ def _enrich_result_with_usage_metadata(result: ExecutionResponse) -> None: file_exec_id = item.get("file_execution_id") if not file_exec_id: continue + + # Enrich inner result metadata with per-model breakdown inner_result = item.get("result") - if not isinstance(inner_result, dict): - continue - metadata = inner_result.get("metadata") - if not isinstance(metadata, dict): - continue - usage_by_model = UsageHelper.get_usage_by_model(file_exec_id) - if usage_by_model: - metadata.update(usage_by_model) + if isinstance(inner_result, dict): + metadata = inner_result.get("metadata") + if isinstance(metadata, dict): + usage_by_model = UsageHelper.get_usage_by_model(file_exec_id) + if usage_by_model: + metadata.update(usage_by_model) + + # Enrich top-level item metadata with aggregated usage + item_metadata = item.get("metadata") + if isinstance(item_metadata, dict): + aggregated = UsageHelper.get_aggregated_token_count(file_exec_id) + if aggregated: + aggregated["file_execution_id"] = file_exec_id + item_metadata["usage"] = aggregated @staticmethod def get_execution_status(execution_id: str) -> ExecutionResponse: diff --git a/backend/workflow_manager/workflow_v2/dto.py b/backend/workflow_manager/workflow_v2/dto.py index 0dc83d0097..7c06126db8 100644 --- a/backend/workflow_manager/workflow_v2/dto.py +++ b/backend/workflow_manager/workflow_v2/dto.py @@ -78,6 +78,25 @@ def remove_result_metadata_keys(self, keys_to_remove: list[str] = []) -> None: else: item.pop("metadata", None) + def remove_inner_result_metadata(self) -> None: + """Removes only the inner item["result"]["metadata"] dict (extraction + metadata like highlight_data, per-model costs, etc.) while preserving + the outer item["metadata"] dict which contains workflow identification + keys (source_name, source_hash, workflow_id, etc.). + + Use this instead of remove_result_metadata_keys() when you want to + strip extraction metadata but keep workflow identification metadata. + """ + if not isinstance(self.result, list): + return + + for item in self.result: + if not isinstance(item, dict): + continue + result = item.get("result") + if isinstance(result, dict): + result.pop("metadata", None) + def remove_result_metrics(self) -> None: """Removes the 'metrics' key from the 'result' dictionary within each 'result' dictionary in the 'result' list attribute of the instance. From 525024f51a1734dfb606c85001166fc7d1ef52e4 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Mon, 9 Mar 2026 18:53:45 +0530 Subject: [PATCH 36/64] adding worker for callbacks --- backend/api_v2/api_deployment_views.py | 7 ++- backend/api_v2/deployment_helper.py | 80 +++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/backend/api_v2/api_deployment_views.py b/backend/api_v2/api_deployment_views.py index 76a1cffce6..25d264ba25 100644 --- a/backend/api_v2/api_deployment_views.py +++ b/backend/api_v2/api_deployment_views.py @@ -214,9 +214,14 @@ def get( response_status = status.HTTP_422_UNPROCESSABLE_ENTITY if execution_status_value == CeleryTaskState.COMPLETED.value: response_status = status.HTTP_200_OK - # Check if highlight data should be removed using configuration registry + # Ensure workflow identification keys are always in item metadata api_deployment = deployment_execution_dto.api organization = api_deployment.organization if api_deployment else None + org_id = str(organization.organization_id) if organization else "" + DeploymentHelper._enrich_result_with_workflow_metadata( + response, organization_id=org_id + ) + # Check if highlight data should be removed using configuration registry enable_highlight = False # Safe default if the key is unavailable (e.g., OSS) # Check if the configuration key exists (Cloud deployment) or use settings (OSS) from configuration.config_registry import ConfigurationRegistry diff --git a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py index a001473301..8b1c540347 100644 --- a/backend/api_v2/deployment_helper.py +++ b/backend/api_v2/deployment_helper.py @@ -258,8 +258,11 @@ def execute_workflow( result.status_api = DeploymentHelper.construct_status_endpoint( api_endpoint=api.api_endpoint, execution_id=execution_id ) - # Check if highlight data should be removed using configuration registry + # Ensure workflow identification keys are always in item metadata organization = api.organization if api else None + org_id = str(organization.organization_id) if organization else "" + cls._enrich_result_with_workflow_metadata(result, organization_id=org_id) + # Check if highlight data should be removed using configuration registry enable_highlight = False # Safe default if the key is unavailable (e.g., OSS) from configuration.config_registry import ConfigurationRegistry @@ -334,6 +337,81 @@ def _enrich_result_with_usage_metadata(result: ExecutionResponse) -> None: aggregated["file_execution_id"] = file_exec_id item_metadata["usage"] = aggregated + @staticmethod + def _enrich_result_with_workflow_metadata( + result: ExecutionResponse, + organization_id: str, + ) -> None: + """Ensure workflow identification keys are always present in item metadata. + + Uses setdefault() — fills in MISSING keys only, never overwrites + values already present from the workers cache. + """ + if not isinstance(result.result, list): + return + + from workflow_manager.file_execution.models import WorkflowFileExecution + + # 1. Collect file_execution_ids + file_exec_ids = [ + item.get("file_execution_id") + for item in result.result + if isinstance(item, dict) and item.get("file_execution_id") + ] + if not file_exec_ids: + return + + # 2. Batch query (single JOIN query for all file executions) + fe_lookup = { + str(fe.id): fe + for fe in WorkflowFileExecution.objects.filter( + id__in=file_exec_ids + ).select_related("workflow_execution") + } + + # 3. Get execution-level data (tags) — one M2M query + workflow_execution = None + tag_names: list[str] = [] + if fe_lookup: + first_fe = next(iter(fe_lookup.values())) + workflow_execution = first_fe.workflow_execution + tag_names = list( + workflow_execution.tags.values_list("name", flat=True) + ) + + # 4. Enrich each item + for item in result.result: + if not isinstance(item, dict): + continue + file_exec_id = item.get("file_execution_id") + if not file_exec_id: + continue + + # Ensure metadata dict exists + if not isinstance(item.get("metadata"), dict): + item["metadata"] = {} + metadata = item["metadata"] + + fe = fe_lookup.get(str(file_exec_id)) + we = fe.workflow_execution if fe else workflow_execution + + # Fill MISSING keys only (setdefault won't overwrite) + if fe: + metadata.setdefault("source_name", fe.file_name) + metadata.setdefault("source_hash", fe.file_hash or "") + metadata.setdefault("file_execution_id", str(fe.id)) + metadata.setdefault("total_elapsed_time", fe.execution_time) + if we: + metadata.setdefault("workflow_id", str(we.workflow_id)) + metadata.setdefault("execution_id", str(we.id)) + metadata.setdefault( + "workflow_start_time", + we.created_at.timestamp() if we.created_at else None, + ) + + metadata.setdefault("organization_id", organization_id) + metadata.setdefault("tags", tag_names) + @staticmethod def get_execution_status(execution_id: str) -> ExecutionResponse: """Current status of api execution. From a8cbce192112acf2f406267544af90bb45485475 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Mon, 9 Mar 2026 19:54:05 +0530 Subject: [PATCH 37/64] adding worker for callbacks --- docker/dockerfiles/worker-unified.Dockerfile.dockerignore | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/dockerfiles/worker-unified.Dockerfile.dockerignore b/docker/dockerfiles/worker-unified.Dockerfile.dockerignore index fca472f1f1..110627ea61 100644 --- a/docker/dockerfiles/worker-unified.Dockerfile.dockerignore +++ b/docker/dockerfiles/worker-unified.Dockerfile.dockerignore @@ -51,7 +51,6 @@ Thumbs.db # Documentation **/docs/ -**/*.md !README.md !unstract !unstract/** From 549f17aa19f90788f2b4860265b444d3d35dd6f2 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Mon, 9 Mar 2026 23:31:31 +0530 Subject: [PATCH 38/64] adding worker for callbacks --- workers/executor/executors/legacy_executor.py | 15 +++++++++++ .../executors/retrievers/automerging.py | 26 ++++++++++++++----- .../executor/executors/retrievers/fusion.py | 16 +++++++++--- .../executors/retrievers/keyword_table.py | 16 +++++++++--- .../executors/retrievers/recursive.py | 16 +++++++++--- .../executor/executors/retrievers/router.py | 16 +++++++++--- .../executors/retrievers/subquestion.py | 18 ++++++++++--- 7 files changed, 97 insertions(+), 26 deletions(-) diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 419bc9dc11..ec44444556 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -25,6 +25,7 @@ from executor.executors.file_utils import FileUtils from unstract.sdk1.adapters.exceptions import AdapterError +from unstract.sdk1.constants import LogLevel from unstract.sdk1.adapters.x2text.constants import X2TextConstants from unstract.sdk1.adapters.x2text.llm_whisperer.src import LLMWhisperer from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2 @@ -119,7 +120,21 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: elapsed, type(exc).__name__, exc.message, + exc_info=True, ) + # Stream error to FE so the user sees the failure in real-time + if self._log_events_id: + try: + shim = ExecutorToolShim( + log_events_id=self._log_events_id, + component=self._log_component, + ) + shim.stream_log( + f"Error: {exc.message or type(exc).__name__}", + level=LogLevel.ERROR, + ) + except Exception: + pass # Best-effort — don't mask the original error return ExecutionResult.failure(error=exc.message) # ------------------------------------------------------------------ diff --git a/workers/executor/executors/retrievers/automerging.py b/workers/executor/executors/retrievers/automerging.py index 3527458494..c3472964ed 100644 --- a/workers/executor/executors/retrievers/automerging.py +++ b/workers/executor/executors/retrievers/automerging.py @@ -55,8 +55,14 @@ def retrieve(self) -> set[str]: nodes = auto_merging_retriever.retrieve(self.prompt) except Exception as e: - logger.error(f"AutoMergingRetriever failed : {e}") - raise RetrievalError(f"AutoMergingRetriever failed: {str(e)}") from e + logger.error( + "AutoMergingRetriever failed: %s: %s", + type(e).__name__, e, + exc_info=True, + ) + raise RetrievalError( + f"AutoMergingRetriever failed: {type(e).__name__}: {e}" + ) from e # Extract unique text chunks chunks: set[str] = set() @@ -75,10 +81,18 @@ def retrieve(self) -> set[str]: return chunks except (ValueError, AttributeError, KeyError, ImportError) as e: - logger.error(f"Error during auto-merging retrieval for {self.doc_id}: {e}") - raise RetrievalError(str(e)) from e + logger.error( + "Error during auto-merging retrieval for %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, + ) + raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( - f"Unexpected error during auto-merging retrieval for {self.doc_id}: {e}" + "Unexpected error during auto-merging retrieval for %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, ) - raise RetrievalError(f"Unexpected error: {str(e)}") from e + raise RetrievalError( + f"Unexpected error: {type(e).__name__}: {e}" + ) from e diff --git a/workers/executor/executors/retrievers/fusion.py b/workers/executor/executors/retrievers/fusion.py index 476e5fa6da..a9b27e2eb0 100644 --- a/workers/executor/executors/retrievers/fusion.py +++ b/workers/executor/executors/retrievers/fusion.py @@ -84,10 +84,18 @@ def retrieve(self) -> set[str]: return chunks except (ValueError, AttributeError, KeyError, ImportError) as e: - logger.error(f"Error during fusion retrieval for {self.doc_id}: {e}") - raise RetrievalError(str(e)) from e + logger.error( + "Error during fusion retrieval for %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, + ) + raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( - f"Unexpected error during fusion retrieval for {self.doc_id}: {e}" + "Unexpected error during fusion retrieval for %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, ) - raise RetrievalError(f"Unexpected error: {str(e)}") from e + raise RetrievalError( + f"Unexpected error: {type(e).__name__}: {e}" + ) from e diff --git a/workers/executor/executors/retrievers/keyword_table.py b/workers/executor/executors/retrievers/keyword_table.py index dfaaff1db3..2f1d345c02 100644 --- a/workers/executor/executors/retrievers/keyword_table.py +++ b/workers/executor/executors/retrievers/keyword_table.py @@ -69,10 +69,18 @@ def retrieve(self) -> set[str]: return chunks except (ValueError, AttributeError, KeyError, ImportError) as e: - logger.error(f"Error during keyword retrieval for {self.doc_id}: {e}") - raise RetrievalError(str(e)) from e + logger.error( + "Error during keyword retrieval for %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, + ) + raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( - f"Unexpected error during keyword retrieval for {self.doc_id}: {e}" + "Unexpected error during keyword retrieval for %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, ) - raise RetrievalError(f"Unexpected error: {str(e)}") from e + raise RetrievalError( + f"Unexpected error: {type(e).__name__}: {e}" + ) from e diff --git a/workers/executor/executors/retrievers/recursive.py b/workers/executor/executors/retrievers/recursive.py index d1c8d0e786..b49bf298c2 100644 --- a/workers/executor/executors/retrievers/recursive.py +++ b/workers/executor/executors/retrievers/recursive.py @@ -67,10 +67,18 @@ def retrieve(self) -> set[str]: return chunks except (ValueError, AttributeError, KeyError, ImportError) as e: - logger.error(f"Error during recursive retrieval for {self.doc_id}: {e}") - raise RetrievalError(str(e)) from e + logger.error( + "Error during recursive retrieval for %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, + ) + raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( - f"Unexpected error during recursive retrieval for {self.doc_id}: {e}" + "Unexpected error during recursive retrieval for %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, ) - raise RetrievalError(f"Unexpected error: {str(e)}") from e + raise RetrievalError( + f"Unexpected error: {type(e).__name__}: {e}" + ) from e diff --git a/workers/executor/executors/retrievers/router.py b/workers/executor/executors/retrievers/router.py index b995dc9043..8dae80271c 100644 --- a/workers/executor/executors/retrievers/router.py +++ b/workers/executor/executors/retrievers/router.py @@ -147,10 +147,18 @@ def retrieve(self) -> set[str]: return chunks except (ValueError, AttributeError, KeyError, ImportError) as e: - logger.error(f"Error during router retrieval for {self.doc_id}: {e}") - raise RetrievalError(str(e)) from e + logger.error( + "Error during router retrieval for %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, + ) + raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( - f"Unexpected error during router retrieval for {self.doc_id}: {e}" + "Unexpected error during router retrieval for %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, ) - raise RetrievalError(f"Unexpected error: {str(e)}") from e + raise RetrievalError( + f"Unexpected error: {type(e).__name__}: {e}" + ) from e diff --git a/workers/executor/executors/retrievers/subquestion.py b/workers/executor/executors/retrievers/subquestion.py index 729abbb2cd..de0d5047d3 100644 --- a/workers/executor/executors/retrievers/subquestion.py +++ b/workers/executor/executors/retrievers/subquestion.py @@ -51,8 +51,18 @@ def retrieve(self) -> set[str]: return chunks except (ValueError, AttributeError, KeyError, ImportError) as e: - logger.error(f"Error during retrieving chunks {self.doc_id}: {e}") - raise RetrievalError(str(e)) from e + logger.error( + "Error during retrieving chunks %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, + ) + raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: - logger.error(f"Unexpected error during retrieving chunks {self.doc_id}: {e}") - raise RetrievalError(f"Unexpected error: {str(e)}") from e + logger.error( + "Unexpected error during retrieving chunks %s: %s: %s", + self.doc_id, type(e).__name__, e, + exc_info=True, + ) + raise RetrievalError( + f"Unexpected error: {type(e).__name__}: {e}" + ) from e From f9b86a9cce7f7971952efe40b7f17d4afd361fe4 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 10 Mar 2026 11:24:10 +0530 Subject: [PATCH 39/64] adding worker for callbacks --- .../prompt_studio_core_v2/tasks.py | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/backend/prompt_studio/prompt_studio_core_v2/tasks.py b/backend/prompt_studio/prompt_studio_core_v2/tasks.py index 325ae28a0f..1ccaad8a0b 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/tasks.py +++ b/backend/prompt_studio/prompt_studio_core_v2/tasks.py @@ -278,7 +278,17 @@ def ide_prompt_complete( if not result_dict.get("success", False): error_msg = result_dict.get("error", "Unknown executor error") logger.error("ide_prompt executor reported failure: %s", error_msg) - _emit_error(log_events_id, executor_task_id, operation, error_msg) + _emit_error( + log_events_id, + executor_task_id, + operation, + error_msg, + extra={ + "prompt_ids": prompt_ids, + "document_id": document_id, + "profile_manager_id": profile_manager_id, + }, + ) return {"status": "failed", "error": error_msg} data = result_dict.get("data", {}) @@ -313,7 +323,17 @@ def ide_prompt_complete( return {"status": "completed", "operation": operation} except Exception as e: logger.exception("ide_prompt_complete callback failed") - _emit_error(log_events_id, executor_task_id, operation, str(e)) + _emit_error( + log_events_id, + executor_task_id, + operation, + str(e), + extra={ + "prompt_ids": prompt_ids, + "document_id": document_id, + "profile_manager_id": profile_manager_id, + }, + ) raise finally: _clear_state_store() @@ -351,7 +371,17 @@ def ide_prompt_error( except Exception: pass - _emit_error(log_events_id, executor_task_id, operation, error_msg) + _emit_error( + log_events_id, + executor_task_id, + operation, + error_msg, + extra={ + "prompt_ids": cb.get("prompt_ids", []), + "document_id": cb.get("document_id", ""), + "profile_manager_id": cb.get("profile_manager_id"), + }, + ) except Exception: logger.exception("ide_prompt_error callback failed") finally: From 5369e5a3485f01947a96bb6aaa7cb41c020f4325 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 10 Mar 2026 13:51:46 +0530 Subject: [PATCH 40/64] adding worker for callbacks --- frontend/src/hooks/usePromptStudioSocket.js | 41 ++++++++++++++++++- frontend/src/store/prompt-run-status-store.js | 7 ++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/frontend/src/hooks/usePromptStudioSocket.js b/frontend/src/hooks/usePromptStudioSocket.js index 142f0c750e..c5ffa3c765 100644 --- a/frontend/src/hooks/usePromptStudioSocket.js +++ b/frontend/src/hooks/usePromptStudioSocket.js @@ -19,7 +19,8 @@ const PROMPT_STUDIO_RESULT_EVENT = "prompt_studio_result"; */ const usePromptStudioSocket = () => { const socket = useContext(SocketContext); - const { removePromptStatus } = usePromptRunStatusStore(); + const { removePromptStatus, clearPromptStatusById } = + usePromptRunStatusStore(); const { updateCustomTool, deleteIndexDoc } = useCustomToolStore(); const { setAlertDetails } = useAlertStore(); const handleException = useExceptionHandler(); @@ -47,11 +48,19 @@ const usePromptStudioSocket = () => { const data = Array.isArray(result) ? result : []; updatePromptOutputState(data, false); clearResultStatuses(data); + setAlertDetails({ + type: "success", + content: "Prompt execution completed successfully.", + }); } else if (operation === "single_pass_extraction") { const data = Array.isArray(result) ? result : []; updatePromptOutputState(data, false); updateCustomTool({ isSinglePassExtractLoading: false }); clearResultStatuses(data); + setAlertDetails({ + type: "success", + content: "Single pass extraction completed successfully.", + }); } else if (operation === "index_document") { const docId = result?.document_id; if (docId) deleteIndexDoc(docId); @@ -82,8 +91,36 @@ const usePromptStudioSocket = () => { const docId = extra?.document_id; if (docId) deleteIndexDoc(docId); } + + // Clear spinner for prompt operations so buttons re-enable + if ( + operation === "fetch_response" || + operation === "single_pass_extraction" + ) { + const promptIds = extra?.prompt_ids || []; + const docId = extra?.document_id; + const profileId = extra?.profile_manager_id; + if (docId && profileId) { + // Specific clearing (ideal path) + const statusKey = generateApiRunStatusId(docId, profileId); + promptIds.forEach((promptId) => { + removePromptStatus(promptId, statusKey); + }); + } else { + // Fallback: clear ALL statuses for these prompts + promptIds.forEach((promptId) => { + clearPromptStatusById(promptId); + }); + } + } }, - [setAlertDetails, updateCustomTool, deleteIndexDoc] + [ + setAlertDetails, + updateCustomTool, + deleteIndexDoc, + removePromptStatus, + clearPromptStatusById, + ] ); const onResult = useCallback( diff --git a/frontend/src/store/prompt-run-status-store.js b/frontend/src/store/prompt-run-status-store.js index dcc852a502..8c55e27ac9 100644 --- a/frontend/src/store/prompt-run-status-store.js +++ b/frontend/src/store/prompt-run-status-store.js @@ -26,6 +26,13 @@ const usePromptRunStatusStore = create((setState, getState) => ({ return { promptRunStatus: newStatus }; }); }, + clearPromptStatusById: (promptId) => { + setState((state) => { + const newStatus = { ...state.promptRunStatus }; + delete newStatus[promptId]; + return { promptRunStatus: newStatus }; + }); + }, removePromptStatus: (promptId, key) => { setState((state) => { const currentStatus = state.promptRunStatus || {}; From b5205ffa3decdd5755c5ea46aec379f8a9993671 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Tue, 10 Mar 2026 15:57:54 +0530 Subject: [PATCH 41/64] adding worker for callbacks --- .../file_processing/structure_tool_task.py | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index e3c5a9b0ea..ee1604658c 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -544,9 +544,45 @@ def _run_agentic_extraction( ) -> dict: """Execute agentic extraction pipeline via dispatcher. - Routes to AgenticPromptStudioExecutor (cloud plugin) which handles - the full multi-agent extraction pipeline using AutoGen. + Unpacks metadata, extracts document text via X2Text, then dispatches + with flat executor_params matching what AgenticPromptStudioExecutor + expects (adapter_instance_id, document_text, etc.). """ + from unstract.sdk1.x2txt import X2Text + + # 1. Unpack agentic project metadata (matches registry_helper export format) + adapter_config = tool_metadata.get("adapter_config", {}) + prompt_text = tool_metadata.get("prompt_text", "") + json_schema = tool_metadata.get("json_schema", {}) + enable_highlight = tool_instance_metadata.get( + "enable_highlight", + tool_metadata.get("enable_highlight", False), + ) + + # 2. Get adapter IDs: workflow UI overrides → exported defaults + # (mirrors tools/structure/src/main.py) + extractor_llm = tool_instance_metadata.get( + "extractor_llm_adapter_id", adapter_config.get("extractor_llm", "") + ) + llmwhisperer = tool_instance_metadata.get( + "llmwhisperer_adapter_id", adapter_config.get("llmwhisperer", "") + ) + platform_service_api_key = shim.platform_api_key + + # 3. Extract text from document using X2Text/LLMWhisperer + x2text = X2Text(tool=shim, adapter_instance_id=llmwhisperer) + extraction_result = x2text.process( + input_file_path=input_file_path, + enable_highlight=enable_highlight, + fs=fs, + ) + document_text = extraction_result.extracted_text + + # Parse json_schema if stored as string + if isinstance(json_schema, str): + json_schema = json.loads(json_schema) + + # 4. Dispatch with flat executor_params matching executor expectations agentic_ctx = ExecutionContext( executor_name="agentic", operation="agentic_extract", @@ -555,9 +591,13 @@ def _run_agentic_extraction( organization_id=organization_id, request_id=file_execution_id, executor_params={ - "tool_metadata": tool_metadata, - "input_file_path": input_file_path, - "tool_instance_metadata": tool_instance_metadata, + "document_id": file_execution_id, + "document_text": document_text, + "prompt_text": prompt_text, + "schema": json_schema, + "adapter_instance_id": extractor_llm, + "PLATFORM_SERVICE_API_KEY": platform_service_api_key, + "include_source_refs": enable_highlight, }, ) agentic_result = dispatcher.dispatch(agentic_ctx, timeout=EXECUTOR_TIMEOUT) From 9659661794c3ef91492c089fc4fb3ab39f8bd304 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 11 Mar 2026 14:05:01 +0530 Subject: [PATCH 42/64] fix: write output files in agentic extraction pipeline Agentic extraction returned early without writing INFILE (JSON) or METADATA.json, causing destination connectors to read the original PDF and fail with "Expected tool output type: TXT, got: application/pdf". Co-Authored-By: Claude Sonnet 4.6 --- .../file_processing/structure_tool_task.py | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index ee1604658c..6775a298a4 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -262,6 +262,7 @@ def _execute_structure_tool_impl(params: dict) -> dict: organization_id=organization_id, source_file_name=source_file_name, fs=fs, + execution_data_dir=execution_data_dir, ) # ---- Step 3: Profile overrides ---- @@ -541,6 +542,7 @@ def _run_agentic_extraction( organization_id: str, source_file_name: str, fs: Any, + execution_data_dir: str = "", ) -> dict: """Execute agentic extraction pipeline via dispatcher. @@ -583,6 +585,7 @@ def _run_agentic_extraction( json_schema = json.loads(json_schema) # 4. Dispatch with flat executor_params matching executor expectations + start_time = time.monotonic() agentic_ctx = ExecutionContext( executor_name="agentic", operation="agentic_extract", @@ -601,7 +604,31 @@ def _run_agentic_extraction( }, ) agentic_result = dispatcher.dispatch(agentic_ctx, timeout=EXECUTOR_TIMEOUT) - return agentic_result.to_dict() + + if not agentic_result.success: + return agentic_result.to_dict() + + structured_output = agentic_result.data + elapsed = time.monotonic() - start_time + + # Write output files (matches regular pipeline path) + try: + output_path = Path(output_dir_path) / f"{Path(source_file_name).stem}.json" + logger.info("Writing agentic output to %s", output_path) + fs.json_dump(path=output_path, data=structured_output) + + # Overwrite INFILE with JSON output so destination connector reads JSON, not PDF + logger.info("Overwriting INFILE with agentic output: %s", input_file_path) + fs.json_dump(path=input_file_path, data=structured_output) + except Exception as e: + return ExecutionResult.failure( + error=f"Error writing agentic output: {e}" + ).to_dict() + + # Write tool result + tool_metadata to METADATA.json + _write_tool_result(fs, execution_data_dir, structured_output, elapsed) + + return ExecutionResult(success=True, data=structured_output).to_dict() def _write_tool_result( @@ -652,4 +679,4 @@ def _write_tool_result( data=json.dumps(existing, indent=2), ) except Exception as e: - logger.warning("Failed to write tool result to METADATA.json: %s", e) + logger.warning("Failed to write tool result to METADATA.json: %s", e) \ No newline at end of file From 67eef62bf9a75fcb4b48d1c4164ced506e37e1b3 Mon Sep 17 00:00:00 2001 From: harini-venkataraman <115449948+harini-venkataraman@users.noreply.github.com> Date: Wed, 11 Mar 2026 20:04:47 +0530 Subject: [PATCH 43/64] UN-3266 fix: replace hardcoded /tmp paths with secure temp dirs in tests (#1850) * UN-3266 fix: replace hardcoded /tmp paths with secure temp dirs in tests Replace hardcoded /tmp/ paths (SonarCloud S5443 security hotspots) with pytest's tmp_path fixture or module-level tempfile.mkdtemp() constants in all affected test files to avoid world-writable directory vulnerabilities. Co-Authored-By: Claude Sonnet 4.6 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Claude Sonnet 4.6 Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- backend/api_v2/deployment_helper.py | 4 +--- unstract/sdk1/tests/test_execution.py | 6 +++++- workers/executor/executors/legacy_executor.py | 2 +- .../executor/executors/retrievers/automerging.py | 15 +++++++++------ workers/executor/executors/retrievers/fusion.py | 12 +++++++----- .../executors/retrievers/keyword_table.py | 12 +++++++----- .../executor/executors/retrievers/recursive.py | 12 +++++++----- workers/executor/executors/retrievers/router.py | 12 +++++++----- .../executor/executors/retrievers/subquestion.py | 12 +++++++----- workers/file_processing/structure_tool_task.py | 2 +- workers/tests/test_legacy_executor_scaffold.py | 7 ++++--- workers/tests/test_phase1_log_streaming.py | 8 ++++---- workers/tests/test_sanity_phase6g.py | 4 ++-- workers/tests/test_sanity_phase6h.py | 6 +++--- workers/tests/test_sanity_phase6j.py | 6 +++--- workers/tests/test_usage.py | 4 ++-- 16 files changed, 70 insertions(+), 54 deletions(-) diff --git a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py index 8b1c540347..8971f21c62 100644 --- a/backend/api_v2/deployment_helper.py +++ b/backend/api_v2/deployment_helper.py @@ -375,9 +375,7 @@ def _enrich_result_with_workflow_metadata( if fe_lookup: first_fe = next(iter(fe_lookup.values())) workflow_execution = first_fe.workflow_execution - tag_names = list( - workflow_execution.tags.values_list("name", flat=True) - ) + tag_names = list(workflow_execution.tags.values_list("name", flat=True)) # 4. Enrich each item for item in result.result: diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index 3839a01073..458c7a8f10 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -2,6 +2,8 @@ import json import logging +import os +import tempfile from typing import Any, Self from unittest.mock import MagicMock @@ -19,6 +21,8 @@ from unstract.sdk1.execution.registry import ExecutorRegistry from unstract.sdk1.execution.result import ExecutionResult +_TEST_FILE_PATH = os.path.join(tempfile.mkdtemp(), "test.pdf") + class TestExecutionContext: """Tests for ExecutionContext serialization and validation.""" @@ -31,7 +35,7 @@ def _make_context(self, **overrides: Any) -> ExecutionContext: "run_id": "run-001", "execution_source": "tool", "organization_id": "org-123", - "executor_params": {"file_path": "/tmp/test.pdf"}, + "executor_params": {"file_path": _TEST_FILE_PATH}, "request_id": "req-abc", } defaults.update(overrides) diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index ec44444556..0d8978b431 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -25,10 +25,10 @@ from executor.executors.file_utils import FileUtils from unstract.sdk1.adapters.exceptions import AdapterError -from unstract.sdk1.constants import LogLevel from unstract.sdk1.adapters.x2text.constants import X2TextConstants from unstract.sdk1.adapters.x2text.llm_whisperer.src import LLMWhisperer from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2 +from unstract.sdk1.constants import LogLevel from unstract.sdk1.execution.context import ExecutionContext, Operation from unstract.sdk1.execution.executor import BaseExecutor from unstract.sdk1.execution.registry import ExecutorRegistry diff --git a/workers/executor/executors/retrievers/automerging.py b/workers/executor/executors/retrievers/automerging.py index c3472964ed..7df911f44f 100644 --- a/workers/executor/executors/retrievers/automerging.py +++ b/workers/executor/executors/retrievers/automerging.py @@ -57,7 +57,8 @@ def retrieve(self) -> set[str]: except Exception as e: logger.error( "AutoMergingRetriever failed: %s: %s", - type(e).__name__, e, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError( @@ -83,16 +84,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during auto-merging retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during auto-merging retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/fusion.py b/workers/executor/executors/retrievers/fusion.py index a9b27e2eb0..82c6c32967 100644 --- a/workers/executor/executors/retrievers/fusion.py +++ b/workers/executor/executors/retrievers/fusion.py @@ -86,16 +86,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during fusion retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during fusion retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/keyword_table.py b/workers/executor/executors/retrievers/keyword_table.py index 2f1d345c02..2a61dfd227 100644 --- a/workers/executor/executors/retrievers/keyword_table.py +++ b/workers/executor/executors/retrievers/keyword_table.py @@ -71,16 +71,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during keyword retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during keyword retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/recursive.py b/workers/executor/executors/retrievers/recursive.py index b49bf298c2..0ad09a6b78 100644 --- a/workers/executor/executors/retrievers/recursive.py +++ b/workers/executor/executors/retrievers/recursive.py @@ -69,16 +69,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during recursive retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during recursive retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/router.py b/workers/executor/executors/retrievers/router.py index 8dae80271c..0bbb424b3e 100644 --- a/workers/executor/executors/retrievers/router.py +++ b/workers/executor/executors/retrievers/router.py @@ -149,16 +149,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during router retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during router retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/subquestion.py b/workers/executor/executors/retrievers/subquestion.py index de0d5047d3..635fa133ac 100644 --- a/workers/executor/executors/retrievers/subquestion.py +++ b/workers/executor/executors/retrievers/subquestion.py @@ -53,16 +53,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during retrieving chunks %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during retrieving chunks %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index 6775a298a4..82c1962b43 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -679,4 +679,4 @@ def _write_tool_result( data=json.dumps(existing, indent=2), ) except Exception as e: - logger.warning("Failed to write tool result to METADATA.json: %s", e) \ No newline at end of file + logger.warning("Failed to write tool result to METADATA.json: %s", e) diff --git a/workers/tests/test_legacy_executor_scaffold.py b/workers/tests/test_legacy_executor_scaffold.py index f2d9935f9b..48789c218d 100644 --- a/workers/tests/test_legacy_executor_scaffold.py +++ b/workers/tests/test_legacy_executor_scaffold.py @@ -203,11 +203,12 @@ def test_chunking_config_zero_raises(self): with pytest.raises(ValueError, match="zero chunks"): ChunkingConfig(chunk_size=0, chunk_overlap=0) - def test_file_info(self): + def test_file_info(self, tmp_path): from executor.executors.dto import FileInfo - fi = FileInfo(file_path="/tmp/test.pdf", file_hash="abc123") - assert fi.file_path == "/tmp/test.pdf" + test_path = str(tmp_path / "test.pdf") + fi = FileInfo(file_path=test_path, file_hash="abc123") + assert fi.file_path == test_path def test_instance_identifiers(self): from executor.executors.dto import InstanceIdentifiers diff --git a/workers/tests/test_phase1_log_streaming.py b/workers/tests/test_phase1_log_streaming.py index 903449d75a..95de9b21bc 100644 --- a/workers/tests/test_phase1_log_streaming.py +++ b/workers/tests/test_phase1_log_streaming.py @@ -313,7 +313,7 @@ class TestLegacyExecutorLogPassthrough: @patch("executor.executors.legacy_executor.X2Text") @patch("executor.executors.legacy_executor.ExecutorToolShim") def test_extract_passes_log_info_to_shim( - self, mock_shim_cls, mock_x2text, mock_fs + self, mock_shim_cls, mock_x2text, mock_fs, tmp_path ): from executor.executors.legacy_executor import LegacyExecutor from unstract.sdk1.execution.registry import ExecutorRegistry @@ -337,7 +337,7 @@ def test_extract_passes_log_info_to_shim( log_events_id="session-abc", executor_params={ "x2text_instance_id": "x2t-1", - "file_path": "/tmp/test.pdf", + "file_path": str(tmp_path / "test.pdf"), "platform_api_key": "sk-test", }, ) @@ -357,7 +357,7 @@ def test_extract_passes_log_info_to_shim( @patch("executor.executors.legacy_executor.X2Text") @patch("executor.executors.legacy_executor.ExecutorToolShim") def test_extract_no_log_info_when_absent( - self, mock_shim_cls, mock_x2text, mock_fs + self, mock_shim_cls, mock_x2text, mock_fs, tmp_path ): from executor.executors.legacy_executor import LegacyExecutor from unstract.sdk1.execution.registry import ExecutorRegistry @@ -380,7 +380,7 @@ def test_extract_no_log_info_when_absent( execution_source="tool", executor_params={ "x2text_instance_id": "x2t-1", - "file_path": "/tmp/test.pdf", + "file_path": str(tmp_path / "test.pdf"), "platform_api_key": "sk-test", }, ) diff --git a/workers/tests/test_sanity_phase6g.py b/workers/tests/test_sanity_phase6g.py index 73bb738911..8b175f8eec 100644 --- a/workers/tests/test_sanity_phase6g.py +++ b/workers/tests/test_sanity_phase6g.py @@ -154,7 +154,7 @@ def test_dispatch_sends_to_sps_queue(self): call_kwargs = mock_app.send_task.call_args assert call_kwargs.kwargs.get("queue") == "celery_executor_simple_prompt_studio" - def test_dispatch_sps_index_to_correct_queue(self): + def test_dispatch_sps_index_to_correct_queue(self, tmp_path): mock_app = MagicMock() mock_result = MagicMock() mock_result.get.return_value = ExecutionResult( @@ -168,7 +168,7 @@ def test_dispatch_sps_index_to_correct_queue(self): operation="sps_index", run_id="run-1", execution_source="tool", - executor_params={"output": {}, "file_path": "/tmp/test.pdf"}, + executor_params={"output": {}, "file_path": str(tmp_path / "test.pdf")}, ) result = dispatcher.dispatch(ctx) diff --git a/workers/tests/test_sanity_phase6h.py b/workers/tests/test_sanity_phase6h.py index 1c43b3d78b..c4249fb05a 100644 --- a/workers/tests/test_sanity_phase6h.py +++ b/workers/tests/test_sanity_phase6h.py @@ -198,7 +198,7 @@ def test_legacy_returns_failure_for_agentic_summarize(self): # --------------------------------------------------------------------------- class TestStructureToolAgenticRouting: - def test_structure_tool_dispatches_agentic_extract(self): + def test_structure_tool_dispatches_agentic_extract(self, tmp_path): """Verify _run_agentic_extraction sends executor_name='agentic'.""" from file_processing.structure_tool_task import _run_agentic_extraction @@ -210,8 +210,8 @@ def test_structure_tool_dispatches_agentic_extract(self): result = _run_agentic_extraction( tool_metadata={"name": "test"}, - input_file_path="/tmp/test.pdf", - output_dir_path="/tmp/output", + input_file_path=str(tmp_path / "test.pdf"), + output_dir_path=str(tmp_path / "output"), tool_instance_metadata={}, dispatcher=mock_dispatcher, shim=MagicMock(), diff --git a/workers/tests/test_sanity_phase6j.py b/workers/tests/test_sanity_phase6j.py index 2336b65d05..c52dcdf490 100644 --- a/workers/tests/test_sanity_phase6j.py +++ b/workers/tests/test_sanity_phase6j.py @@ -382,7 +382,7 @@ def test_highlight_plugin_not_installed_no_error(self, _mock_eps): assert ExecutorPluginLoader.get("highlight-data") is None # No error — graceful degradation - def test_mock_highlight_plugin_shared_across_executors(self): + def test_mock_highlight_plugin_shared_across_executors(self, tmp_path): """Multiple executors can use the same highlight plugin instance.""" from executor.executors.plugins.loader import ExecutorPluginLoader @@ -412,8 +412,8 @@ def get_confidence_data(self): assert cls is FakeHighlight # Both legacy and agentic contexts can create instances - legacy_hl = cls(file_path="/tmp/doc.txt", execution_source="ide") - agentic_hl = cls(file_path="/tmp/other.txt", execution_source="tool") + legacy_hl = cls(file_path=str(tmp_path / "doc.txt"), execution_source="ide") + agentic_hl = cls(file_path=str(tmp_path / "other.txt"), execution_source="tool") assert legacy_hl.get_highlight_data() == {"lines": [1, 2, 3]} assert agentic_hl.get_confidence_data() == {"confidence": 0.95} diff --git a/workers/tests/test_usage.py b/workers/tests/test_usage.py index 2fecc76713..fc08ac825b 100644 --- a/workers/tests/test_usage.py +++ b/workers/tests/test_usage.py @@ -223,7 +223,7 @@ class TestMetricsInResult: ) @patch("executor.executors.legacy_executor.ExecutorToolShim") def test_answer_prompt_returns_metrics( - self, mock_shim_cls, mock_get_deps, _mock_idx + self, mock_shim_cls, mock_get_deps, _mock_idx, tmp_path ): """answer_prompt result includes metrics dict.""" from unstract.sdk1.execution.context import ExecutionContext @@ -298,7 +298,7 @@ def test_answer_prompt_returns_metrics( ], "tool_id": "tool-1", "file_hash": "hash123", - "file_path": "/tmp/test.txt", + "file_path": str(tmp_path / "test.txt"), "file_name": "test.txt", "PLATFORM_SERVICE_API_KEY": "test-key", }, From a563a35510d51c1107fbb63fbe43dc54f75ad6b6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 14:38:30 +0000 Subject: [PATCH 44/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- backend/prompt_studio/prompt_studio_core_v2/views.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/prompt_studio/prompt_studio_core_v2/views.py b/backend/prompt_studio/prompt_studio_core_v2/views.py index bbacb3e46b..d18120c4dc 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/views.py +++ b/backend/prompt_studio/prompt_studio_core_v2/views.py @@ -59,7 +59,6 @@ PromptStudioDocumentHelper, ) from prompt_studio.prompt_studio_index_manager_v2.models import IndexManager -from prompt_studio.prompt_studio_output_manager_v2.models import PromptStudioOutputManager from prompt_studio.prompt_studio_registry_v2.models import PromptStudioRegistry from prompt_studio.prompt_studio_registry_v2.prompt_studio_registry_helper import ( PromptStudioRegistryHelper, From 9b422daf4f8ec5341f23a44595049b929f951348 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 11 Mar 2026 20:29:43 +0530 Subject: [PATCH 45/64] Update docs --- docker/docker-compose.yaml | 3 +- run-platform.sh | 1 + workers/executor/README.md | 59 ++++++++++++++++++++++++++++++++++++++ workers/sample.env | 8 ++++++ 4 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 workers/executor/README.md diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 1318d515d6..395fc25018 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -540,8 +540,7 @@ services: volumes: - ./workflow_data:/data - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config - profiles: - - workers-v2 + - prompt_studio_data:/app/prompt-studio-data volumes: prompt_studio_data: diff --git a/run-platform.sh b/run-platform.sh index bcacfa82e8..a2b793a131 100755 --- a/run-platform.sh +++ b/run-platform.sh @@ -294,6 +294,7 @@ run_services() { python3 "$script_dir/docker/scripts/release-notes/print_release_notes.py" "$current_version" "$target_branch" fi echo -e "\nOnce the services are up, visit ""$blue_text""http://frontend.unstract.localhost""$default_text"" in your browser." + echo -e "The async executor worker is included — Prompt Studio IDE runs are non-blocking." echo -e "\nSee logs with:" echo -e " ""$blue_text""$docker_compose_cmd -f docker/docker-compose.yaml logs -f""$default_text" echo -e "Configure services by updating corresponding ""$yellow_text""/.env""$default_text"" files." diff --git a/workers/executor/README.md b/workers/executor/README.md new file mode 100644 index 0000000000..ab185bd9c4 --- /dev/null +++ b/workers/executor/README.md @@ -0,0 +1,59 @@ +# Executor Worker + +Celery worker that handles LLM extraction, indexing, and prompt execution for the Unstract platform. + +## How It Works + +```text +Browser → Django Backend → RabbitMQ → Executor Worker → Callback → WebSocket → Browser +``` + +1. User clicks "Run" in Prompt Studio IDE → Backend dispatches task to `celery_executor_legacy` queue +2. Executor worker picks up task, runs LLM extraction +3. Result triggers callback on `prompt_studio_callback` queue +4. Callback worker saves results to DB and pushes via Socket.IO +5. Browser receives result in real-time + +## Services Involved + +| Service | Purpose | +|---------|---------| +| `worker-executor-v2` | Runs LLM extraction, indexing, prompts | +| `worker-prompt-studio-callback` | Post-execution ORM writes + Socket.IO events | +| `backend` | Django REST API + Socket.IO | +| `platform-service` | Adapter credential management | +| `prompt-service` | Prompt template service | + +## Configuration + +The executor worker starts automatically with `./run-platform.sh` — no extra configuration needed. + +Key environment variables (in `docker/sample.env` and `workers/sample.env`): + +| Variable | Default | Description | +|----------|---------|-------------| +| `WORKER_EXECUTOR_CONCURRENCY` | `2` | Number of concurrent executor processes | +| `WORKER_EXECUTOR_POOL` | `prefork` | Celery pool type | +| `EXECUTOR_TASK_TIME_LIMIT` | `3600` | Hard timeout per task (seconds) | +| `EXECUTOR_TASK_SOFT_TIME_LIMIT` | `3300` | Soft timeout per task (seconds) | +| `EXECUTOR_RESULT_TIMEOUT` | `3600` | How long callers wait for results | +| `EXECUTOR_AUTOSCALE` | `2,1` | Max,min worker autoscale | + +## Queue + +Listens on: `celery_executor_legacy` + +Configurable via `CELERY_QUEUES_EXECUTOR` environment variable. + +## Docker + +Defined in `docker/docker-compose.yaml` as `worker-executor-v2`. Uses the unified worker image (`unstract/worker-unified`) with `executor` command. + +## Local Development + +```bash +cd workers +cp sample.env .env +# Edit .env: change Docker hostnames to localhost +./run-worker.sh executor +``` diff --git a/workers/sample.env b/workers/sample.env index 516fc242e1..766faee754 100644 --- a/workers/sample.env +++ b/workers/sample.env @@ -183,6 +183,14 @@ SCHEDULER_WORKER_NAME=scheduler-worker SCHEDULER_HEALTH_PORT=8087 SCHEDULER_AUTOSCALE=2,1 +# Executor Worker +EXECUTOR_WORKER_NAME=executor-worker +EXECUTOR_HEALTH_PORT=8088 +EXECUTOR_AUTOSCALE=2,1 +EXECUTOR_RESULT_TIMEOUT=3600 +EXECUTOR_TASK_TIME_LIMIT=3600 +EXECUTOR_TASK_SOFT_TIME_LIMIT=3300 + # Notification Worker NOTIFICATION_WORKER_NAME=notification-worker NOTIFICATION_HEALTH_PORT=8085 From 817fc1c9870846398c19c302bea47ea272ceb0f0 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 11 Mar 2026 21:29:39 +0530 Subject: [PATCH 46/64] UN-3266 fix: remove dead code with undefined names in fetch_response Remove unreachable code block after the async callback return in fetch_response that still referenced output_count_before and response from the old synchronous implementation, causing ruff F821 errors. Co-Authored-By: Claude Sonnet 4.6 --- .../prompt_studio_core_v2/views.py | 10 - uv.lock | 1981 +++++++++-------- 2 files changed, 998 insertions(+), 993 deletions(-) diff --git a/backend/prompt_studio/prompt_studio_core_v2/views.py b/backend/prompt_studio/prompt_studio_core_v2/views.py index d18120c4dc..84e397bd37 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/views.py +++ b/backend/prompt_studio/prompt_studio_core_v2/views.py @@ -503,16 +503,6 @@ def fetch_response(self, request: HttpRequest, pk: Any = None) -> Response: status=status.HTTP_202_ACCEPTED, ) - # Notify HubSpot about first prompt run - notify_hubspot_event( - user=request.user, - event_name="PROMPT_RUN", - is_first_for_org=output_count_before == 0, - action_label="prompt run", - ) - - return Response(response, status=status.HTTP_200_OK) - @action(detail=True, methods=["post"]) def single_pass_extraction(self, request: HttpRequest, pk: uuid) -> Response: """API Entry point method for single pass extraction. diff --git a/uv.lock b/uv.lock index 270f48aabf..979fac7f7d 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 1 +revision = 3 requires-python = "==3.12.*" [[package]] @@ -14,9 +14,9 @@ dependencies = [ { name = "azure-storage-blob" }, { name = "fsspec" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b4/1e/6d5146676044247af566fa5843b335b1a647e6446070cec9c8b61c31b369/adlfs-2024.7.0.tar.gz", hash = "sha256:106995b91f0eb5e775bcd5957d180d9a14faef3271a063b1f65c66fd5ab05ddf", size = 48588 } +sdist = { url = "https://files.pythonhosted.org/packages/b4/1e/6d5146676044247af566fa5843b335b1a647e6446070cec9c8b61c31b369/adlfs-2024.7.0.tar.gz", hash = "sha256:106995b91f0eb5e775bcd5957d180d9a14faef3271a063b1f65c66fd5ab05ddf", size = 48588, upload-time = "2024-07-22T12:10:33.849Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/51/a71c457bd0bc8af3e522b6999ff300852c7c446e384fd9904b0794f875df/adlfs-2024.7.0-py3-none-any.whl", hash = "sha256:2005c8e124fda3948f2a6abb2dbebb2c936d2d821acaca6afd61932edfa9bc07", size = 41349 }, + { url = "https://files.pythonhosted.org/packages/6f/51/a71c457bd0bc8af3e522b6999ff300852c7c446e384fd9904b0794f875df/adlfs-2024.7.0-py3-none-any.whl", hash = "sha256:2005c8e124fda3948f2a6abb2dbebb2c936d2d821acaca6afd61932edfa9bc07", size = 41349, upload-time = "2024-07-22T12:10:32.226Z" }, ] [[package]] @@ -29,9 +29,9 @@ dependencies = [ { name = "botocore" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cd/d2/d7e46bcc4c0b5b8e751092824d6ca9af5928adae0f864336e43c7f7a436a/aiobotocore-2.13.1.tar.gz", hash = "sha256:134f9606c2f91abde38cbc61c3241113e26ff244633e0c31abb7e09da3581c9b", size = 104475 } +sdist = { url = "https://files.pythonhosted.org/packages/cd/d2/d7e46bcc4c0b5b8e751092824d6ca9af5928adae0f864336e43c7f7a436a/aiobotocore-2.13.1.tar.gz", hash = "sha256:134f9606c2f91abde38cbc61c3241113e26ff244633e0c31abb7e09da3581c9b", size = 104475, upload-time = "2024-06-24T18:30:36.509Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/30/07/42f884c1600169e4267575cdd261c75dea31782d8fd877bbea358d559416/aiobotocore-2.13.1-py3-none-any.whl", hash = "sha256:1bef121b99841ee3cc788e4ed97c332ba32353b1f00e886d1beb3aae95520858", size = 76864 }, + { url = "https://files.pythonhosted.org/packages/30/07/42f884c1600169e4267575cdd261c75dea31782d8fd877bbea358d559416/aiobotocore-2.13.1-py3-none-any.whl", hash = "sha256:1bef121b99841ee3cc788e4ed97c332ba32353b1f00e886d1beb3aae95520858", size = 76864, upload-time = "2024-06-24T18:30:33.379Z" }, ] [package.optional-dependencies] @@ -43,9 +43,9 @@ boto3 = [ name = "aiohappyeyeballs" version = "2.6.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760 } +sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760, upload-time = "2025-03-12T01:42:48.764Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265 }, + { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265, upload-time = "2025-03-12T01:42:47.083Z" }, ] [[package]] @@ -61,34 +61,34 @@ dependencies = [ { name = "propcache" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9b/e7/d92a237d8802ca88483906c388f7c201bbe96cd80a165ffd0ac2f6a8d59f/aiohttp-3.12.15.tar.gz", hash = "sha256:4fc61385e9c98d72fcdf47e6dd81833f47b2f77c114c29cd64a361be57a763a2", size = 7823716 } +sdist = { url = "https://files.pythonhosted.org/packages/9b/e7/d92a237d8802ca88483906c388f7c201bbe96cd80a165ffd0ac2f6a8d59f/aiohttp-3.12.15.tar.gz", hash = "sha256:4fc61385e9c98d72fcdf47e6dd81833f47b2f77c114c29cd64a361be57a763a2", size = 7823716, upload-time = "2025-07-29T05:52:32.215Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/63/97/77cb2450d9b35f517d6cf506256bf4f5bda3f93a66b4ad64ba7fc917899c/aiohttp-3.12.15-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:802d3868f5776e28f7bf69d349c26fc0efadb81676d0afa88ed00d98a26340b7", size = 702333 }, - { url = "https://files.pythonhosted.org/packages/83/6d/0544e6b08b748682c30b9f65640d006e51f90763b41d7c546693bc22900d/aiohttp-3.12.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2800614cd560287be05e33a679638e586a2d7401f4ddf99e304d98878c29444", size = 476948 }, - { url = "https://files.pythonhosted.org/packages/3a/1d/c8c40e611e5094330284b1aea8a4b02ca0858f8458614fa35754cab42b9c/aiohttp-3.12.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8466151554b593909d30a0a125d638b4e5f3836e5aecde85b66b80ded1cb5b0d", size = 469787 }, - { url = "https://files.pythonhosted.org/packages/38/7d/b76438e70319796bfff717f325d97ce2e9310f752a267bfdf5192ac6082b/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e5a495cb1be69dae4b08f35a6c4579c539e9b5706f606632102c0f855bcba7c", size = 1716590 }, - { url = "https://files.pythonhosted.org/packages/79/b1/60370d70cdf8b269ee1444b390cbd72ce514f0d1cd1a715821c784d272c9/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6404dfc8cdde35c69aaa489bb3542fb86ef215fc70277c892be8af540e5e21c0", size = 1699241 }, - { url = "https://files.pythonhosted.org/packages/a3/2b/4968a7b8792437ebc12186db31523f541943e99bda8f30335c482bea6879/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ead1c00f8521a5c9070fcb88f02967b1d8a0544e6d85c253f6968b785e1a2ab", size = 1754335 }, - { url = "https://files.pythonhosted.org/packages/fb/c1/49524ed553f9a0bec1a11fac09e790f49ff669bcd14164f9fab608831c4d/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6990ef617f14450bc6b34941dba4f12d5613cbf4e33805932f853fbd1cf18bfb", size = 1800491 }, - { url = "https://files.pythonhosted.org/packages/de/5e/3bf5acea47a96a28c121b167f5ef659cf71208b19e52a88cdfa5c37f1fcc/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd736ed420f4db2b8148b52b46b88ed038d0354255f9a73196b7bbce3ea97545", size = 1719929 }, - { url = "https://files.pythonhosted.org/packages/39/94/8ae30b806835bcd1cba799ba35347dee6961a11bd507db634516210e91d8/aiohttp-3.12.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c5092ce14361a73086b90c6efb3948ffa5be2f5b6fbcf52e8d8c8b8848bb97c", size = 1635733 }, - { url = "https://files.pythonhosted.org/packages/7a/46/06cdef71dd03acd9da7f51ab3a9107318aee12ad38d273f654e4f981583a/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aaa2234bb60c4dbf82893e934d8ee8dea30446f0647e024074237a56a08c01bd", size = 1696790 }, - { url = "https://files.pythonhosted.org/packages/02/90/6b4cfaaf92ed98d0ec4d173e78b99b4b1a7551250be8937d9d67ecb356b4/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6d86a2fbdd14192e2f234a92d3b494dd4457e683ba07e5905a0b3ee25389ac9f", size = 1718245 }, - { url = "https://files.pythonhosted.org/packages/2e/e6/2593751670fa06f080a846f37f112cbe6f873ba510d070136a6ed46117c6/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a041e7e2612041a6ddf1c6a33b883be6a421247c7afd47e885969ee4cc58bd8d", size = 1658899 }, - { url = "https://files.pythonhosted.org/packages/8f/28/c15bacbdb8b8eb5bf39b10680d129ea7410b859e379b03190f02fa104ffd/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5015082477abeafad7203757ae44299a610e89ee82a1503e3d4184e6bafdd519", size = 1738459 }, - { url = "https://files.pythonhosted.org/packages/00/de/c269cbc4faa01fb10f143b1670633a8ddd5b2e1ffd0548f7aa49cb5c70e2/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:56822ff5ddfd1b745534e658faba944012346184fbfe732e0d6134b744516eea", size = 1766434 }, - { url = "https://files.pythonhosted.org/packages/52/b0/4ff3abd81aa7d929b27d2e1403722a65fc87b763e3a97b3a2a494bfc63bc/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b2acbbfff69019d9014508c4ba0401822e8bae5a5fdc3b6814285b71231b60f3", size = 1726045 }, - { url = "https://files.pythonhosted.org/packages/71/16/949225a6a2dd6efcbd855fbd90cf476052e648fb011aa538e3b15b89a57a/aiohttp-3.12.15-cp312-cp312-win32.whl", hash = "sha256:d849b0901b50f2185874b9a232f38e26b9b3d4810095a7572eacea939132d4e1", size = 423591 }, - { url = "https://files.pythonhosted.org/packages/2b/d8/fa65d2a349fe938b76d309db1a56a75c4fb8cc7b17a398b698488a939903/aiohttp-3.12.15-cp312-cp312-win_amd64.whl", hash = "sha256:b390ef5f62bb508a9d67cb3bba9b8356e23b3996da7062f1a57ce1a79d2b3d34", size = 450266 }, + { url = "https://files.pythonhosted.org/packages/63/97/77cb2450d9b35f517d6cf506256bf4f5bda3f93a66b4ad64ba7fc917899c/aiohttp-3.12.15-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:802d3868f5776e28f7bf69d349c26fc0efadb81676d0afa88ed00d98a26340b7", size = 702333, upload-time = "2025-07-29T05:50:46.507Z" }, + { url = "https://files.pythonhosted.org/packages/83/6d/0544e6b08b748682c30b9f65640d006e51f90763b41d7c546693bc22900d/aiohttp-3.12.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2800614cd560287be05e33a679638e586a2d7401f4ddf99e304d98878c29444", size = 476948, upload-time = "2025-07-29T05:50:48.067Z" }, + { url = "https://files.pythonhosted.org/packages/3a/1d/c8c40e611e5094330284b1aea8a4b02ca0858f8458614fa35754cab42b9c/aiohttp-3.12.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8466151554b593909d30a0a125d638b4e5f3836e5aecde85b66b80ded1cb5b0d", size = 469787, upload-time = "2025-07-29T05:50:49.669Z" }, + { url = "https://files.pythonhosted.org/packages/38/7d/b76438e70319796bfff717f325d97ce2e9310f752a267bfdf5192ac6082b/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e5a495cb1be69dae4b08f35a6c4579c539e9b5706f606632102c0f855bcba7c", size = 1716590, upload-time = "2025-07-29T05:50:51.368Z" }, + { url = "https://files.pythonhosted.org/packages/79/b1/60370d70cdf8b269ee1444b390cbd72ce514f0d1cd1a715821c784d272c9/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6404dfc8cdde35c69aaa489bb3542fb86ef215fc70277c892be8af540e5e21c0", size = 1699241, upload-time = "2025-07-29T05:50:53.628Z" }, + { url = "https://files.pythonhosted.org/packages/a3/2b/4968a7b8792437ebc12186db31523f541943e99bda8f30335c482bea6879/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ead1c00f8521a5c9070fcb88f02967b1d8a0544e6d85c253f6968b785e1a2ab", size = 1754335, upload-time = "2025-07-29T05:50:55.394Z" }, + { url = "https://files.pythonhosted.org/packages/fb/c1/49524ed553f9a0bec1a11fac09e790f49ff669bcd14164f9fab608831c4d/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6990ef617f14450bc6b34941dba4f12d5613cbf4e33805932f853fbd1cf18bfb", size = 1800491, upload-time = "2025-07-29T05:50:57.202Z" }, + { url = "https://files.pythonhosted.org/packages/de/5e/3bf5acea47a96a28c121b167f5ef659cf71208b19e52a88cdfa5c37f1fcc/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd736ed420f4db2b8148b52b46b88ed038d0354255f9a73196b7bbce3ea97545", size = 1719929, upload-time = "2025-07-29T05:50:59.192Z" }, + { url = "https://files.pythonhosted.org/packages/39/94/8ae30b806835bcd1cba799ba35347dee6961a11bd507db634516210e91d8/aiohttp-3.12.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c5092ce14361a73086b90c6efb3948ffa5be2f5b6fbcf52e8d8c8b8848bb97c", size = 1635733, upload-time = "2025-07-29T05:51:01.394Z" }, + { url = "https://files.pythonhosted.org/packages/7a/46/06cdef71dd03acd9da7f51ab3a9107318aee12ad38d273f654e4f981583a/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aaa2234bb60c4dbf82893e934d8ee8dea30446f0647e024074237a56a08c01bd", size = 1696790, upload-time = "2025-07-29T05:51:03.657Z" }, + { url = "https://files.pythonhosted.org/packages/02/90/6b4cfaaf92ed98d0ec4d173e78b99b4b1a7551250be8937d9d67ecb356b4/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6d86a2fbdd14192e2f234a92d3b494dd4457e683ba07e5905a0b3ee25389ac9f", size = 1718245, upload-time = "2025-07-29T05:51:05.911Z" }, + { url = "https://files.pythonhosted.org/packages/2e/e6/2593751670fa06f080a846f37f112cbe6f873ba510d070136a6ed46117c6/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a041e7e2612041a6ddf1c6a33b883be6a421247c7afd47e885969ee4cc58bd8d", size = 1658899, upload-time = "2025-07-29T05:51:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/8f/28/c15bacbdb8b8eb5bf39b10680d129ea7410b859e379b03190f02fa104ffd/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5015082477abeafad7203757ae44299a610e89ee82a1503e3d4184e6bafdd519", size = 1738459, upload-time = "2025-07-29T05:51:09.56Z" }, + { url = "https://files.pythonhosted.org/packages/00/de/c269cbc4faa01fb10f143b1670633a8ddd5b2e1ffd0548f7aa49cb5c70e2/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:56822ff5ddfd1b745534e658faba944012346184fbfe732e0d6134b744516eea", size = 1766434, upload-time = "2025-07-29T05:51:11.423Z" }, + { url = "https://files.pythonhosted.org/packages/52/b0/4ff3abd81aa7d929b27d2e1403722a65fc87b763e3a97b3a2a494bfc63bc/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b2acbbfff69019d9014508c4ba0401822e8bae5a5fdc3b6814285b71231b60f3", size = 1726045, upload-time = "2025-07-29T05:51:13.689Z" }, + { url = "https://files.pythonhosted.org/packages/71/16/949225a6a2dd6efcbd855fbd90cf476052e648fb011aa538e3b15b89a57a/aiohttp-3.12.15-cp312-cp312-win32.whl", hash = "sha256:d849b0901b50f2185874b9a232f38e26b9b3d4810095a7572eacea939132d4e1", size = 423591, upload-time = "2025-07-29T05:51:15.452Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d8/fa65d2a349fe938b76d309db1a56a75c4fb8cc7b17a398b698488a939903/aiohttp-3.12.15-cp312-cp312-win_amd64.whl", hash = "sha256:b390ef5f62bb508a9d67cb3bba9b8356e23b3996da7062f1a57ce1a79d2b3d34", size = 450266, upload-time = "2025-07-29T05:51:17.239Z" }, ] [[package]] name = "aioitertools" version = "0.12.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/06/de/38491a84ab323b47c7f86e94d2830e748780525f7a10c8600b67ead7e9ea/aioitertools-0.12.0.tar.gz", hash = "sha256:c2a9055b4fbb7705f561b9d86053e8af5d10cc845d22c32008c43490b2d8dd6b", size = 19369 } +sdist = { url = "https://files.pythonhosted.org/packages/06/de/38491a84ab323b47c7f86e94d2830e748780525f7a10c8600b67ead7e9ea/aioitertools-0.12.0.tar.gz", hash = "sha256:c2a9055b4fbb7705f561b9d86053e8af5d10cc845d22c32008c43490b2d8dd6b", size = 19369, upload-time = "2024-09-02T03:33:40.349Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/85/13/58b70a580de00893223d61de8fea167877a3aed97d4a5e1405c9159ef925/aioitertools-0.12.0-py3-none-any.whl", hash = "sha256:fc1f5fac3d737354de8831cbba3eb04f79dd649d8f3afb4c5b114925e662a796", size = 24345 }, + { url = "https://files.pythonhosted.org/packages/85/13/58b70a580de00893223d61de8fea167877a3aed97d4a5e1405c9159ef925/aioitertools-0.12.0-py3-none-any.whl", hash = "sha256:fc1f5fac3d737354de8831cbba3eb04f79dd649d8f3afb4c5b114925e662a796", size = 24345, upload-time = "2024-09-02T03:34:59.454Z" }, ] [[package]] @@ -99,9 +99,9 @@ dependencies = [ { name = "frozenlist" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007 } +sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490 }, + { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, ] [[package]] @@ -111,9 +111,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/13/7d/8bca2bf9a247c2c5dfeec1d7a5f40db6518f88d314b8bca9da29670d2671/aiosqlite-0.21.0.tar.gz", hash = "sha256:131bb8056daa3bc875608c631c678cda73922a2d4ba8aec373b19f18c17e7aa3", size = 13454 } +sdist = { url = "https://files.pythonhosted.org/packages/13/7d/8bca2bf9a247c2c5dfeec1d7a5f40db6518f88d314b8bca9da29670d2671/aiosqlite-0.21.0.tar.gz", hash = "sha256:131bb8056daa3bc875608c631c678cda73922a2d4ba8aec373b19f18c17e7aa3", size = 13454, upload-time = "2025-02-03T07:30:16.235Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f5/10/6c25ed6de94c49f88a91fa5018cb4c0f3625f31d5be9f771ebe5cc7cd506/aiosqlite-0.21.0-py3-none-any.whl", hash = "sha256:2549cf4057f95f53dcba16f2b64e8e2791d7e1adedb13197dd8ed77bb226d7d0", size = 15792 }, + { url = "https://files.pythonhosted.org/packages/f5/10/6c25ed6de94c49f88a91fa5018cb4c0f3625f31d5be9f771ebe5cc7cd506/aiosqlite-0.21.0-py3-none-any.whl", hash = "sha256:2549cf4057f95f53dcba16f2b64e8e2791d7e1adedb13197dd8ed77bb226d7d0", size = 15792, upload-time = "2025-02-03T07:30:13.6Z" }, ] [[package]] @@ -123,18 +123,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "vine" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/79/fc/ec94a357dfc6683d8c86f8b4cfa5416a4c36b28052ec8260c77aca96a443/amqp-5.3.1.tar.gz", hash = "sha256:cddc00c725449522023bad949f70fff7b48f0b1ade74d170a6f10ab044739432", size = 129013 } +sdist = { url = "https://files.pythonhosted.org/packages/79/fc/ec94a357dfc6683d8c86f8b4cfa5416a4c36b28052ec8260c77aca96a443/amqp-5.3.1.tar.gz", hash = "sha256:cddc00c725449522023bad949f70fff7b48f0b1ade74d170a6f10ab044739432", size = 129013, upload-time = "2024-11-12T19:55:44.051Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/26/99/fc813cd978842c26c82534010ea849eee9ab3a13ea2b74e95cb9c99e747b/amqp-5.3.1-py3-none-any.whl", hash = "sha256:43b3319e1b4e7d1251833a93d672b4af1e40f3d632d479b98661a95f117880a2", size = 50944 }, + { url = "https://files.pythonhosted.org/packages/26/99/fc813cd978842c26c82534010ea849eee9ab3a13ea2b74e95cb9c99e747b/amqp-5.3.1-py3-none-any.whl", hash = "sha256:43b3319e1b4e7d1251833a93d672b4af1e40f3d632d479b98661a95f117880a2", size = 50944, upload-time = "2024-11-12T19:55:41.782Z" }, ] [[package]] name = "annotated-types" version = "0.7.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] [[package]] @@ -146,61 +146,61 @@ dependencies = [ { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094 } +sdist = { url = "https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094, upload-time = "2025-09-23T09:19:12.58Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097 }, + { url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097, upload-time = "2025-09-23T09:19:10.601Z" }, ] [[package]] name = "appdirs" version = "1.4.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/d8/05696357e0311f5b5c316d7b95f46c669dd9c15aaeecbb48c7d0aeb88c40/appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", size = 13470 } +sdist = { url = "https://files.pythonhosted.org/packages/d7/d8/05696357e0311f5b5c316d7b95f46c669dd9c15aaeecbb48c7d0aeb88c40/appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", size = 13470, upload-time = "2020-05-11T07:59:51.037Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128", size = 9566 }, + { url = "https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128", size = 9566, upload-time = "2020-05-11T07:59:49.499Z" }, ] [[package]] name = "asgiref" version = "3.9.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7f/bf/0f3ecda32f1cb3bf1dca480aca08a7a8a3bdc4bed2343a103f30731565c9/asgiref-3.9.2.tar.gz", hash = "sha256:a0249afacb66688ef258ffe503528360443e2b9a8d8c4581b6ebefa58c841ef1", size = 36894 } +sdist = { url = "https://files.pythonhosted.org/packages/7f/bf/0f3ecda32f1cb3bf1dca480aca08a7a8a3bdc4bed2343a103f30731565c9/asgiref-3.9.2.tar.gz", hash = "sha256:a0249afacb66688ef258ffe503528360443e2b9a8d8c4581b6ebefa58c841ef1", size = 36894, upload-time = "2025-09-23T15:00:55.136Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/d1/69d02ce34caddb0a7ae088b84c356a625a93cd4ff57b2f97644c03fad905/asgiref-3.9.2-py3-none-any.whl", hash = "sha256:0b61526596219d70396548fc003635056856dba5d0d086f86476f10b33c75960", size = 23788 }, + { url = "https://files.pythonhosted.org/packages/c7/d1/69d02ce34caddb0a7ae088b84c356a625a93cd4ff57b2f97644c03fad905/asgiref-3.9.2-py3-none-any.whl", hash = "sha256:0b61526596219d70396548fc003635056856dba5d0d086f86476f10b33c75960", size = 23788, upload-time = "2025-09-23T15:00:53.627Z" }, ] [[package]] name = "asn1crypto" version = "1.5.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/de/cf/d547feed25b5244fcb9392e288ff9fdc3280b10260362fc45d37a798a6ee/asn1crypto-1.5.1.tar.gz", hash = "sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c", size = 121080 } +sdist = { url = "https://files.pythonhosted.org/packages/de/cf/d547feed25b5244fcb9392e288ff9fdc3280b10260362fc45d37a798a6ee/asn1crypto-1.5.1.tar.gz", hash = "sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c", size = 121080, upload-time = "2022-03-15T14:46:52.889Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/7f/09065fd9e27da0eda08b4d6897f1c13535066174cc023af248fc2a8d5e5a/asn1crypto-1.5.1-py2.py3-none-any.whl", hash = "sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67", size = 105045 }, + { url = "https://files.pythonhosted.org/packages/c9/7f/09065fd9e27da0eda08b4d6897f1c13535066174cc023af248fc2a8d5e5a/asn1crypto-1.5.1-py2.py3-none-any.whl", hash = "sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67", size = 105045, upload-time = "2022-03-15T14:46:51.055Z" }, ] [[package]] name = "asyncpg" version = "0.30.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746 } +sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746, upload-time = "2024-10-20T00:30:41.127Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/64/9d3e887bb7b01535fdbc45fbd5f0a8447539833b97ee69ecdbb7a79d0cb4/asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e", size = 673162 }, - { url = "https://files.pythonhosted.org/packages/6e/eb/8b236663f06984f212a087b3e849731f917ab80f84450e943900e8ca4052/asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a", size = 637025 }, - { url = "https://files.pythonhosted.org/packages/cc/57/2dc240bb263d58786cfaa60920779af6e8d32da63ab9ffc09f8312bd7a14/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3", size = 3496243 }, - { url = "https://files.pythonhosted.org/packages/f4/40/0ae9d061d278b10713ea9021ef6b703ec44698fe32178715a501ac696c6b/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737", size = 3575059 }, - { url = "https://files.pythonhosted.org/packages/c3/75/d6b895a35a2c6506952247640178e5f768eeb28b2e20299b6a6f1d743ba0/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a", size = 3473596 }, - { url = "https://files.pythonhosted.org/packages/c8/e7/3693392d3e168ab0aebb2d361431375bd22ffc7b4a586a0fc060d519fae7/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af", size = 3641632 }, - { url = "https://files.pythonhosted.org/packages/32/ea/15670cea95745bba3f0352341db55f506a820b21c619ee66b7d12ea7867d/asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e", size = 560186 }, - { url = "https://files.pythonhosted.org/packages/7e/6b/fe1fad5cee79ca5f5c27aed7bd95baee529c1bf8a387435c8ba4fe53d5c1/asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305", size = 621064 }, + { url = "https://files.pythonhosted.org/packages/4b/64/9d3e887bb7b01535fdbc45fbd5f0a8447539833b97ee69ecdbb7a79d0cb4/asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e", size = 673162, upload-time = "2024-10-20T00:29:41.88Z" }, + { url = "https://files.pythonhosted.org/packages/6e/eb/8b236663f06984f212a087b3e849731f917ab80f84450e943900e8ca4052/asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a", size = 637025, upload-time = "2024-10-20T00:29:43.352Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/2dc240bb263d58786cfaa60920779af6e8d32da63ab9ffc09f8312bd7a14/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3", size = 3496243, upload-time = "2024-10-20T00:29:44.922Z" }, + { url = "https://files.pythonhosted.org/packages/f4/40/0ae9d061d278b10713ea9021ef6b703ec44698fe32178715a501ac696c6b/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737", size = 3575059, upload-time = "2024-10-20T00:29:46.891Z" }, + { url = "https://files.pythonhosted.org/packages/c3/75/d6b895a35a2c6506952247640178e5f768eeb28b2e20299b6a6f1d743ba0/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a", size = 3473596, upload-time = "2024-10-20T00:29:49.201Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e7/3693392d3e168ab0aebb2d361431375bd22ffc7b4a586a0fc060d519fae7/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af", size = 3641632, upload-time = "2024-10-20T00:29:50.768Z" }, + { url = "https://files.pythonhosted.org/packages/32/ea/15670cea95745bba3f0352341db55f506a820b21c619ee66b7d12ea7867d/asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e", size = 560186, upload-time = "2024-10-20T00:29:52.394Z" }, + { url = "https://files.pythonhosted.org/packages/7e/6b/fe1fad5cee79ca5f5c27aed7bd95baee529c1bf8a387435c8ba4fe53d5c1/asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305", size = 621064, upload-time = "2024-10-20T00:29:53.757Z" }, ] [[package]] name = "attrs" version = "25.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032 } +sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032, upload-time = "2025-03-13T11:10:22.779Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 }, + { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, ] [[package]] @@ -210,9 +210,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ce/bb/73a1f1c64ee527877f64122422dafe5b87a846ccf4ac933fe21bcbb8fee8/authlib-1.6.4.tar.gz", hash = "sha256:104b0442a43061dc8bc23b133d1d06a2b0a9c2e3e33f34c4338929e816287649", size = 164046 } +sdist = { url = "https://files.pythonhosted.org/packages/ce/bb/73a1f1c64ee527877f64122422dafe5b87a846ccf4ac933fe21bcbb8fee8/authlib-1.6.4.tar.gz", hash = "sha256:104b0442a43061dc8bc23b133d1d06a2b0a9c2e3e33f34c4338929e816287649", size = 164046, upload-time = "2025-09-17T09:59:23.897Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/aa/91355b5f539caf1b94f0e66ff1e4ee39373b757fce08204981f7829ede51/authlib-1.6.4-py2.py3-none-any.whl", hash = "sha256:39313d2a2caac3ecf6d8f95fbebdfd30ae6ea6ae6a6db794d976405fdd9aa796", size = 243076 }, + { url = "https://files.pythonhosted.org/packages/0e/aa/91355b5f539caf1b94f0e66ff1e4ee39373b757fce08204981f7829ede51/authlib-1.6.4-py2.py3-none-any.whl", hash = "sha256:39313d2a2caac3ecf6d8f95fbebdfd30ae6ea6ae6a6db794d976405fdd9aa796", size = 243076, upload-time = "2025-09-17T09:59:22.259Z" }, ] [[package]] @@ -224,9 +224,9 @@ dependencies = [ { name = "six" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/15/6b/2653adc0f33adba8f11b1903701e6b1c10d34ce5d8e25dfa13a422f832b0/azure_core-1.35.1.tar.gz", hash = "sha256:435d05d6df0fff2f73fb3c15493bb4721ede14203f1ff1382aa6b6b2bdd7e562", size = 345290 } +sdist = { url = "https://files.pythonhosted.org/packages/15/6b/2653adc0f33adba8f11b1903701e6b1c10d34ce5d8e25dfa13a422f832b0/azure_core-1.35.1.tar.gz", hash = "sha256:435d05d6df0fff2f73fb3c15493bb4721ede14203f1ff1382aa6b6b2bdd7e562", size = 345290, upload-time = "2025-09-11T22:58:04.481Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/27/52/805980aa1ba18282077c484dba634ef0ede1e84eec8be9c92b2e162d0ed6/azure_core-1.35.1-py3-none-any.whl", hash = "sha256:12da0c9e08e48e198f9158b56ddbe33b421477e1dc98c2e1c8f9e254d92c468b", size = 211800 }, + { url = "https://files.pythonhosted.org/packages/27/52/805980aa1ba18282077c484dba634ef0ede1e84eec8be9c92b2e162d0ed6/azure_core-1.35.1-py3-none-any.whl", hash = "sha256:12da0c9e08e48e198f9158b56ddbe33b421477e1dc98c2e1c8f9e254d92c468b", size = 211800, upload-time = "2025-09-11T22:58:06.281Z" }, ] [[package]] @@ -238,9 +238,9 @@ dependencies = [ { name = "msal" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/22/ff/61369d06422b5ac48067215ff404841342651b14a89b46c8d8e1507c8f17/azure-datalake-store-0.0.53.tar.gz", hash = "sha256:05b6de62ee3f2a0a6e6941e6933b792b800c3e7f6ffce2fc324bc19875757393", size = 71430 } +sdist = { url = "https://files.pythonhosted.org/packages/22/ff/61369d06422b5ac48067215ff404841342651b14a89b46c8d8e1507c8f17/azure-datalake-store-0.0.53.tar.gz", hash = "sha256:05b6de62ee3f2a0a6e6941e6933b792b800c3e7f6ffce2fc324bc19875757393", size = 71430, upload-time = "2023-05-10T21:17:05.665Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/2a/75f56b14f115189155cf12e46b366ad1fe3357af5a1a7c09f7446662d617/azure_datalake_store-0.0.53-py2.py3-none-any.whl", hash = "sha256:a30c902a6e360aa47d7f69f086b426729784e71c536f330b691647a51dc42b2b", size = 55308 }, + { url = "https://files.pythonhosted.org/packages/88/2a/75f56b14f115189155cf12e46b366ad1fe3357af5a1a7c09f7446662d617/azure_datalake_store-0.0.53-py2.py3-none-any.whl", hash = "sha256:a30c902a6e360aa47d7f69f086b426729784e71c536f330b691647a51dc42b2b", size = 55308, upload-time = "2023-05-10T21:17:02.629Z" }, ] [[package]] @@ -254,9 +254,9 @@ dependencies = [ { name = "msal-extensions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4e/9e/4c9682a286c3c89e437579bd9f64f311020e5125c1321fd3a653166b5716/azure_identity-1.25.0.tar.gz", hash = "sha256:4177df34d684cddc026e6cf684e1abb57767aa9d84e7f2129b080ec45eee7733", size = 278507 } +sdist = { url = "https://files.pythonhosted.org/packages/4e/9e/4c9682a286c3c89e437579bd9f64f311020e5125c1321fd3a653166b5716/azure_identity-1.25.0.tar.gz", hash = "sha256:4177df34d684cddc026e6cf684e1abb57767aa9d84e7f2129b080ec45eee7733", size = 278507, upload-time = "2025-09-12T01:30:04.418Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/75/54/81683b6756676a22e037b209695b08008258e603f7e47c56834029c5922a/azure_identity-1.25.0-py3-none-any.whl", hash = "sha256:becaec086bbdf8d1a6aa4fb080c2772a0f824a97d50c29637ec8cc4933f1e82d", size = 190861 }, + { url = "https://files.pythonhosted.org/packages/75/54/81683b6756676a22e037b209695b08008258e603f7e47c56834029c5922a/azure_identity-1.25.0-py3-none-any.whl", hash = "sha256:becaec086bbdf8d1a6aa4fb080c2772a0f824a97d50c29637ec8cc4933f1e82d", size = 190861, upload-time = "2025-09-12T01:30:06.474Z" }, ] [[package]] @@ -269,9 +269,9 @@ dependencies = [ { name = "isodate" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/96/95/3e3414491ce45025a1cde107b6ae72bf72049e6021597c201cd6a3029b9a/azure_storage_blob-12.26.0.tar.gz", hash = "sha256:5dd7d7824224f7de00bfeb032753601c982655173061e242f13be6e26d78d71f", size = 583332 } +sdist = { url = "https://files.pythonhosted.org/packages/96/95/3e3414491ce45025a1cde107b6ae72bf72049e6021597c201cd6a3029b9a/azure_storage_blob-12.26.0.tar.gz", hash = "sha256:5dd7d7824224f7de00bfeb032753601c982655173061e242f13be6e26d78d71f", size = 583332, upload-time = "2025-07-16T21:34:07.644Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5b/64/63dbfdd83b31200ac58820a7951ddfdeed1fbee9285b0f3eae12d1357155/azure_storage_blob-12.26.0-py3-none-any.whl", hash = "sha256:8c5631b8b22b4f53ec5fff2f3bededf34cfef111e2af613ad42c9e6de00a77fe", size = 412907 }, + { url = "https://files.pythonhosted.org/packages/5b/64/63dbfdd83b31200ac58820a7951ddfdeed1fbee9285b0f3eae12d1357155/azure_storage_blob-12.26.0-py3-none-any.whl", hash = "sha256:8c5631b8b22b4f53ec5fff2f3bededf34cfef111e2af613ad42c9e6de00a77fe", size = 412907, upload-time = "2025-07-16T21:34:09.367Z" }, ] [[package]] @@ -285,47 +285,47 @@ dependencies = [ { name = "platformdirs" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7d/f8/25ef24814f77f3fd7f0fd3bd1ef3749e38a9dbd23502fbb53034de49900c/banks-2.2.0.tar.gz", hash = "sha256:d1446280ce6e00301e3e952dd754fd8cee23ff277d29ed160994a84d0d7ffe62", size = 179052 } +sdist = { url = "https://files.pythonhosted.org/packages/7d/f8/25ef24814f77f3fd7f0fd3bd1ef3749e38a9dbd23502fbb53034de49900c/banks-2.2.0.tar.gz", hash = "sha256:d1446280ce6e00301e3e952dd754fd8cee23ff277d29ed160994a84d0d7ffe62", size = 179052, upload-time = "2025-07-18T16:28:26.892Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/d6/f9168956276934162ec8d48232f9920f2985ee45aa7602e3c6b4bc203613/banks-2.2.0-py3-none-any.whl", hash = "sha256:963cd5c85a587b122abde4f4064078def35c50c688c1b9d36f43c92503854e7d", size = 29244 }, + { url = "https://files.pythonhosted.org/packages/b4/d6/f9168956276934162ec8d48232f9920f2985ee45aa7602e3c6b4bc203613/banks-2.2.0-py3-none-any.whl", hash = "sha256:963cd5c85a587b122abde4f4064078def35c50c688c1b9d36f43c92503854e7d", size = 29244, upload-time = "2025-07-18T16:28:27.835Z" }, ] [[package]] name = "bcrypt" version = "5.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d4/36/3329e2518d70ad8e2e5817d5a4cac6bba05a47767ec416c7d020a965f408/bcrypt-5.0.0.tar.gz", hash = "sha256:f748f7c2d6fd375cc93d3fba7ef4a9e3a092421b8dbf34d8d4dc06be9492dfdd", size = 25386 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/84/29/6237f151fbfe295fe3e074ecc6d44228faa1e842a81f6d34a02937ee1736/bcrypt-5.0.0-cp38-abi3-macosx_10_12_universal2.whl", hash = "sha256:fc746432b951e92b58317af8e0ca746efe93e66555f1b40888865ef5bf56446b", size = 494553 }, - { url = "https://files.pythonhosted.org/packages/45/b6/4c1205dde5e464ea3bd88e8742e19f899c16fa8916fb8510a851fae985b5/bcrypt-5.0.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c2388ca94ffee269b6038d48747f4ce8df0ffbea43f31abfa18ac72f0218effb", size = 275009 }, - { url = "https://files.pythonhosted.org/packages/3b/71/427945e6ead72ccffe77894b2655b695ccf14ae1866cd977e185d606dd2f/bcrypt-5.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:560ddb6ec730386e7b3b26b8b4c88197aaed924430e7b74666a586ac997249ef", size = 278029 }, - { url = "https://files.pythonhosted.org/packages/17/72/c344825e3b83c5389a369c8a8e58ffe1480b8a699f46c127c34580c4666b/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d79e5c65dcc9af213594d6f7f1fa2c98ad3fc10431e7aa53c176b441943efbdd", size = 275907 }, - { url = "https://files.pythonhosted.org/packages/0b/7e/d4e47d2df1641a36d1212e5c0514f5291e1a956a7749f1e595c07a972038/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2b732e7d388fa22d48920baa267ba5d97cca38070b69c0e2d37087b381c681fd", size = 296500 }, - { url = "https://files.pythonhosted.org/packages/0f/c3/0ae57a68be2039287ec28bc463b82e4b8dc23f9d12c0be331f4782e19108/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0c8e093ea2532601a6f686edbc2c6b2ec24131ff5c52f7610dd64fa4553b5464", size = 278412 }, - { url = "https://files.pythonhosted.org/packages/45/2b/77424511adb11e6a99e3a00dcc7745034bee89036ad7d7e255a7e47be7d8/bcrypt-5.0.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:5b1589f4839a0899c146e8892efe320c0fa096568abd9b95593efac50a87cb75", size = 275486 }, - { url = "https://files.pythonhosted.org/packages/43/0a/405c753f6158e0f3f14b00b462d8bca31296f7ecfc8fc8bc7919c0c7d73a/bcrypt-5.0.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:89042e61b5e808b67daf24a434d89bab164d4de1746b37a8d173b6b14f3db9ff", size = 277940 }, - { url = "https://files.pythonhosted.org/packages/62/83/b3efc285d4aadc1fa83db385ec64dcfa1707e890eb42f03b127d66ac1b7b/bcrypt-5.0.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:e3cf5b2560c7b5a142286f69bde914494b6d8f901aaa71e453078388a50881c4", size = 310776 }, - { url = "https://files.pythonhosted.org/packages/95/7d/47ee337dacecde6d234890fe929936cb03ebc4c3a7460854bbd9c97780b8/bcrypt-5.0.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f632fd56fc4e61564f78b46a2269153122db34988e78b6be8b32d28507b7eaeb", size = 312922 }, - { url = "https://files.pythonhosted.org/packages/d6/3a/43d494dfb728f55f4e1cf8fd435d50c16a2d75493225b54c8d06122523c6/bcrypt-5.0.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:801cad5ccb6b87d1b430f183269b94c24f248dddbbc5c1f78b6ed231743e001c", size = 341367 }, - { url = "https://files.pythonhosted.org/packages/55/ab/a0727a4547e383e2e22a630e0f908113db37904f58719dc48d4622139b5c/bcrypt-5.0.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3cf67a804fc66fc217e6914a5635000259fbbbb12e78a99488e4d5ba445a71eb", size = 359187 }, - { url = "https://files.pythonhosted.org/packages/1b/bb/461f352fdca663524b4643d8b09e8435b4990f17fbf4fea6bc2a90aa0cc7/bcrypt-5.0.0-cp38-abi3-win32.whl", hash = "sha256:3abeb543874b2c0524ff40c57a4e14e5d3a66ff33fb423529c88f180fd756538", size = 153752 }, - { url = "https://files.pythonhosted.org/packages/41/aa/4190e60921927b7056820291f56fc57d00d04757c8b316b2d3c0d1d6da2c/bcrypt-5.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:35a77ec55b541e5e583eb3436ffbbf53b0ffa1fa16ca6782279daf95d146dcd9", size = 150881 }, - { url = "https://files.pythonhosted.org/packages/54/12/cd77221719d0b39ac0b55dbd39358db1cd1246e0282e104366ebbfb8266a/bcrypt-5.0.0-cp38-abi3-win_arm64.whl", hash = "sha256:cde08734f12c6a4e28dc6755cd11d3bdfea608d93d958fffbe95a7026ebe4980", size = 144931 }, - { url = "https://files.pythonhosted.org/packages/5d/ba/2af136406e1c3839aea9ecadc2f6be2bcd1eff255bd451dd39bcf302c47a/bcrypt-5.0.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:0c418ca99fd47e9c59a301744d63328f17798b5947b0f791e9af3c1c499c2d0a", size = 495313 }, - { url = "https://files.pythonhosted.org/packages/ac/ee/2f4985dbad090ace5ad1f7dd8ff94477fe089b5fab2040bd784a3d5f187b/bcrypt-5.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddb4e1500f6efdd402218ffe34d040a1196c072e07929b9820f363a1fd1f4191", size = 275290 }, - { url = "https://files.pythonhosted.org/packages/e4/6e/b77ade812672d15cf50842e167eead80ac3514f3beacac8902915417f8b7/bcrypt-5.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7aeef54b60ceddb6f30ee3db090351ecf0d40ec6e2abf41430997407a46d2254", size = 278253 }, - { url = "https://files.pythonhosted.org/packages/36/c4/ed00ed32f1040f7990dac7115f82273e3c03da1e1a1587a778d8cea496d8/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f0ce778135f60799d89c9693b9b398819d15f1921ba15fe719acb3178215a7db", size = 276084 }, - { url = "https://files.pythonhosted.org/packages/e7/c4/fa6e16145e145e87f1fa351bbd54b429354fd72145cd3d4e0c5157cf4c70/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a71f70ee269671460b37a449f5ff26982a6f2ba493b3eabdd687b4bf35f875ac", size = 297185 }, - { url = "https://files.pythonhosted.org/packages/24/b4/11f8a31d8b67cca3371e046db49baa7c0594d71eb40ac8121e2fc0888db0/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f8429e1c410b4073944f03bd778a9e066e7fad723564a52ff91841d278dfc822", size = 278656 }, - { url = "https://files.pythonhosted.org/packages/ac/31/79f11865f8078e192847d2cb526e3fa27c200933c982c5b2869720fa5fce/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:edfcdcedd0d0f05850c52ba3127b1fce70b9f89e0fe5ff16517df7e81fa3cbb8", size = 275662 }, - { url = "https://files.pythonhosted.org/packages/d4/8d/5e43d9584b3b3591a6f9b68f755a4da879a59712981ef5ad2a0ac1379f7a/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:611f0a17aa4a25a69362dcc299fda5c8a3d4f160e2abb3831041feb77393a14a", size = 278240 }, - { url = "https://files.pythonhosted.org/packages/89/48/44590e3fc158620f680a978aafe8f87a4c4320da81ed11552f0323aa9a57/bcrypt-5.0.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:db99dca3b1fdc3db87d7c57eac0c82281242d1eabf19dcb8a6b10eb29a2e72d1", size = 311152 }, - { url = "https://files.pythonhosted.org/packages/5f/85/e4fbfc46f14f47b0d20493669a625da5827d07e8a88ee460af6cd9768b44/bcrypt-5.0.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:5feebf85a9cefda32966d8171f5db7e3ba964b77fdfe31919622256f80f9cf42", size = 313284 }, - { url = "https://files.pythonhosted.org/packages/25/ae/479f81d3f4594456a01ea2f05b132a519eff9ab5768a70430fa1132384b1/bcrypt-5.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3ca8a166b1140436e058298a34d88032ab62f15aae1c598580333dc21d27ef10", size = 341643 }, - { url = "https://files.pythonhosted.org/packages/df/d2/36a086dee1473b14276cd6ea7f61aef3b2648710b5d7f1c9e032c29b859f/bcrypt-5.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:61afc381250c3182d9078551e3ac3a41da14154fbff647ddf52a769f588c4172", size = 359698 }, - { url = "https://files.pythonhosted.org/packages/c0/f6/688d2cd64bfd0b14d805ddb8a565e11ca1fb0fd6817175d58b10052b6d88/bcrypt-5.0.0-cp39-abi3-win32.whl", hash = "sha256:64d7ce196203e468c457c37ec22390f1a61c85c6f0b8160fd752940ccfb3a683", size = 153725 }, - { url = "https://files.pythonhosted.org/packages/9f/b9/9d9a641194a730bda138b3dfe53f584d61c58cd5230e37566e83ec2ffa0d/bcrypt-5.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:64ee8434b0da054d830fa8e89e1c8bf30061d539044a39524ff7dec90481e5c2", size = 150912 }, - { url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953 }, +sdist = { url = "https://files.pythonhosted.org/packages/d4/36/3329e2518d70ad8e2e5817d5a4cac6bba05a47767ec416c7d020a965f408/bcrypt-5.0.0.tar.gz", hash = "sha256:f748f7c2d6fd375cc93d3fba7ef4a9e3a092421b8dbf34d8d4dc06be9492dfdd", size = 25386, upload-time = "2025-09-25T19:50:47.829Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/29/6237f151fbfe295fe3e074ecc6d44228faa1e842a81f6d34a02937ee1736/bcrypt-5.0.0-cp38-abi3-macosx_10_12_universal2.whl", hash = "sha256:fc746432b951e92b58317af8e0ca746efe93e66555f1b40888865ef5bf56446b", size = 494553, upload-time = "2025-09-25T19:49:49.006Z" }, + { url = "https://files.pythonhosted.org/packages/45/b6/4c1205dde5e464ea3bd88e8742e19f899c16fa8916fb8510a851fae985b5/bcrypt-5.0.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c2388ca94ffee269b6038d48747f4ce8df0ffbea43f31abfa18ac72f0218effb", size = 275009, upload-time = "2025-09-25T19:49:50.581Z" }, + { url = "https://files.pythonhosted.org/packages/3b/71/427945e6ead72ccffe77894b2655b695ccf14ae1866cd977e185d606dd2f/bcrypt-5.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:560ddb6ec730386e7b3b26b8b4c88197aaed924430e7b74666a586ac997249ef", size = 278029, upload-time = "2025-09-25T19:49:52.533Z" }, + { url = "https://files.pythonhosted.org/packages/17/72/c344825e3b83c5389a369c8a8e58ffe1480b8a699f46c127c34580c4666b/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d79e5c65dcc9af213594d6f7f1fa2c98ad3fc10431e7aa53c176b441943efbdd", size = 275907, upload-time = "2025-09-25T19:49:54.709Z" }, + { url = "https://files.pythonhosted.org/packages/0b/7e/d4e47d2df1641a36d1212e5c0514f5291e1a956a7749f1e595c07a972038/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2b732e7d388fa22d48920baa267ba5d97cca38070b69c0e2d37087b381c681fd", size = 296500, upload-time = "2025-09-25T19:49:56.013Z" }, + { url = "https://files.pythonhosted.org/packages/0f/c3/0ae57a68be2039287ec28bc463b82e4b8dc23f9d12c0be331f4782e19108/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0c8e093ea2532601a6f686edbc2c6b2ec24131ff5c52f7610dd64fa4553b5464", size = 278412, upload-time = "2025-09-25T19:49:57.356Z" }, + { url = "https://files.pythonhosted.org/packages/45/2b/77424511adb11e6a99e3a00dcc7745034bee89036ad7d7e255a7e47be7d8/bcrypt-5.0.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:5b1589f4839a0899c146e8892efe320c0fa096568abd9b95593efac50a87cb75", size = 275486, upload-time = "2025-09-25T19:49:59.116Z" }, + { url = "https://files.pythonhosted.org/packages/43/0a/405c753f6158e0f3f14b00b462d8bca31296f7ecfc8fc8bc7919c0c7d73a/bcrypt-5.0.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:89042e61b5e808b67daf24a434d89bab164d4de1746b37a8d173b6b14f3db9ff", size = 277940, upload-time = "2025-09-25T19:50:00.869Z" }, + { url = "https://files.pythonhosted.org/packages/62/83/b3efc285d4aadc1fa83db385ec64dcfa1707e890eb42f03b127d66ac1b7b/bcrypt-5.0.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:e3cf5b2560c7b5a142286f69bde914494b6d8f901aaa71e453078388a50881c4", size = 310776, upload-time = "2025-09-25T19:50:02.393Z" }, + { url = "https://files.pythonhosted.org/packages/95/7d/47ee337dacecde6d234890fe929936cb03ebc4c3a7460854bbd9c97780b8/bcrypt-5.0.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f632fd56fc4e61564f78b46a2269153122db34988e78b6be8b32d28507b7eaeb", size = 312922, upload-time = "2025-09-25T19:50:04.232Z" }, + { url = "https://files.pythonhosted.org/packages/d6/3a/43d494dfb728f55f4e1cf8fd435d50c16a2d75493225b54c8d06122523c6/bcrypt-5.0.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:801cad5ccb6b87d1b430f183269b94c24f248dddbbc5c1f78b6ed231743e001c", size = 341367, upload-time = "2025-09-25T19:50:05.559Z" }, + { url = "https://files.pythonhosted.org/packages/55/ab/a0727a4547e383e2e22a630e0f908113db37904f58719dc48d4622139b5c/bcrypt-5.0.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3cf67a804fc66fc217e6914a5635000259fbbbb12e78a99488e4d5ba445a71eb", size = 359187, upload-time = "2025-09-25T19:50:06.916Z" }, + { url = "https://files.pythonhosted.org/packages/1b/bb/461f352fdca663524b4643d8b09e8435b4990f17fbf4fea6bc2a90aa0cc7/bcrypt-5.0.0-cp38-abi3-win32.whl", hash = "sha256:3abeb543874b2c0524ff40c57a4e14e5d3a66ff33fb423529c88f180fd756538", size = 153752, upload-time = "2025-09-25T19:50:08.515Z" }, + { url = "https://files.pythonhosted.org/packages/41/aa/4190e60921927b7056820291f56fc57d00d04757c8b316b2d3c0d1d6da2c/bcrypt-5.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:35a77ec55b541e5e583eb3436ffbbf53b0ffa1fa16ca6782279daf95d146dcd9", size = 150881, upload-time = "2025-09-25T19:50:09.742Z" }, + { url = "https://files.pythonhosted.org/packages/54/12/cd77221719d0b39ac0b55dbd39358db1cd1246e0282e104366ebbfb8266a/bcrypt-5.0.0-cp38-abi3-win_arm64.whl", hash = "sha256:cde08734f12c6a4e28dc6755cd11d3bdfea608d93d958fffbe95a7026ebe4980", size = 144931, upload-time = "2025-09-25T19:50:11.016Z" }, + { url = "https://files.pythonhosted.org/packages/5d/ba/2af136406e1c3839aea9ecadc2f6be2bcd1eff255bd451dd39bcf302c47a/bcrypt-5.0.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:0c418ca99fd47e9c59a301744d63328f17798b5947b0f791e9af3c1c499c2d0a", size = 495313, upload-time = "2025-09-25T19:50:12.309Z" }, + { url = "https://files.pythonhosted.org/packages/ac/ee/2f4985dbad090ace5ad1f7dd8ff94477fe089b5fab2040bd784a3d5f187b/bcrypt-5.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddb4e1500f6efdd402218ffe34d040a1196c072e07929b9820f363a1fd1f4191", size = 275290, upload-time = "2025-09-25T19:50:13.673Z" }, + { url = "https://files.pythonhosted.org/packages/e4/6e/b77ade812672d15cf50842e167eead80ac3514f3beacac8902915417f8b7/bcrypt-5.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7aeef54b60ceddb6f30ee3db090351ecf0d40ec6e2abf41430997407a46d2254", size = 278253, upload-time = "2025-09-25T19:50:15.089Z" }, + { url = "https://files.pythonhosted.org/packages/36/c4/ed00ed32f1040f7990dac7115f82273e3c03da1e1a1587a778d8cea496d8/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f0ce778135f60799d89c9693b9b398819d15f1921ba15fe719acb3178215a7db", size = 276084, upload-time = "2025-09-25T19:50:16.699Z" }, + { url = "https://files.pythonhosted.org/packages/e7/c4/fa6e16145e145e87f1fa351bbd54b429354fd72145cd3d4e0c5157cf4c70/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a71f70ee269671460b37a449f5ff26982a6f2ba493b3eabdd687b4bf35f875ac", size = 297185, upload-time = "2025-09-25T19:50:18.525Z" }, + { url = "https://files.pythonhosted.org/packages/24/b4/11f8a31d8b67cca3371e046db49baa7c0594d71eb40ac8121e2fc0888db0/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f8429e1c410b4073944f03bd778a9e066e7fad723564a52ff91841d278dfc822", size = 278656, upload-time = "2025-09-25T19:50:19.809Z" }, + { url = "https://files.pythonhosted.org/packages/ac/31/79f11865f8078e192847d2cb526e3fa27c200933c982c5b2869720fa5fce/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:edfcdcedd0d0f05850c52ba3127b1fce70b9f89e0fe5ff16517df7e81fa3cbb8", size = 275662, upload-time = "2025-09-25T19:50:21.567Z" }, + { url = "https://files.pythonhosted.org/packages/d4/8d/5e43d9584b3b3591a6f9b68f755a4da879a59712981ef5ad2a0ac1379f7a/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:611f0a17aa4a25a69362dcc299fda5c8a3d4f160e2abb3831041feb77393a14a", size = 278240, upload-time = "2025-09-25T19:50:23.305Z" }, + { url = "https://files.pythonhosted.org/packages/89/48/44590e3fc158620f680a978aafe8f87a4c4320da81ed11552f0323aa9a57/bcrypt-5.0.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:db99dca3b1fdc3db87d7c57eac0c82281242d1eabf19dcb8a6b10eb29a2e72d1", size = 311152, upload-time = "2025-09-25T19:50:24.597Z" }, + { url = "https://files.pythonhosted.org/packages/5f/85/e4fbfc46f14f47b0d20493669a625da5827d07e8a88ee460af6cd9768b44/bcrypt-5.0.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:5feebf85a9cefda32966d8171f5db7e3ba964b77fdfe31919622256f80f9cf42", size = 313284, upload-time = "2025-09-25T19:50:26.268Z" }, + { url = "https://files.pythonhosted.org/packages/25/ae/479f81d3f4594456a01ea2f05b132a519eff9ab5768a70430fa1132384b1/bcrypt-5.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3ca8a166b1140436e058298a34d88032ab62f15aae1c598580333dc21d27ef10", size = 341643, upload-time = "2025-09-25T19:50:28.02Z" }, + { url = "https://files.pythonhosted.org/packages/df/d2/36a086dee1473b14276cd6ea7f61aef3b2648710b5d7f1c9e032c29b859f/bcrypt-5.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:61afc381250c3182d9078551e3ac3a41da14154fbff647ddf52a769f588c4172", size = 359698, upload-time = "2025-09-25T19:50:31.347Z" }, + { url = "https://files.pythonhosted.org/packages/c0/f6/688d2cd64bfd0b14d805ddb8a565e11ca1fb0fd6817175d58b10052b6d88/bcrypt-5.0.0-cp39-abi3-win32.whl", hash = "sha256:64d7ce196203e468c457c37ec22390f1a61c85c6f0b8160fd752940ccfb3a683", size = 153725, upload-time = "2025-09-25T19:50:34.384Z" }, + { url = "https://files.pythonhosted.org/packages/9f/b9/9d9a641194a730bda138b3dfe53f584d61c58cd5230e37566e83ec2ffa0d/bcrypt-5.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:64ee8434b0da054d830fa8e89e1c8bf30061d539044a39524ff7dec90481e5c2", size = 150912, upload-time = "2025-09-25T19:50:35.69Z" }, + { url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" }, ] [[package]] @@ -336,27 +336,27 @@ dependencies = [ { name = "soupsieve" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/77/e9/df2358efd7659577435e2177bfa69cba6c33216681af51a707193dec162a/beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e", size = 625822 } +sdist = { url = "https://files.pythonhosted.org/packages/77/e9/df2358efd7659577435e2177bfa69cba6c33216681af51a707193dec162a/beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e", size = 625822, upload-time = "2025-09-29T10:05:42.613Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392 }, + { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392, upload-time = "2025-09-29T10:05:43.771Z" }, ] [[package]] name = "bidict" version = "0.23.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9a/6e/026678aa5a830e07cd9498a05d3e7e650a4f56a42f267a53d22bcda1bdc9/bidict-0.23.1.tar.gz", hash = "sha256:03069d763bc387bbd20e7d49914e75fc4132a41937fa3405417e1a5a2d006d71", size = 29093 } +sdist = { url = "https://files.pythonhosted.org/packages/9a/6e/026678aa5a830e07cd9498a05d3e7e650a4f56a42f267a53d22bcda1bdc9/bidict-0.23.1.tar.gz", hash = "sha256:03069d763bc387bbd20e7d49914e75fc4132a41937fa3405417e1a5a2d006d71", size = 29093, upload-time = "2024-02-18T19:09:05.748Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/99/37/e8730c3587a65eb5645d4aba2d27aae48e8003614d6aaf15dda67f702f1f/bidict-0.23.1-py3-none-any.whl", hash = "sha256:5dae8d4d79b552a71cbabc7deb25dfe8ce710b17ff41711e13010ead2abfc3e5", size = 32764 }, + { url = "https://files.pythonhosted.org/packages/99/37/e8730c3587a65eb5645d4aba2d27aae48e8003614d6aaf15dda67f702f1f/bidict-0.23.1-py3-none-any.whl", hash = "sha256:5dae8d4d79b552a71cbabc7deb25dfe8ce710b17ff41711e13010ead2abfc3e5", size = 32764, upload-time = "2024-02-18T19:09:04.156Z" }, ] [[package]] name = "billiard" version = "4.2.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b9/6a/1405343016bce8354b29d90aad6b0bf6485b5e60404516e4b9a3a9646cf0/billiard-4.2.2.tar.gz", hash = "sha256:e815017a062b714958463e07ba15981d802dc53d41c5b69d28c5a7c238f8ecf3", size = 155592 } +sdist = { url = "https://files.pythonhosted.org/packages/b9/6a/1405343016bce8354b29d90aad6b0bf6485b5e60404516e4b9a3a9646cf0/billiard-4.2.2.tar.gz", hash = "sha256:e815017a062b714958463e07ba15981d802dc53d41c5b69d28c5a7c238f8ecf3", size = 155592, upload-time = "2025-09-20T14:44:40.456Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a6/80/ef8dff49aae0e4430f81842f7403e14e0ca59db7bbaf7af41245b67c6b25/billiard-4.2.2-py3-none-any.whl", hash = "sha256:4bc05dcf0d1cc6addef470723aac2a6232f3c7ed7475b0b580473a9145829457", size = 86896 }, + { url = "https://files.pythonhosted.org/packages/a6/80/ef8dff49aae0e4430f81842f7403e14e0ca59db7bbaf7af41245b67c6b25/billiard-4.2.2-py3-none-any.whl", hash = "sha256:4bc05dcf0d1cc6addef470723aac2a6232f3c7ed7475b0b580473a9145829457", size = 86896, upload-time = "2025-09-20T14:44:39.157Z" }, ] [[package]] @@ -368,9 +368,9 @@ dependencies = [ { name = "jmespath" }, { name = "s3transfer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1f/d9/35978a20f6f9a585ff83afb384faf71526a1b25c4131755b1cdb6687b1d9/boto3-1.34.131.tar.gz", hash = "sha256:dab8f72a6c4e62b4fd70da09e08a6b2a65ea2115b27dd63737142005776ef216", size = 108719 } +sdist = { url = "https://files.pythonhosted.org/packages/1f/d9/35978a20f6f9a585ff83afb384faf71526a1b25c4131755b1cdb6687b1d9/boto3-1.34.131.tar.gz", hash = "sha256:dab8f72a6c4e62b4fd70da09e08a6b2a65ea2115b27dd63737142005776ef216", size = 108719, upload-time = "2024-06-20T19:34:56.629Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3e/ce/f5e3fdab6012f5fa4a8f5e97e86cc42549729382a98faffbc1785f85e89f/boto3-1.34.131-py3-none-any.whl", hash = "sha256:05e388cb937e82be70bfd7eb0c84cf8011ff35cf582a593873ac21675268683b", size = 139172 }, + { url = "https://files.pythonhosted.org/packages/3e/ce/f5e3fdab6012f5fa4a8f5e97e86cc42549729382a98faffbc1785f85e89f/boto3-1.34.131-py3-none-any.whl", hash = "sha256:05e388cb937e82be70bfd7eb0c84cf8011ff35cf582a593873ac21675268683b", size = 139172, upload-time = "2024-06-20T19:34:44.219Z" }, ] [[package]] @@ -382,9 +382,9 @@ dependencies = [ { name = "python-dateutil" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/41/40/74bda5977985383b8ed403dced9d76ad5e1146db7b6c32089726b3130c8b/botocore-1.34.131.tar.gz", hash = "sha256:502ddafe1d627fcf1e4c007c86454e5dd011dba7c58bd8e8a5368a79f3e387dc", size = 12544482 } +sdist = { url = "https://files.pythonhosted.org/packages/41/40/74bda5977985383b8ed403dced9d76ad5e1146db7b6c32089726b3130c8b/botocore-1.34.131.tar.gz", hash = "sha256:502ddafe1d627fcf1e4c007c86454e5dd011dba7c58bd8e8a5368a79f3e387dc", size = 12544482, upload-time = "2024-06-20T19:34:04.853Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/46/1a/01785fad12a9b1dbeffebd97cd226ea5923114057c64a610dd4eb8a28c7b/botocore-1.34.131-py3-none-any.whl", hash = "sha256:13b011d7b206ce00727dcee26548fa3b550db9046d5a0e90ac25a6e6c8fde6ef", size = 12332729 }, + { url = "https://files.pythonhosted.org/packages/46/1a/01785fad12a9b1dbeffebd97cd226ea5923114057c64a610dd4eb8a28c7b/botocore-1.34.131-py3-none-any.whl", hash = "sha256:13b011d7b206ce00727dcee26548fa3b550db9046d5a0e90ac25a6e6c8fde6ef", size = 12332729, upload-time = "2024-06-20T19:33:51.589Z" }, ] [[package]] @@ -395,9 +395,9 @@ dependencies = [ { name = "boxsdk", extra = ["jwt"] }, { name = "fsspec" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/be/de/1c5e0faec600538f6a1d41c7ce7834cacddb2237923e30ddb225254b74b9/boxfs-0.2.1.tar.gz", hash = "sha256:c1889e12f53be3216b44f088237ac0f367a7a759a53b01b0c0edf2b3d694e50f", size = 9523 } +sdist = { url = "https://files.pythonhosted.org/packages/be/de/1c5e0faec600538f6a1d41c7ce7834cacddb2237923e30ddb225254b74b9/boxfs-0.2.1.tar.gz", hash = "sha256:c1889e12f53be3216b44f088237ac0f367a7a759a53b01b0c0edf2b3d694e50f", size = 9523, upload-time = "2023-08-23T19:24:35.233Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/86/bb/243d10169c8397051bad6bdd10beb2407fa490bfe01216f5fad09e066191/boxfs-0.2.1-py3-none-any.whl", hash = "sha256:ae796c30309bd5a02654fff9eddf1ed320356225568fad0e109e1942beaef72a", size = 9358 }, + { url = "https://files.pythonhosted.org/packages/86/bb/243d10169c8397051bad6bdd10beb2407fa490bfe01216f5fad09e066191/boxfs-0.2.1-py3-none-any.whl", hash = "sha256:ae796c30309bd5a02654fff9eddf1ed320356225568fad0e109e1942beaef72a", size = 9358, upload-time = "2023-08-23T19:24:34.066Z" }, ] [[package]] @@ -411,9 +411,9 @@ dependencies = [ { name = "requests-toolbelt" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bf/d7/c1a95bb602d7f90a85a68d8e6f11954e50c255110d39e2167c7796252622/boxsdk-3.14.0.tar.gz", hash = "sha256:7918b1929368724662474fffa417fa0457a523d089b8185260efbedd28c4f9b1", size = 232630 } +sdist = { url = "https://files.pythonhosted.org/packages/bf/d7/c1a95bb602d7f90a85a68d8e6f11954e50c255110d39e2167c7796252622/boxsdk-3.14.0.tar.gz", hash = "sha256:7918b1929368724662474fffa417fa0457a523d089b8185260efbedd28c4f9b1", size = 232630, upload-time = "2025-04-09T15:07:15.181Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/5d/4e15511e0f4f2f9fbbf4646a8d0e138e5c53a3d428f1724e7dc3c8acf556/boxsdk-3.14.0-py2.py3-none-any.whl", hash = "sha256:0314e2f172b050e98489955f2e9001263de79c3dd751e6feee19f2195fdf7c01", size = 141329 }, + { url = "https://files.pythonhosted.org/packages/4d/5d/4e15511e0f4f2f9fbbf4646a8d0e138e5c53a3d428f1724e7dc3c8acf556/boxsdk-3.14.0-py2.py3-none-any.whl", hash = "sha256:0314e2f172b050e98489955f2e9001263de79c3dd751e6feee19f2195fdf7c01", size = 141329, upload-time = "2025-04-09T15:07:13.295Z" }, ] [package.optional-dependencies] @@ -426,9 +426,9 @@ jwt = [ name = "cachetools" version = "5.5.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380 } +sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload-time = "2025-02-20T21:01:19.524Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080 }, + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" }, ] [[package]] @@ -445,18 +445,18 @@ dependencies = [ { name = "python-dateutil" }, { name = "vine" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bb/7d/6c289f407d219ba36d8b384b42489ebdd0c84ce9c413875a8aae0c85f35b/celery-5.5.3.tar.gz", hash = "sha256:6c972ae7968c2b5281227f01c3a3f984037d21c5129d07bf3550cc2afc6b10a5", size = 1667144 } +sdist = { url = "https://files.pythonhosted.org/packages/bb/7d/6c289f407d219ba36d8b384b42489ebdd0c84ce9c413875a8aae0c85f35b/celery-5.5.3.tar.gz", hash = "sha256:6c972ae7968c2b5281227f01c3a3f984037d21c5129d07bf3550cc2afc6b10a5", size = 1667144, upload-time = "2025-06-01T11:08:12.563Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/af/0dcccc7fdcdf170f9a1585e5e96b6fb0ba1749ef6be8c89a6202284759bd/celery-5.5.3-py3-none-any.whl", hash = "sha256:0b5761a07057acee94694464ca482416b959568904c9dfa41ce8413a7d65d525", size = 438775 }, + { url = "https://files.pythonhosted.org/packages/c9/af/0dcccc7fdcdf170f9a1585e5e96b6fb0ba1749ef6be8c89a6202284759bd/celery-5.5.3-py3-none-any.whl", hash = "sha256:0b5761a07057acee94694464ca482416b959568904c9dfa41ce8413a7d65d525", size = 438775, upload-time = "2025-06-01T11:08:09.94Z" }, ] [[package]] name = "certifi" version = "2025.8.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386 } +sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216 }, + { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" }, ] [[package]] @@ -466,48 +466,48 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pycparser" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 } +sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621, upload-time = "2024-09-04T20:45:21.852Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178 }, - { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840 }, - { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803 }, - { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850 }, - { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729 }, - { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256 }, - { url = "https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424 }, - { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568 }, - { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736 }, - { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448 }, - { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976 }, + { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178, upload-time = "2024-09-04T20:44:12.232Z" }, + { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840, upload-time = "2024-09-04T20:44:13.739Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803, upload-time = "2024-09-04T20:44:15.231Z" }, + { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850, upload-time = "2024-09-04T20:44:17.188Z" }, + { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729, upload-time = "2024-09-04T20:44:18.688Z" }, + { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256, upload-time = "2024-09-04T20:44:20.248Z" }, + { url = "https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424, upload-time = "2024-09-04T20:44:21.673Z" }, + { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568, upload-time = "2024-09-04T20:44:23.245Z" }, + { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736, upload-time = "2024-09-04T20:44:24.757Z" }, + { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448, upload-time = "2024-09-04T20:44:26.208Z" }, + { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976, upload-time = "2024-09-04T20:44:27.578Z" }, ] [[package]] name = "cfgv" version = "3.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114 } +sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 }, + { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" }, ] [[package]] name = "charset-normalizer" version = "3.4.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/83/2d/5fd176ceb9b2fc619e63405525573493ca23441330fcdaee6bef9460e924/charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14", size = 122371 } +sdist = { url = "https://files.pythonhosted.org/packages/83/2d/5fd176ceb9b2fc619e63405525573493ca23441330fcdaee6bef9460e924/charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14", size = 122371, upload-time = "2025-08-09T07:57:28.46Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/5e/14c94999e418d9b87682734589404a25854d5f5d0408df68bc15b6ff54bb/charset_normalizer-3.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1", size = 205655 }, - { url = "https://files.pythonhosted.org/packages/7d/a8/c6ec5d389672521f644505a257f50544c074cf5fc292d5390331cd6fc9c3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884", size = 146223 }, - { url = "https://files.pythonhosted.org/packages/fc/eb/a2ffb08547f4e1e5415fb69eb7db25932c52a52bed371429648db4d84fb1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018", size = 159366 }, - { url = "https://files.pythonhosted.org/packages/82/10/0fd19f20c624b278dddaf83b8464dcddc2456cb4b02bb902a6da126b87a1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392", size = 157104 }, - { url = "https://files.pythonhosted.org/packages/16/ab/0233c3231af734f5dfcf0844aa9582d5a1466c985bbed6cedab85af9bfe3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f", size = 151830 }, - { url = "https://files.pythonhosted.org/packages/ae/02/e29e22b4e02839a0e4a06557b1999d0a47db3567e82989b5bb21f3fbbd9f/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154", size = 148854 }, - { url = "https://files.pythonhosted.org/packages/05/6b/e2539a0a4be302b481e8cafb5af8792da8093b486885a1ae4d15d452bcec/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491", size = 160670 }, - { url = "https://files.pythonhosted.org/packages/31/e7/883ee5676a2ef217a40ce0bffcc3d0dfbf9e64cbcfbdf822c52981c3304b/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93", size = 158501 }, - { url = "https://files.pythonhosted.org/packages/c1/35/6525b21aa0db614cf8b5792d232021dca3df7f90a1944db934efa5d20bb1/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f", size = 153173 }, - { url = "https://files.pythonhosted.org/packages/50/ee/f4704bad8201de513fdc8aac1cabc87e38c5818c93857140e06e772b5892/charset_normalizer-3.4.3-cp312-cp312-win32.whl", hash = "sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37", size = 99822 }, - { url = "https://files.pythonhosted.org/packages/39/f5/3b3836ca6064d0992c58c7561c6b6eee1b3892e9665d650c803bd5614522/charset_normalizer-3.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc", size = 107543 }, - { url = "https://files.pythonhosted.org/packages/8a/1f/f041989e93b001bc4e44bb1669ccdcf54d3f00e628229a85b08d330615c5/charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a", size = 53175 }, + { url = "https://files.pythonhosted.org/packages/e9/5e/14c94999e418d9b87682734589404a25854d5f5d0408df68bc15b6ff54bb/charset_normalizer-3.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1", size = 205655, upload-time = "2025-08-09T07:56:08.475Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a8/c6ec5d389672521f644505a257f50544c074cf5fc292d5390331cd6fc9c3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884", size = 146223, upload-time = "2025-08-09T07:56:09.708Z" }, + { url = "https://files.pythonhosted.org/packages/fc/eb/a2ffb08547f4e1e5415fb69eb7db25932c52a52bed371429648db4d84fb1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018", size = 159366, upload-time = "2025-08-09T07:56:11.326Z" }, + { url = "https://files.pythonhosted.org/packages/82/10/0fd19f20c624b278dddaf83b8464dcddc2456cb4b02bb902a6da126b87a1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392", size = 157104, upload-time = "2025-08-09T07:56:13.014Z" }, + { url = "https://files.pythonhosted.org/packages/16/ab/0233c3231af734f5dfcf0844aa9582d5a1466c985bbed6cedab85af9bfe3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f", size = 151830, upload-time = "2025-08-09T07:56:14.428Z" }, + { url = "https://files.pythonhosted.org/packages/ae/02/e29e22b4e02839a0e4a06557b1999d0a47db3567e82989b5bb21f3fbbd9f/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154", size = 148854, upload-time = "2025-08-09T07:56:16.051Z" }, + { url = "https://files.pythonhosted.org/packages/05/6b/e2539a0a4be302b481e8cafb5af8792da8093b486885a1ae4d15d452bcec/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491", size = 160670, upload-time = "2025-08-09T07:56:17.314Z" }, + { url = "https://files.pythonhosted.org/packages/31/e7/883ee5676a2ef217a40ce0bffcc3d0dfbf9e64cbcfbdf822c52981c3304b/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93", size = 158501, upload-time = "2025-08-09T07:56:18.641Z" }, + { url = "https://files.pythonhosted.org/packages/c1/35/6525b21aa0db614cf8b5792d232021dca3df7f90a1944db934efa5d20bb1/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f", size = 153173, upload-time = "2025-08-09T07:56:20.289Z" }, + { url = "https://files.pythonhosted.org/packages/50/ee/f4704bad8201de513fdc8aac1cabc87e38c5818c93857140e06e772b5892/charset_normalizer-3.4.3-cp312-cp312-win32.whl", hash = "sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37", size = 99822, upload-time = "2025-08-09T07:56:21.551Z" }, + { url = "https://files.pythonhosted.org/packages/39/f5/3b3836ca6064d0992c58c7561c6b6eee1b3892e9665d650c803bd5614522/charset_normalizer-3.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc", size = 107543, upload-time = "2025-08-09T07:56:23.115Z" }, + { url = "https://files.pythonhosted.org/packages/8a/1f/f041989e93b001bc4e44bb1669ccdcf54d3f00e628229a85b08d330615c5/charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a", size = 53175, upload-time = "2025-08-09T07:57:26.864Z" }, ] [[package]] @@ -517,9 +517,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/46/61/de6cd827efad202d7057d93e0fed9294b96952e188f7384832791c7b2254/click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4", size = 276943 } +sdist = { url = "https://files.pythonhosted.org/packages/46/61/de6cd827efad202d7057d93e0fed9294b96952e188f7384832791c7b2254/click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4", size = 276943, upload-time = "2025-09-18T17:32:23.696Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/db/d3/9dcc0f5797f070ec8edf30fbadfb200e71d9db6b84d211e3b2085a7589a0/click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc", size = 107295 }, + { url = "https://files.pythonhosted.org/packages/db/d3/9dcc0f5797f070ec8edf30fbadfb200e71d9db6b84d211e3b2085a7589a0/click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc", size = 107295, upload-time = "2025-09-18T17:32:22.42Z" }, ] [[package]] @@ -529,9 +529,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/30/ce/217289b77c590ea1e7c24242d9ddd6e249e52c795ff10fac2c50062c48cb/click_didyoumean-0.3.1.tar.gz", hash = "sha256:4f82fdff0dbe64ef8ab2279bd6aa3f6a99c3b28c05aa09cbfc07c9d7fbb5a463", size = 3089 } +sdist = { url = "https://files.pythonhosted.org/packages/30/ce/217289b77c590ea1e7c24242d9ddd6e249e52c795ff10fac2c50062c48cb/click_didyoumean-0.3.1.tar.gz", hash = "sha256:4f82fdff0dbe64ef8ab2279bd6aa3f6a99c3b28c05aa09cbfc07c9d7fbb5a463", size = 3089, upload-time = "2024-03-24T08:22:07.499Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1b/5b/974430b5ffdb7a4f1941d13d83c64a0395114503cc357c6b9ae4ce5047ed/click_didyoumean-0.3.1-py3-none-any.whl", hash = "sha256:5c4bb6007cfea5f2fd6583a2fb6701a22a41eb98957e63d0fac41c10e7c3117c", size = 3631 }, + { url = "https://files.pythonhosted.org/packages/1b/5b/974430b5ffdb7a4f1941d13d83c64a0395114503cc357c6b9ae4ce5047ed/click_didyoumean-0.3.1-py3-none-any.whl", hash = "sha256:5c4bb6007cfea5f2fd6583a2fb6701a22a41eb98957e63d0fac41c10e7c3117c", size = 3631, upload-time = "2024-03-24T08:22:06.356Z" }, ] [[package]] @@ -541,9 +541,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c3/a4/34847b59150da33690a36da3681d6bbc2ec14ee9a846bc30a6746e5984e4/click_plugins-1.1.1.2.tar.gz", hash = "sha256:d7af3984a99d243c131aa1a828331e7630f4a88a9741fd05c927b204bcf92261", size = 8343 } +sdist = { url = "https://files.pythonhosted.org/packages/c3/a4/34847b59150da33690a36da3681d6bbc2ec14ee9a846bc30a6746e5984e4/click_plugins-1.1.1.2.tar.gz", hash = "sha256:d7af3984a99d243c131aa1a828331e7630f4a88a9741fd05c927b204bcf92261", size = 8343, upload-time = "2025-06-25T00:47:37.555Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/9a/2abecb28ae875e39c8cad711eb1186d8d14eab564705325e77e4e6ab9ae5/click_plugins-1.1.1.2-py2.py3-none-any.whl", hash = "sha256:008d65743833ffc1f5417bf0e78e8d2c23aab04d9745ba817bd3e71b0feb6aa6", size = 11051 }, + { url = "https://files.pythonhosted.org/packages/3d/9a/2abecb28ae875e39c8cad711eb1186d8d14eab564705325e77e4e6ab9ae5/click_plugins-1.1.1.2-py2.py3-none-any.whl", hash = "sha256:008d65743833ffc1f5417bf0e78e8d2c23aab04d9745ba817bd3e71b0feb6aa6", size = 11051, upload-time = "2025-06-25T00:47:36.731Z" }, ] [[package]] @@ -554,25 +554,25 @@ dependencies = [ { name = "click" }, { name = "prompt-toolkit" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cb/a2/57f4ac79838cfae6912f997b4d1a64a858fb0c86d7fcaae6f7b58d267fca/click-repl-0.3.0.tar.gz", hash = "sha256:17849c23dba3d667247dc4defe1757fff98694e90fe37474f3feebb69ced26a9", size = 10449 } +sdist = { url = "https://files.pythonhosted.org/packages/cb/a2/57f4ac79838cfae6912f997b4d1a64a858fb0c86d7fcaae6f7b58d267fca/click-repl-0.3.0.tar.gz", hash = "sha256:17849c23dba3d667247dc4defe1757fff98694e90fe37474f3feebb69ced26a9", size = 10449, upload-time = "2023-06-15T12:43:51.141Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/52/40/9d857001228658f0d59e97ebd4c346fe73e138c6de1bce61dc568a57c7f8/click_repl-0.3.0-py3-none-any.whl", hash = "sha256:fb7e06deb8da8de86180a33a9da97ac316751c094c6899382da7feeeeb51b812", size = 10289 }, + { url = "https://files.pythonhosted.org/packages/52/40/9d857001228658f0d59e97ebd4c346fe73e138c6de1bce61dc568a57c7f8/click_repl-0.3.0-py3-none-any.whl", hash = "sha256:fb7e06deb8da8de86180a33a9da97ac316751c094c6899382da7feeeeb51b812", size = 10289, upload-time = "2023-06-15T12:43:48.626Z" }, ] [[package]] name = "colorama" version = "0.4.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] [[package]] name = "cron-descriptor" version = "1.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/24/a0/455f5a0181cf9a0d2e84d3a66c88de019dce5644ad9680825d1c8a403335/cron_descriptor-1.4.0.tar.gz", hash = "sha256:b6ff4e3a988d7ca04a4ab150248e9f166fb7a5c828a85090e75bcc25aa93b4dd", size = 29922 } +sdist = { url = "https://files.pythonhosted.org/packages/24/a0/455f5a0181cf9a0d2e84d3a66c88de019dce5644ad9680825d1c8a403335/cron_descriptor-1.4.0.tar.gz", hash = "sha256:b6ff4e3a988d7ca04a4ab150248e9f166fb7a5c828a85090e75bcc25aa93b4dd", size = 29922, upload-time = "2023-05-19T07:46:16.992Z" } [[package]] name = "cryptography" @@ -581,39 +581,39 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/80/ee/04cd4314db26ffc951c1ea90bde30dd226880ab9343759d7abbecef377ee/cryptography-46.0.0.tar.gz", hash = "sha256:99f64a6d15f19f3afd78720ad2978f6d8d4c68cd4eb600fab82ab1a7c2071dca", size = 749158 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/bd/3e935ca6e87dc4969683f5dd9e49adaf2cb5734253d93317b6b346e0bd33/cryptography-46.0.0-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:c9c4121f9a41cc3d02164541d986f59be31548ad355a5c96ac50703003c50fb7", size = 7285468 }, - { url = "https://files.pythonhosted.org/packages/c7/ee/dd17f412ce64b347871d7752657c5084940d42af4d9c25b1b91c7ee53362/cryptography-46.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4f70cbade61a16f5e238c4b0eb4e258d177a2fcb59aa0aae1236594f7b0ae338", size = 4308218 }, - { url = "https://files.pythonhosted.org/packages/2f/53/f0b865a971e4e8b3e90e648b6f828950dea4c221bb699421e82ef45f0ef9/cryptography-46.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d1eccae15d5c28c74b2bea228775c63ac5b6c36eedb574e002440c0bc28750d3", size = 4571982 }, - { url = "https://files.pythonhosted.org/packages/d4/c8/035be5fd63a98284fd74df9e04156f9fed7aa45cef41feceb0d06cbdadd0/cryptography-46.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1b4fba84166d906a22027f0d958e42f3a4dbbb19c28ea71f0fb7812380b04e3c", size = 4307996 }, - { url = "https://files.pythonhosted.org/packages/aa/4a/dbb6d7d0a48b95984e2d4caf0a4c7d6606cea5d30241d984c0c02b47f1b6/cryptography-46.0.0-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:523153480d7575a169933f083eb47b1edd5fef45d87b026737de74ffeb300f69", size = 4015692 }, - { url = "https://files.pythonhosted.org/packages/65/48/aafcffdde716f6061864e56a0a5908f08dcb8523dab436228957c8ebd5df/cryptography-46.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:f09a3a108223e319168b7557810596631a8cb864657b0c16ed7a6017f0be9433", size = 4982192 }, - { url = "https://files.pythonhosted.org/packages/4c/ab/1e73cfc181afc3054a09e5e8f7753a8fba254592ff50b735d7456d197353/cryptography-46.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:c1f6ccd6f2eef3b2eb52837f0463e853501e45a916b3fc42e5d93cf244a4b97b", size = 4603944 }, - { url = "https://files.pythonhosted.org/packages/3a/02/d71dac90b77c606c90c366571edf264dc8bd37cf836e7f902253cbf5aa77/cryptography-46.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:80a548a5862d6912a45557a101092cd6c64ae1475b82cef50ee305d14a75f598", size = 4308149 }, - { url = "https://files.pythonhosted.org/packages/29/e6/4dcb67fdc6addf4e319a99c4bed25776cb691f3aa6e0c4646474748816c6/cryptography-46.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:6c39fd5cd9b7526afa69d64b5e5645a06e1b904f342584b3885254400b63f1b3", size = 4947449 }, - { url = "https://files.pythonhosted.org/packages/26/04/91e3fad8ee33aa87815c8f25563f176a58da676c2b14757a4d3b19f0253c/cryptography-46.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:d5c0cbb2fb522f7e39b59a5482a1c9c5923b7c506cfe96a1b8e7368c31617ac0", size = 4603549 }, - { url = "https://files.pythonhosted.org/packages/9c/6e/caf4efadcc8f593cbaacfbb04778f78b6d0dac287b45cec25e5054de38b7/cryptography-46.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6d8945bc120dcd90ae39aa841afddaeafc5f2e832809dc54fb906e3db829dfdc", size = 4435976 }, - { url = "https://files.pythonhosted.org/packages/c1/c0/704710f349db25c5b91965c3662d5a758011b2511408d9451126429b6cd6/cryptography-46.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:88c09da8a94ac27798f6b62de6968ac78bb94805b5d272dbcfd5fdc8c566999f", size = 4709447 }, - { url = "https://files.pythonhosted.org/packages/91/5e/ff63bfd27b75adaf75cc2398de28a0b08105f9d7f8193f3b9b071e38e8b9/cryptography-46.0.0-cp311-abi3-win32.whl", hash = "sha256:3738f50215211cee1974193a1809348d33893696ce119968932ea117bcbc9b1d", size = 3058317 }, - { url = "https://files.pythonhosted.org/packages/46/47/4caf35014c4551dd0b43aa6c2e250161f7ffcb9c3918c9e075785047d5d2/cryptography-46.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:bbaa5eef3c19c66613317dc61e211b48d5f550db009c45e1c28b59d5a9b7812a", size = 3523891 }, - { url = "https://files.pythonhosted.org/packages/98/66/6a0cafb3084a854acf808fccf756cbc9b835d1b99fb82c4a15e2e2ffb404/cryptography-46.0.0-cp311-abi3-win_arm64.whl", hash = "sha256:16b5ac72a965ec9d1e34d9417dbce235d45fa04dac28634384e3ce40dfc66495", size = 2932145 }, - { url = "https://files.pythonhosted.org/packages/f2/5f/0cf967a1dc1419d5dde111bd0e22872038199f4e4655539ea6f4da5ad7f1/cryptography-46.0.0-cp314-abi3-macosx_10_9_universal2.whl", hash = "sha256:91585fc9e696abd7b3e48a463a20dda1a5c0eeeca4ba60fa4205a79527694390", size = 7203952 }, - { url = "https://files.pythonhosted.org/packages/53/06/80e7256a4677c2e9eb762638e8200a51f6dd56d2e3de3e34d0a83c2f5f80/cryptography-46.0.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:1d2073313324226fd846e6b5fc340ed02d43fd7478f584741bd6b791c33c9fee", size = 7257206 }, - { url = "https://files.pythonhosted.org/packages/3d/b8/a5ed987f5c11b242713076121dddfff999d81fb492149c006a579d0e4099/cryptography-46.0.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:83af84ebe7b6e9b6de05050c79f8cc0173c864ce747b53abce6a11e940efdc0d", size = 4301182 }, - { url = "https://files.pythonhosted.org/packages/da/94/f1c1f30110c05fa5247bf460b17acfd52fa3f5c77e94ba19cff8957dc5e6/cryptography-46.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c3cd09b1490c1509bf3892bde9cef729795fae4a2fee0621f19be3321beca7e4", size = 4562561 }, - { url = "https://files.pythonhosted.org/packages/5d/54/8decbf2f707350bedcd525833d3a0cc0203d8b080d926ad75d5c4de701ba/cryptography-46.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d14eaf1569d6252280516bedaffdd65267428cdbc3a8c2d6de63753cf0863d5e", size = 4301974 }, - { url = "https://files.pythonhosted.org/packages/82/63/c34a2f3516c6b05801f129616a5a1c68a8c403b91f23f9db783ee1d4f700/cryptography-46.0.0-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:ab3a14cecc741c8c03ad0ad46dfbf18de25218551931a23bca2731d46c706d83", size = 4009462 }, - { url = "https://files.pythonhosted.org/packages/cd/c5/92ef920a4cf8ff35fcf9da5a09f008a6977dcb9801c709799ec1bf2873fb/cryptography-46.0.0-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:8e8b222eb54e3e7d3743a7c2b1f7fa7df7a9add790307bb34327c88ec85fe087", size = 4980769 }, - { url = "https://files.pythonhosted.org/packages/a9/8f/1705f7ea3b9468c4a4fef6cce631db14feb6748499870a4772993cbeb729/cryptography-46.0.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:7f3f88df0c9b248dcc2e76124f9140621aca187ccc396b87bc363f890acf3a30", size = 4591812 }, - { url = "https://files.pythonhosted.org/packages/34/b9/2d797ce9d346b8bac9f570b43e6e14226ff0f625f7f6f2f95d9065e316e3/cryptography-46.0.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9aa85222f03fdb30defabc7a9e1e3d4ec76eb74ea9fe1504b2800844f9c98440", size = 4301844 }, - { url = "https://files.pythonhosted.org/packages/a8/2d/8efc9712997b46aea2ac8f74adc31f780ac4662e3b107ecad0d5c1a0c7f8/cryptography-46.0.0-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:f9aaf2a91302e1490c068d2f3af7df4137ac2b36600f5bd26e53d9ec320412d3", size = 4943257 }, - { url = "https://files.pythonhosted.org/packages/c4/0c/bc365287a97d28aa7feef8810884831b2a38a8dc4cf0f8d6927ad1568d27/cryptography-46.0.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:32670ca085150ff36b438c17f2dfc54146fe4a074ebf0a76d72fb1b419a974bc", size = 4591154 }, - { url = "https://files.pythonhosted.org/packages/51/3b/0b15107277b0c558c02027da615f4e78c892f22c6a04d29c6ad43fcddca6/cryptography-46.0.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0f58183453032727a65e6605240e7a3824fd1d6a7e75d2b537e280286ab79a52", size = 4428200 }, - { url = "https://files.pythonhosted.org/packages/cf/24/814d69418247ea2cfc985eec6678239013500d745bc7a0a35a32c2e2f3be/cryptography-46.0.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4bc257c2d5d865ed37d0bd7c500baa71f939a7952c424f28632298d80ccd5ec1", size = 4699862 }, - { url = "https://files.pythonhosted.org/packages/fb/1e/665c718e0c45281a4e22454fa8a9bd8835f1ceb667b9ffe807baa41cd681/cryptography-46.0.0-cp38-abi3-win32.whl", hash = "sha256:df932ac70388be034b2e046e34d636245d5eeb8140db24a6b4c2268cd2073270", size = 3043766 }, - { url = "https://files.pythonhosted.org/packages/78/7e/12e1e13abff381c702697845d1cf372939957735f49ef66f2061f38da32f/cryptography-46.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:274f8b2eb3616709f437326185eb563eb4e5813d01ebe2029b61bfe7d9995fbb", size = 3517216 }, - { url = "https://files.pythonhosted.org/packages/ad/55/009497b2ae7375db090b41f9fe7a1a7362f804ddfe17ed9e34f748fcb0e5/cryptography-46.0.0-cp38-abi3-win_arm64.whl", hash = "sha256:249c41f2bbfa026615e7bdca47e4a66135baa81b08509ab240a2e666f6af5966", size = 2923145 }, +sdist = { url = "https://files.pythonhosted.org/packages/80/ee/04cd4314db26ffc951c1ea90bde30dd226880ab9343759d7abbecef377ee/cryptography-46.0.0.tar.gz", hash = "sha256:99f64a6d15f19f3afd78720ad2978f6d8d4c68cd4eb600fab82ab1a7c2071dca", size = 749158, upload-time = "2025-09-16T21:07:49.091Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/bd/3e935ca6e87dc4969683f5dd9e49adaf2cb5734253d93317b6b346e0bd33/cryptography-46.0.0-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:c9c4121f9a41cc3d02164541d986f59be31548ad355a5c96ac50703003c50fb7", size = 7285468, upload-time = "2025-09-16T21:05:52.026Z" }, + { url = "https://files.pythonhosted.org/packages/c7/ee/dd17f412ce64b347871d7752657c5084940d42af4d9c25b1b91c7ee53362/cryptography-46.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4f70cbade61a16f5e238c4b0eb4e258d177a2fcb59aa0aae1236594f7b0ae338", size = 4308218, upload-time = "2025-09-16T21:05:55.653Z" }, + { url = "https://files.pythonhosted.org/packages/2f/53/f0b865a971e4e8b3e90e648b6f828950dea4c221bb699421e82ef45f0ef9/cryptography-46.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d1eccae15d5c28c74b2bea228775c63ac5b6c36eedb574e002440c0bc28750d3", size = 4571982, upload-time = "2025-09-16T21:05:57.322Z" }, + { url = "https://files.pythonhosted.org/packages/d4/c8/035be5fd63a98284fd74df9e04156f9fed7aa45cef41feceb0d06cbdadd0/cryptography-46.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1b4fba84166d906a22027f0d958e42f3a4dbbb19c28ea71f0fb7812380b04e3c", size = 4307996, upload-time = "2025-09-16T21:05:59.043Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4a/dbb6d7d0a48b95984e2d4caf0a4c7d6606cea5d30241d984c0c02b47f1b6/cryptography-46.0.0-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:523153480d7575a169933f083eb47b1edd5fef45d87b026737de74ffeb300f69", size = 4015692, upload-time = "2025-09-16T21:06:01.324Z" }, + { url = "https://files.pythonhosted.org/packages/65/48/aafcffdde716f6061864e56a0a5908f08dcb8523dab436228957c8ebd5df/cryptography-46.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:f09a3a108223e319168b7557810596631a8cb864657b0c16ed7a6017f0be9433", size = 4982192, upload-time = "2025-09-16T21:06:03.367Z" }, + { url = "https://files.pythonhosted.org/packages/4c/ab/1e73cfc181afc3054a09e5e8f7753a8fba254592ff50b735d7456d197353/cryptography-46.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:c1f6ccd6f2eef3b2eb52837f0463e853501e45a916b3fc42e5d93cf244a4b97b", size = 4603944, upload-time = "2025-09-16T21:06:05.29Z" }, + { url = "https://files.pythonhosted.org/packages/3a/02/d71dac90b77c606c90c366571edf264dc8bd37cf836e7f902253cbf5aa77/cryptography-46.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:80a548a5862d6912a45557a101092cd6c64ae1475b82cef50ee305d14a75f598", size = 4308149, upload-time = "2025-09-16T21:06:07.006Z" }, + { url = "https://files.pythonhosted.org/packages/29/e6/4dcb67fdc6addf4e319a99c4bed25776cb691f3aa6e0c4646474748816c6/cryptography-46.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:6c39fd5cd9b7526afa69d64b5e5645a06e1b904f342584b3885254400b63f1b3", size = 4947449, upload-time = "2025-09-16T21:06:11.244Z" }, + { url = "https://files.pythonhosted.org/packages/26/04/91e3fad8ee33aa87815c8f25563f176a58da676c2b14757a4d3b19f0253c/cryptography-46.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:d5c0cbb2fb522f7e39b59a5482a1c9c5923b7c506cfe96a1b8e7368c31617ac0", size = 4603549, upload-time = "2025-09-16T21:06:13.268Z" }, + { url = "https://files.pythonhosted.org/packages/9c/6e/caf4efadcc8f593cbaacfbb04778f78b6d0dac287b45cec25e5054de38b7/cryptography-46.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6d8945bc120dcd90ae39aa841afddaeafc5f2e832809dc54fb906e3db829dfdc", size = 4435976, upload-time = "2025-09-16T21:06:16.514Z" }, + { url = "https://files.pythonhosted.org/packages/c1/c0/704710f349db25c5b91965c3662d5a758011b2511408d9451126429b6cd6/cryptography-46.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:88c09da8a94ac27798f6b62de6968ac78bb94805b5d272dbcfd5fdc8c566999f", size = 4709447, upload-time = "2025-09-16T21:06:19.246Z" }, + { url = "https://files.pythonhosted.org/packages/91/5e/ff63bfd27b75adaf75cc2398de28a0b08105f9d7f8193f3b9b071e38e8b9/cryptography-46.0.0-cp311-abi3-win32.whl", hash = "sha256:3738f50215211cee1974193a1809348d33893696ce119968932ea117bcbc9b1d", size = 3058317, upload-time = "2025-09-16T21:06:21.466Z" }, + { url = "https://files.pythonhosted.org/packages/46/47/4caf35014c4551dd0b43aa6c2e250161f7ffcb9c3918c9e075785047d5d2/cryptography-46.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:bbaa5eef3c19c66613317dc61e211b48d5f550db009c45e1c28b59d5a9b7812a", size = 3523891, upload-time = "2025-09-16T21:06:23.856Z" }, + { url = "https://files.pythonhosted.org/packages/98/66/6a0cafb3084a854acf808fccf756cbc9b835d1b99fb82c4a15e2e2ffb404/cryptography-46.0.0-cp311-abi3-win_arm64.whl", hash = "sha256:16b5ac72a965ec9d1e34d9417dbce235d45fa04dac28634384e3ce40dfc66495", size = 2932145, upload-time = "2025-09-16T21:06:25.842Z" }, + { url = "https://files.pythonhosted.org/packages/f2/5f/0cf967a1dc1419d5dde111bd0e22872038199f4e4655539ea6f4da5ad7f1/cryptography-46.0.0-cp314-abi3-macosx_10_9_universal2.whl", hash = "sha256:91585fc9e696abd7b3e48a463a20dda1a5c0eeeca4ba60fa4205a79527694390", size = 7203952, upload-time = "2025-09-16T21:06:28.21Z" }, + { url = "https://files.pythonhosted.org/packages/53/06/80e7256a4677c2e9eb762638e8200a51f6dd56d2e3de3e34d0a83c2f5f80/cryptography-46.0.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:1d2073313324226fd846e6b5fc340ed02d43fd7478f584741bd6b791c33c9fee", size = 7257206, upload-time = "2025-09-16T21:06:59.295Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b8/a5ed987f5c11b242713076121dddfff999d81fb492149c006a579d0e4099/cryptography-46.0.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:83af84ebe7b6e9b6de05050c79f8cc0173c864ce747b53abce6a11e940efdc0d", size = 4301182, upload-time = "2025-09-16T21:07:01.624Z" }, + { url = "https://files.pythonhosted.org/packages/da/94/f1c1f30110c05fa5247bf460b17acfd52fa3f5c77e94ba19cff8957dc5e6/cryptography-46.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c3cd09b1490c1509bf3892bde9cef729795fae4a2fee0621f19be3321beca7e4", size = 4562561, upload-time = "2025-09-16T21:07:03.386Z" }, + { url = "https://files.pythonhosted.org/packages/5d/54/8decbf2f707350bedcd525833d3a0cc0203d8b080d926ad75d5c4de701ba/cryptography-46.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d14eaf1569d6252280516bedaffdd65267428cdbc3a8c2d6de63753cf0863d5e", size = 4301974, upload-time = "2025-09-16T21:07:04.962Z" }, + { url = "https://files.pythonhosted.org/packages/82/63/c34a2f3516c6b05801f129616a5a1c68a8c403b91f23f9db783ee1d4f700/cryptography-46.0.0-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:ab3a14cecc741c8c03ad0ad46dfbf18de25218551931a23bca2731d46c706d83", size = 4009462, upload-time = "2025-09-16T21:07:06.569Z" }, + { url = "https://files.pythonhosted.org/packages/cd/c5/92ef920a4cf8ff35fcf9da5a09f008a6977dcb9801c709799ec1bf2873fb/cryptography-46.0.0-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:8e8b222eb54e3e7d3743a7c2b1f7fa7df7a9add790307bb34327c88ec85fe087", size = 4980769, upload-time = "2025-09-16T21:07:08.269Z" }, + { url = "https://files.pythonhosted.org/packages/a9/8f/1705f7ea3b9468c4a4fef6cce631db14feb6748499870a4772993cbeb729/cryptography-46.0.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:7f3f88df0c9b248dcc2e76124f9140621aca187ccc396b87bc363f890acf3a30", size = 4591812, upload-time = "2025-09-16T21:07:10.288Z" }, + { url = "https://files.pythonhosted.org/packages/34/b9/2d797ce9d346b8bac9f570b43e6e14226ff0f625f7f6f2f95d9065e316e3/cryptography-46.0.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9aa85222f03fdb30defabc7a9e1e3d4ec76eb74ea9fe1504b2800844f9c98440", size = 4301844, upload-time = "2025-09-16T21:07:12.522Z" }, + { url = "https://files.pythonhosted.org/packages/a8/2d/8efc9712997b46aea2ac8f74adc31f780ac4662e3b107ecad0d5c1a0c7f8/cryptography-46.0.0-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:f9aaf2a91302e1490c068d2f3af7df4137ac2b36600f5bd26e53d9ec320412d3", size = 4943257, upload-time = "2025-09-16T21:07:14.289Z" }, + { url = "https://files.pythonhosted.org/packages/c4/0c/bc365287a97d28aa7feef8810884831b2a38a8dc4cf0f8d6927ad1568d27/cryptography-46.0.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:32670ca085150ff36b438c17f2dfc54146fe4a074ebf0a76d72fb1b419a974bc", size = 4591154, upload-time = "2025-09-16T21:07:16.271Z" }, + { url = "https://files.pythonhosted.org/packages/51/3b/0b15107277b0c558c02027da615f4e78c892f22c6a04d29c6ad43fcddca6/cryptography-46.0.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0f58183453032727a65e6605240e7a3824fd1d6a7e75d2b537e280286ab79a52", size = 4428200, upload-time = "2025-09-16T21:07:18.118Z" }, + { url = "https://files.pythonhosted.org/packages/cf/24/814d69418247ea2cfc985eec6678239013500d745bc7a0a35a32c2e2f3be/cryptography-46.0.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4bc257c2d5d865ed37d0bd7c500baa71f939a7952c424f28632298d80ccd5ec1", size = 4699862, upload-time = "2025-09-16T21:07:20.219Z" }, + { url = "https://files.pythonhosted.org/packages/fb/1e/665c718e0c45281a4e22454fa8a9bd8835f1ceb667b9ffe807baa41cd681/cryptography-46.0.0-cp38-abi3-win32.whl", hash = "sha256:df932ac70388be034b2e046e34d636245d5eeb8140db24a6b4c2268cd2073270", size = 3043766, upload-time = "2025-09-16T21:07:21.969Z" }, + { url = "https://files.pythonhosted.org/packages/78/7e/12e1e13abff381c702697845d1cf372939957735f49ef66f2061f38da32f/cryptography-46.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:274f8b2eb3616709f437326185eb563eb4e5813d01ebe2029b61bfe7d9995fbb", size = 3517216, upload-time = "2025-09-16T21:07:24.024Z" }, + { url = "https://files.pythonhosted.org/packages/ad/55/009497b2ae7375db090b41f9fe7a1a7362f804ddfe17ed9e34f748fcb0e5/cryptography-46.0.0-cp38-abi3-win_arm64.whl", hash = "sha256:249c41f2bbfa026615e7bdca47e4a66135baa81b08509ab240a2e666f6af5966", size = 2923145, upload-time = "2025-09-16T21:07:25.74Z" }, ] [[package]] @@ -624,27 +624,27 @@ dependencies = [ { name = "marshmallow" }, { name = "typing-inspect" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/64/a4/f71d9cf3a5ac257c993b5ca3f93df5f7fb395c725e7f1e6479d2514173c3/dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0", size = 32227 } +sdist = { url = "https://files.pythonhosted.org/packages/64/a4/f71d9cf3a5ac257c993b5ca3f93df5f7fb395c725e7f1e6479d2514173c3/dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0", size = 32227, upload-time = "2024-06-09T16:20:19.103Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686 }, + { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" }, ] [[package]] name = "decorator" version = "5.2.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711 } +sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711, upload-time = "2025-02-24T04:41:34.073Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190 }, + { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" }, ] [[package]] name = "defusedxml" version = "0.7.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520 } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 }, + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, ] [[package]] @@ -654,9 +654,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744 } +sdist = { url = "https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744, upload-time = "2025-01-27T10:46:25.7Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 }, + { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998, upload-time = "2025-01-27T10:46:09.186Z" }, ] [[package]] @@ -666,36 +666,36 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/d3/8ae2869247df154b64c1884d7346d412fed0c49df84db635aab2d1c40e62/deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff", size = 173788 } +sdist = { url = "https://files.pythonhosted.org/packages/5a/d3/8ae2869247df154b64c1884d7346d412fed0c49df84db635aab2d1c40e62/deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff", size = 173788, upload-time = "2020-04-20T14:23:38.738Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a", size = 11178 }, + { url = "https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a", size = 11178, upload-time = "2020-04-20T14:23:36.581Z" }, ] [[package]] name = "dirtyjson" version = "1.0.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/db/04/d24f6e645ad82ba0ef092fa17d9ef7a21953781663648a01c9371d9e8e98/dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd", size = 30782 } +sdist = { url = "https://files.pythonhosted.org/packages/db/04/d24f6e645ad82ba0ef092fa17d9ef7a21953781663648a01c9371d9e8e98/dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd", size = 30782, upload-time = "2022-11-28T23:32:33.319Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197 }, + { url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197, upload-time = "2022-11-28T23:32:31.219Z" }, ] [[package]] name = "distlib" version = "0.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605 } +sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047 }, + { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] [[package]] name = "distro" version = "1.9.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, ] [[package]] @@ -707,9 +707,9 @@ dependencies = [ { name = "sqlparse" }, { name = "tzdata", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/89/76/23ee9b9d2bd4119e930eb19164732b79c0a4f6259ca198209b0fe36551ea/Django-4.2.1.tar.gz", hash = "sha256:7efa6b1f781a6119a10ac94b4794ded90db8accbe7802281cd26f8664ffed59c", size = 10420051 } +sdist = { url = "https://files.pythonhosted.org/packages/89/76/23ee9b9d2bd4119e930eb19164732b79c0a4f6259ca198209b0fe36551ea/Django-4.2.1.tar.gz", hash = "sha256:7efa6b1f781a6119a10ac94b4794ded90db8accbe7802281cd26f8664ffed59c", size = 10420051, upload-time = "2023-05-03T12:58:41.313Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/12/13/78e8622180f101e95297965045ff1325ea7301c1b80f756debbeaa84c3be/Django-4.2.1-py3-none-any.whl", hash = "sha256:066b6debb5ac335458d2a713ed995570536c8b59a580005acb0732378d5eb1ee", size = 7988496 }, + { url = "https://files.pythonhosted.org/packages/12/13/78e8622180f101e95297965045ff1325ea7301c1b80f756debbeaa84c3be/Django-4.2.1-py3-none-any.whl", hash = "sha256:066b6debb5ac335458d2a713ed995570536c8b59a580005acb0732378d5eb1ee", size = 7988496, upload-time = "2023-05-03T12:58:27.208Z" }, ] [[package]] @@ -724,9 +724,9 @@ dependencies = [ { name = "python-crontab" }, { name = "tzdata" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0b/97/ca63898f76dd43fc91f4791b05dbbecb60dc99215f16b270e9b1e29af974/django-celery-beat-2.5.0.tar.gz", hash = "sha256:cd0a47f5958402f51ac0c715bc942ae33d7b50b4e48cba91bc3f2712be505df1", size = 159635 } +sdist = { url = "https://files.pythonhosted.org/packages/0b/97/ca63898f76dd43fc91f4791b05dbbecb60dc99215f16b270e9b1e29af974/django-celery-beat-2.5.0.tar.gz", hash = "sha256:cd0a47f5958402f51ac0c715bc942ae33d7b50b4e48cba91bc3f2712be505df1", size = 159635, upload-time = "2023-03-14T10:02:10.9Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/92/fa53396870566276357bb81e3fece5b7f8a00f99c91689ff777c481d40e0/django_celery_beat-2.5.0-py3-none-any.whl", hash = "sha256:ae460faa5ea142fba0875409095d22f6bd7bcc7377889b85e8cab5c0dfb781fe", size = 97223 }, + { url = "https://files.pythonhosted.org/packages/c5/92/fa53396870566276357bb81e3fece5b7f8a00f99c91689ff777c481d40e0/django_celery_beat-2.5.0-py3-none-any.whl", hash = "sha256:ae460faa5ea142fba0875409095d22f6bd7bcc7377889b85e8cab5c0dfb781fe", size = 97223, upload-time = "2023-03-14T10:02:00.093Z" }, ] [[package]] @@ -737,9 +737,9 @@ dependencies = [ { name = "asgiref" }, { name = "django" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/21/39/55822b15b7ec87410f34cd16ce04065ff390e50f9e29f31d6d116fc80456/django_cors_headers-4.9.0.tar.gz", hash = "sha256:fe5d7cb59fdc2c8c646ce84b727ac2bca8912a247e6e68e1fb507372178e59e8", size = 21458 } +sdist = { url = "https://files.pythonhosted.org/packages/21/39/55822b15b7ec87410f34cd16ce04065ff390e50f9e29f31d6d116fc80456/django_cors_headers-4.9.0.tar.gz", hash = "sha256:fe5d7cb59fdc2c8c646ce84b727ac2bca8912a247e6e68e1fb507372178e59e8", size = 21458, upload-time = "2025-09-18T10:40:52.326Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/30/d8/19ed1e47badf477d17fb177c1c19b5a21da0fd2d9f093f23be3fb86c5fab/django_cors_headers-4.9.0-py3-none-any.whl", hash = "sha256:15c7f20727f90044dcee2216a9fd7303741a864865f0c3657e28b7056f61b449", size = 12809 }, + { url = "https://files.pythonhosted.org/packages/30/d8/19ed1e47badf477d17fb177c1c19b5a21da0fd2d9f093f23be3fb86c5fab/django_cors_headers-4.9.0-py3-none-any.whl", hash = "sha256:15c7f20727f90044dcee2216a9fd7303741a864865f0c3657e28b7056f61b449", size = 12809, upload-time = "2025-09-18T10:40:50.843Z" }, ] [[package]] @@ -750,9 +750,9 @@ dependencies = [ { name = "django" }, { name = "redis" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/83/9d/2272742fdd9d0a9f0b28cd995b0539430c9467a2192e4de2cea9ea6ad38c/django-redis-5.4.0.tar.gz", hash = "sha256:6a02abaa34b0fea8bf9b707d2c363ab6adc7409950b2db93602e6cb292818c42", size = 52567 } +sdist = { url = "https://files.pythonhosted.org/packages/83/9d/2272742fdd9d0a9f0b28cd995b0539430c9467a2192e4de2cea9ea6ad38c/django-redis-5.4.0.tar.gz", hash = "sha256:6a02abaa34b0fea8bf9b707d2c363ab6adc7409950b2db93602e6cb292818c42", size = 52567, upload-time = "2023-10-01T20:22:01.221Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/f1/63caad7c9222c26a62082f4f777de26389233b7574629996098bf6d25a4d/django_redis-5.4.0-py3-none-any.whl", hash = "sha256:ebc88df7da810732e2af9987f7f426c96204bf89319df4c6da6ca9a2942edd5b", size = 31119 }, + { url = "https://files.pythonhosted.org/packages/b7/f1/63caad7c9222c26a62082f4f777de26389233b7574629996098bf6d25a4d/django_redis-5.4.0-py3-none-any.whl", hash = "sha256:ebc88df7da810732e2af9987f7f426c96204bf89319df4c6da6ca9a2942edd5b", size = 31119, upload-time = "2023-10-01T20:21:33.009Z" }, ] [[package]] @@ -762,7 +762,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "django" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9a/2a/da4db7649ac516fc4b89b86d697edb92362c4f6b0ab2d2fe20d1e0f6ab10/django-tenants-3.5.0.tar.gz", hash = "sha256:bed426108e1bd4f962afa38c1e0fd985a3e8c4c902ded60bd57dbf4fcc92d2cc", size = 117503 } +sdist = { url = "https://files.pythonhosted.org/packages/9a/2a/da4db7649ac516fc4b89b86d697edb92362c4f6b0ab2d2fe20d1e0f6ab10/django-tenants-3.5.0.tar.gz", hash = "sha256:bed426108e1bd4f962afa38c1e0fd985a3e8c4c902ded60bd57dbf4fcc92d2cc", size = 117503, upload-time = "2023-05-11T14:10:26.045Z" } [[package]] name = "django-timezone-field" @@ -771,9 +771,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "django" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ba/5b/0dbe271fef3c2274b83dbcb1b19fa3dacf1f7e542382819294644e78ea8b/django_timezone_field-7.1.tar.gz", hash = "sha256:b3ef409d88a2718b566fabe10ea996f2838bc72b22d3a2900c0aa905c761380c", size = 13727 } +sdist = { url = "https://files.pythonhosted.org/packages/ba/5b/0dbe271fef3c2274b83dbcb1b19fa3dacf1f7e542382819294644e78ea8b/django_timezone_field-7.1.tar.gz", hash = "sha256:b3ef409d88a2718b566fabe10ea996f2838bc72b22d3a2900c0aa905c761380c", size = 13727, upload-time = "2025-01-11T17:49:54.486Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/09/7a808392a751a24ffa62bec00e3085a9c1a151d728c323a5bab229ea0e58/django_timezone_field-7.1-py3-none-any.whl", hash = "sha256:93914713ed882f5bccda080eda388f7006349f25930b6122e9b07bf8db49c4b4", size = 13177 }, + { url = "https://files.pythonhosted.org/packages/ec/09/7a808392a751a24ffa62bec00e3085a9c1a151d728c323a5bab229ea0e58/django_timezone_field-7.1-py3-none-any.whl", hash = "sha256:93914713ed882f5bccda080eda388f7006349f25930b6122e9b07bf8db49c4b4", size = 13177, upload-time = "2025-01-11T17:49:52.142Z" }, ] [[package]] @@ -784,9 +784,9 @@ dependencies = [ { name = "django" }, { name = "pytz" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8e/53/5b2a002c5ebafd60dff1e1945a7d63dee40155830997439a9ba324f0fd50/djangorestframework-3.14.0.tar.gz", hash = "sha256:579a333e6256b09489cbe0a067e66abe55c6595d8926be6b99423786334350c8", size = 1055343 } +sdist = { url = "https://files.pythonhosted.org/packages/8e/53/5b2a002c5ebafd60dff1e1945a7d63dee40155830997439a9ba324f0fd50/djangorestframework-3.14.0.tar.gz", hash = "sha256:579a333e6256b09489cbe0a067e66abe55c6595d8926be6b99423786334350c8", size = 1055343, upload-time = "2022-09-22T11:38:44.245Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ff/4b/3b46c0914ba4b7546a758c35fdfa8e7f017fcbe7f23c878239e93623337a/djangorestframework-3.14.0-py3-none-any.whl", hash = "sha256:eb63f58c9f218e1a7d064d17a70751f528ed4e1d35547fdade9aaf4cd103fd08", size = 1062761 }, + { url = "https://files.pythonhosted.org/packages/ff/4b/3b46c0914ba4b7546a758c35fdfa8e7f017fcbe7f23c878239e93623337a/djangorestframework-3.14.0-py3-none-any.whl", hash = "sha256:eb63f58c9f218e1a7d064d17a70751f528ed4e1d35547fdade9aaf4cd103fd08", size = 1062761, upload-time = "2022-09-22T11:38:41.825Z" }, ] [[package]] @@ -800,18 +800,18 @@ dependencies = [ { name = "urllib3" }, { name = "websocket-client" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f0/73/f7c9a14e88e769f38cb7fb45aa88dfd795faa8e18aea11bababf6e068d5e/docker-6.1.3.tar.gz", hash = "sha256:aa6d17830045ba5ef0168d5eaa34d37beeb113948c413affe1d5991fc11f9a20", size = 259301 } +sdist = { url = "https://files.pythonhosted.org/packages/f0/73/f7c9a14e88e769f38cb7fb45aa88dfd795faa8e18aea11bababf6e068d5e/docker-6.1.3.tar.gz", hash = "sha256:aa6d17830045ba5ef0168d5eaa34d37beeb113948c413affe1d5991fc11f9a20", size = 259301, upload-time = "2023-06-01T14:24:49.268Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/db/be/3032490fa33b36ddc8c4b1da3252c6f974e7133f1a50de00c6b85cca203a/docker-6.1.3-py3-none-any.whl", hash = "sha256:aecd2277b8bf8e506e484f6ab7aec39abe0038e29fa4a6d3ba86c3fe01844ed9", size = 148096 }, + { url = "https://files.pythonhosted.org/packages/db/be/3032490fa33b36ddc8c4b1da3252c6f974e7133f1a50de00c6b85cca203a/docker-6.1.3-py3-none-any.whl", hash = "sha256:aecd2277b8bf8e506e484f6ab7aec39abe0038e29fa4a6d3ba86c3fe01844ed9", size = 148096, upload-time = "2023-06-01T14:24:47.769Z" }, ] [[package]] name = "docutils" version = "0.20.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1f/53/a5da4f2c5739cf66290fac1431ee52aff6851c7c8ffd8264f13affd7bcdd/docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b", size = 2058365 } +sdist = { url = "https://files.pythonhosted.org/packages/1f/53/a5da4f2c5739cf66290fac1431ee52aff6851c7c8ffd8264f13affd7bcdd/docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b", size = 2058365, upload-time = "2023-05-16T23:39:19.748Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6", size = 572666 }, + { url = "https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6", size = 572666, upload-time = "2023-05-16T23:39:15.976Z" }, ] [[package]] @@ -827,9 +827,9 @@ dependencies = [ { name = "pyyaml" }, { name = "uritemplate" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/89/e4/8f619b63bd8095f3797d41da186c707dd9add86b86341d1f350f1d15b2dd/drf-yasg-1.21.7.tar.gz", hash = "sha256:4c3b93068b3dfca6969ab111155e4dd6f7b2d680b98778de8fd460b7837bdb0d", size = 4512723 } +sdist = { url = "https://files.pythonhosted.org/packages/89/e4/8f619b63bd8095f3797d41da186c707dd9add86b86341d1f350f1d15b2dd/drf-yasg-1.21.7.tar.gz", hash = "sha256:4c3b93068b3dfca6969ab111155e4dd6f7b2d680b98778de8fd460b7837bdb0d", size = 4512723, upload-time = "2023-07-20T13:47:34.308Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/26/a5/9fedcd955821ec3b4d26b8a723081eb0f400b7f0bc51f1f49136648423ff/drf_yasg-1.21.7-py3-none-any.whl", hash = "sha256:f85642072c35e684356475781b7ecf5d218fff2c6185c040664dd49f0a4be181", size = 4289125 }, + { url = "https://files.pythonhosted.org/packages/26/a5/9fedcd955821ec3b4d26b8a723081eb0f400b7f0bc51f1f49136648423ff/drf_yasg-1.21.7-py3-none-any.whl", hash = "sha256:f85642072c35e684356475781b7ecf5d218fff2c6185c040664dd49f0a4be181", size = 4289125, upload-time = "2023-07-20T13:47:31.301Z" }, ] [[package]] @@ -841,9 +841,9 @@ dependencies = [ { name = "six" }, { name = "stone" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9e/56/ac085f58e8e0d0bcafdf98c2605e454ac946e3d0c72679669ae112dc30be/dropbox-12.0.2.tar.gz", hash = "sha256:50057fd5ad5fcf047f542dfc6747a896e7ef982f1b5f8500daf51f3abd609962", size = 560236 } +sdist = { url = "https://files.pythonhosted.org/packages/9e/56/ac085f58e8e0d0bcafdf98c2605e454ac946e3d0c72679669ae112dc30be/dropbox-12.0.2.tar.gz", hash = "sha256:50057fd5ad5fcf047f542dfc6747a896e7ef982f1b5f8500daf51f3abd609962", size = 560236, upload-time = "2024-06-03T16:45:30.448Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2d/de/95d8204d9a20fbdb353c5f8e4229b0fcb90f22b96f8246ff1f47c8a45fd5/dropbox-12.0.2-py3-none-any.whl", hash = "sha256:c5b7e9c2668adb6b12dcecd84342565dc50f7d35ab6a748d155cb79040979d1c", size = 572076 }, + { url = "https://files.pythonhosted.org/packages/2d/de/95d8204d9a20fbdb353c5f8e4229b0fcb90f22b96f8246ff1f47c8a45fd5/dropbox-12.0.2-py3-none-any.whl", hash = "sha256:c5b7e9c2668adb6b12dcecd84342565dc50f7d35ab6a748d155cb79040979d1c", size = 572076, upload-time = "2024-06-03T16:45:28.153Z" }, ] [[package]] @@ -855,78 +855,78 @@ dependencies = [ { name = "fsspec" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/97/15/6d8f4c3033ad2bc364b8bb613c52c96653f2268f32ecff4f3ab5f1d7c19b/dropboxdrivefs-1.4.1.tar.gz", hash = "sha256:6f3c6061d045813553ce91ed0e2b682f1d70bec74011943c92b3181faacefd34", size = 7413 } +sdist = { url = "https://files.pythonhosted.org/packages/97/15/6d8f4c3033ad2bc364b8bb613c52c96653f2268f32ecff4f3ab5f1d7c19b/dropboxdrivefs-1.4.1.tar.gz", hash = "sha256:6f3c6061d045813553ce91ed0e2b682f1d70bec74011943c92b3181faacefd34", size = 7413, upload-time = "2024-05-27T14:04:37.648Z" } [[package]] name = "fastuuid" version = "0.14.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c3/7d/d9daedf0f2ebcacd20d599928f8913e9d2aea1d56d2d355a93bfa2b611d7/fastuuid-0.14.0.tar.gz", hash = "sha256:178947fc2f995b38497a74172adee64fdeb8b7ec18f2a5934d037641ba265d26", size = 18232 } +sdist = { url = "https://files.pythonhosted.org/packages/c3/7d/d9daedf0f2ebcacd20d599928f8913e9d2aea1d56d2d355a93bfa2b611d7/fastuuid-0.14.0.tar.gz", hash = "sha256:178947fc2f995b38497a74172adee64fdeb8b7ec18f2a5934d037641ba265d26", size = 18232, upload-time = "2025-10-19T22:19:22.402Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/a2/e78fcc5df65467f0d207661b7ef86c5b7ac62eea337c0c0fcedbeee6fb13/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77e94728324b63660ebf8adb27055e92d2e4611645bf12ed9d88d30486471d0a", size = 510164 }, - { url = "https://files.pythonhosted.org/packages/2b/b3/c846f933f22f581f558ee63f81f29fa924acd971ce903dab1a9b6701816e/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:caa1f14d2102cb8d353096bc6ef6c13b2c81f347e6ab9d6fbd48b9dea41c153d", size = 261837 }, - { url = "https://files.pythonhosted.org/packages/54/ea/682551030f8c4fa9a769d9825570ad28c0c71e30cf34020b85c1f7ee7382/fastuuid-0.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d23ef06f9e67163be38cece704170486715b177f6baae338110983f99a72c070", size = 251370 }, - { url = "https://files.pythonhosted.org/packages/14/dd/5927f0a523d8e6a76b70968e6004966ee7df30322f5fc9b6cdfb0276646a/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c9ec605ace243b6dbe3bd27ebdd5d33b00d8d1d3f580b39fdd15cd96fd71796", size = 277766 }, - { url = "https://files.pythonhosted.org/packages/16/6e/c0fb547eef61293153348f12e0f75a06abb322664b34a1573a7760501336/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:808527f2407f58a76c916d6aa15d58692a4a019fdf8d4c32ac7ff303b7d7af09", size = 278105 }, - { url = "https://files.pythonhosted.org/packages/2d/b1/b9c75e03b768f61cf2e84ee193dc18601aeaf89a4684b20f2f0e9f52b62c/fastuuid-0.14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2fb3c0d7fef6674bbeacdd6dbd386924a7b60b26de849266d1ff6602937675c8", size = 301564 }, - { url = "https://files.pythonhosted.org/packages/fc/fa/f7395fdac07c7a54f18f801744573707321ca0cee082e638e36452355a9d/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab3f5d36e4393e628a4df337c2c039069344db5f4b9d2a3c9cea48284f1dd741", size = 459659 }, - { url = "https://files.pythonhosted.org/packages/66/49/c9fd06a4a0b1f0f048aacb6599e7d96e5d6bc6fa680ed0d46bf111929d1b/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b9a0ca4f03b7e0b01425281ffd44e99d360e15c895f1907ca105854ed85e2057", size = 478430 }, - { url = "https://files.pythonhosted.org/packages/be/9c/909e8c95b494e8e140e8be6165d5fc3f61fdc46198c1554df7b3e1764471/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3acdf655684cc09e60fb7e4cf524e8f42ea760031945aa8086c7eae2eeeabeb8", size = 450894 }, - { url = "https://files.pythonhosted.org/packages/90/eb/d29d17521976e673c55ef7f210d4cdd72091a9ec6755d0fd4710d9b3c871/fastuuid-0.14.0-cp312-cp312-win32.whl", hash = "sha256:9579618be6280700ae36ac42c3efd157049fe4dd40ca49b021280481c78c3176", size = 154374 }, - { url = "https://files.pythonhosted.org/packages/cc/fc/f5c799a6ea6d877faec0472d0b27c079b47c86b1cdc577720a5386483b36/fastuuid-0.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:d9e4332dc4ba054434a9594cbfaf7823b57993d7d8e7267831c3e059857cf397", size = 156550 }, + { url = "https://files.pythonhosted.org/packages/02/a2/e78fcc5df65467f0d207661b7ef86c5b7ac62eea337c0c0fcedbeee6fb13/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77e94728324b63660ebf8adb27055e92d2e4611645bf12ed9d88d30486471d0a", size = 510164, upload-time = "2025-10-19T22:31:45.635Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b3/c846f933f22f581f558ee63f81f29fa924acd971ce903dab1a9b6701816e/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:caa1f14d2102cb8d353096bc6ef6c13b2c81f347e6ab9d6fbd48b9dea41c153d", size = 261837, upload-time = "2025-10-19T22:38:38.53Z" }, + { url = "https://files.pythonhosted.org/packages/54/ea/682551030f8c4fa9a769d9825570ad28c0c71e30cf34020b85c1f7ee7382/fastuuid-0.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d23ef06f9e67163be38cece704170486715b177f6baae338110983f99a72c070", size = 251370, upload-time = "2025-10-19T22:40:26.07Z" }, + { url = "https://files.pythonhosted.org/packages/14/dd/5927f0a523d8e6a76b70968e6004966ee7df30322f5fc9b6cdfb0276646a/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c9ec605ace243b6dbe3bd27ebdd5d33b00d8d1d3f580b39fdd15cd96fd71796", size = 277766, upload-time = "2025-10-19T22:37:23.779Z" }, + { url = "https://files.pythonhosted.org/packages/16/6e/c0fb547eef61293153348f12e0f75a06abb322664b34a1573a7760501336/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:808527f2407f58a76c916d6aa15d58692a4a019fdf8d4c32ac7ff303b7d7af09", size = 278105, upload-time = "2025-10-19T22:26:56.821Z" }, + { url = "https://files.pythonhosted.org/packages/2d/b1/b9c75e03b768f61cf2e84ee193dc18601aeaf89a4684b20f2f0e9f52b62c/fastuuid-0.14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2fb3c0d7fef6674bbeacdd6dbd386924a7b60b26de849266d1ff6602937675c8", size = 301564, upload-time = "2025-10-19T22:30:31.604Z" }, + { url = "https://files.pythonhosted.org/packages/fc/fa/f7395fdac07c7a54f18f801744573707321ca0cee082e638e36452355a9d/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab3f5d36e4393e628a4df337c2c039069344db5f4b9d2a3c9cea48284f1dd741", size = 459659, upload-time = "2025-10-19T22:31:32.341Z" }, + { url = "https://files.pythonhosted.org/packages/66/49/c9fd06a4a0b1f0f048aacb6599e7d96e5d6bc6fa680ed0d46bf111929d1b/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b9a0ca4f03b7e0b01425281ffd44e99d360e15c895f1907ca105854ed85e2057", size = 478430, upload-time = "2025-10-19T22:26:22.962Z" }, + { url = "https://files.pythonhosted.org/packages/be/9c/909e8c95b494e8e140e8be6165d5fc3f61fdc46198c1554df7b3e1764471/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3acdf655684cc09e60fb7e4cf524e8f42ea760031945aa8086c7eae2eeeabeb8", size = 450894, upload-time = "2025-10-19T22:27:01.647Z" }, + { url = "https://files.pythonhosted.org/packages/90/eb/d29d17521976e673c55ef7f210d4cdd72091a9ec6755d0fd4710d9b3c871/fastuuid-0.14.0-cp312-cp312-win32.whl", hash = "sha256:9579618be6280700ae36ac42c3efd157049fe4dd40ca49b021280481c78c3176", size = 154374, upload-time = "2025-10-19T22:29:19.879Z" }, + { url = "https://files.pythonhosted.org/packages/cc/fc/f5c799a6ea6d877faec0472d0b27c079b47c86b1cdc577720a5386483b36/fastuuid-0.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:d9e4332dc4ba054434a9594cbfaf7823b57993d7d8e7267831c3e059857cf397", size = 156550, upload-time = "2025-10-19T22:27:49.658Z" }, ] [[package]] name = "filelock" version = "3.19.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/40/bb/0ab3e58d22305b6f5440629d20683af28959bf793d98d11950e305c1c326/filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58", size = 17687 } +sdist = { url = "https://files.pythonhosted.org/packages/40/bb/0ab3e58d22305b6f5440629d20683af28959bf793d98d11950e305c1c326/filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58", size = 17687, upload-time = "2025-08-14T16:56:03.016Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988 }, + { url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload-time = "2025-08-14T16:56:01.633Z" }, ] [[package]] name = "filetype" version = "1.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bb/29/745f7d30d47fe0f251d3ad3dc2978a23141917661998763bebb6da007eb1/filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb", size = 998020 } +sdist = { url = "https://files.pythonhosted.org/packages/bb/29/745f7d30d47fe0f251d3ad3dc2978a23141917661998763bebb6da007eb1/filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb", size = 998020, upload-time = "2022-11-02T17:34:04.141Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/18/79/1b8fa1bb3568781e84c9200f951c735f3f157429f44be0495da55894d620/filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25", size = 19970 }, + { url = "https://files.pythonhosted.org/packages/18/79/1b8fa1bb3568781e84c9200f951c735f3f157429f44be0495da55894d620/filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25", size = 19970, upload-time = "2022-11-02T17:34:01.425Z" }, ] [[package]] name = "frozenlist" version = "1.7.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/79/b1/b64018016eeb087db503b038296fd782586432b9c077fc5c7839e9cb6ef6/frozenlist-1.7.0.tar.gz", hash = "sha256:2e310d81923c2437ea8670467121cc3e9b0f76d3043cc1d2331d56c7fb7a3a8f", size = 45078 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/a2/c8131383f1e66adad5f6ecfcce383d584ca94055a34d683bbb24ac5f2f1c/frozenlist-1.7.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3dbf9952c4bb0e90e98aec1bd992b3318685005702656bc6f67c1a32b76787f2", size = 81424 }, - { url = "https://files.pythonhosted.org/packages/4c/9d/02754159955088cb52567337d1113f945b9e444c4960771ea90eb73de8db/frozenlist-1.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1f5906d3359300b8a9bb194239491122e6cf1444c2efb88865426f170c262cdb", size = 47952 }, - { url = "https://files.pythonhosted.org/packages/01/7a/0046ef1bd6699b40acd2067ed6d6670b4db2f425c56980fa21c982c2a9db/frozenlist-1.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3dabd5a8f84573c8d10d8859a50ea2dec01eea372031929871368c09fa103478", size = 46688 }, - { url = "https://files.pythonhosted.org/packages/d6/a2/a910bafe29c86997363fb4c02069df4ff0b5bc39d33c5198b4e9dd42d8f8/frozenlist-1.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa57daa5917f1738064f302bf2626281a1cb01920c32f711fbc7bc36111058a8", size = 243084 }, - { url = "https://files.pythonhosted.org/packages/64/3e/5036af9d5031374c64c387469bfcc3af537fc0f5b1187d83a1cf6fab1639/frozenlist-1.7.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c193dda2b6d49f4c4398962810fa7d7c78f032bf45572b3e04dd5249dff27e08", size = 233524 }, - { url = "https://files.pythonhosted.org/packages/06/39/6a17b7c107a2887e781a48ecf20ad20f1c39d94b2a548c83615b5b879f28/frozenlist-1.7.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe2b675cf0aaa6d61bf8fbffd3c274b3c9b7b1623beb3809df8a81399a4a9c4", size = 248493 }, - { url = "https://files.pythonhosted.org/packages/be/00/711d1337c7327d88c44d91dd0f556a1c47fb99afc060ae0ef66b4d24793d/frozenlist-1.7.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8fc5d5cda37f62b262405cf9652cf0856839c4be8ee41be0afe8858f17f4c94b", size = 244116 }, - { url = "https://files.pythonhosted.org/packages/24/fe/74e6ec0639c115df13d5850e75722750adabdc7de24e37e05a40527ca539/frozenlist-1.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0d5ce521d1dd7d620198829b87ea002956e4319002ef0bc8d3e6d045cb4646e", size = 224557 }, - { url = "https://files.pythonhosted.org/packages/8d/db/48421f62a6f77c553575201e89048e97198046b793f4a089c79a6e3268bd/frozenlist-1.7.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:488d0a7d6a0008ca0db273c542098a0fa9e7dfaa7e57f70acef43f32b3f69dca", size = 241820 }, - { url = "https://files.pythonhosted.org/packages/1d/fa/cb4a76bea23047c8462976ea7b7a2bf53997a0ca171302deae9d6dd12096/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:15a7eaba63983d22c54d255b854e8108e7e5f3e89f647fc854bd77a237e767df", size = 236542 }, - { url = "https://files.pythonhosted.org/packages/5d/32/476a4b5cfaa0ec94d3f808f193301debff2ea42288a099afe60757ef6282/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:1eaa7e9c6d15df825bf255649e05bd8a74b04a4d2baa1ae46d9c2d00b2ca2cb5", size = 249350 }, - { url = "https://files.pythonhosted.org/packages/8d/ba/9a28042f84a6bf8ea5dbc81cfff8eaef18d78b2a1ad9d51c7bc5b029ad16/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e4389e06714cfa9d47ab87f784a7c5be91d3934cd6e9a7b85beef808297cc025", size = 225093 }, - { url = "https://files.pythonhosted.org/packages/bc/29/3a32959e68f9cf000b04e79ba574527c17e8842e38c91d68214a37455786/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:73bd45e1488c40b63fe5a7df892baf9e2a4d4bb6409a2b3b78ac1c6236178e01", size = 245482 }, - { url = "https://files.pythonhosted.org/packages/80/e8/edf2f9e00da553f07f5fa165325cfc302dead715cab6ac8336a5f3d0adc2/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:99886d98e1643269760e5fe0df31e5ae7050788dd288947f7f007209b8c33f08", size = 249590 }, - { url = "https://files.pythonhosted.org/packages/1c/80/9a0eb48b944050f94cc51ee1c413eb14a39543cc4f760ed12657a5a3c45a/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:290a172aae5a4c278c6da8a96222e6337744cd9c77313efe33d5670b9f65fc43", size = 237785 }, - { url = "https://files.pythonhosted.org/packages/f3/74/87601e0fb0369b7a2baf404ea921769c53b7ae00dee7dcfe5162c8c6dbf0/frozenlist-1.7.0-cp312-cp312-win32.whl", hash = "sha256:426c7bc70e07cfebc178bc4c2bf2d861d720c4fff172181eeb4a4c41d4ca2ad3", size = 39487 }, - { url = "https://files.pythonhosted.org/packages/0b/15/c026e9a9fc17585a9d461f65d8593d281fedf55fbf7eb53f16c6df2392f9/frozenlist-1.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:563b72efe5da92e02eb68c59cb37205457c977aa7a449ed1b37e6939e5c47c6a", size = 43874 }, - { url = "https://files.pythonhosted.org/packages/ee/45/b82e3c16be2182bff01179db177fe144d58b5dc787a7d4492c6ed8b9317f/frozenlist-1.7.0-py3-none-any.whl", hash = "sha256:9a5af342e34f7e97caf8c995864c7a396418ae2859cc6fdf1b1073020d516a7e", size = 13106 }, +sdist = { url = "https://files.pythonhosted.org/packages/79/b1/b64018016eeb087db503b038296fd782586432b9c077fc5c7839e9cb6ef6/frozenlist-1.7.0.tar.gz", hash = "sha256:2e310d81923c2437ea8670467121cc3e9b0f76d3043cc1d2331d56c7fb7a3a8f", size = 45078, upload-time = "2025-06-09T23:02:35.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/a2/c8131383f1e66adad5f6ecfcce383d584ca94055a34d683bbb24ac5f2f1c/frozenlist-1.7.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3dbf9952c4bb0e90e98aec1bd992b3318685005702656bc6f67c1a32b76787f2", size = 81424, upload-time = "2025-06-09T23:00:42.24Z" }, + { url = "https://files.pythonhosted.org/packages/4c/9d/02754159955088cb52567337d1113f945b9e444c4960771ea90eb73de8db/frozenlist-1.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1f5906d3359300b8a9bb194239491122e6cf1444c2efb88865426f170c262cdb", size = 47952, upload-time = "2025-06-09T23:00:43.481Z" }, + { url = "https://files.pythonhosted.org/packages/01/7a/0046ef1bd6699b40acd2067ed6d6670b4db2f425c56980fa21c982c2a9db/frozenlist-1.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3dabd5a8f84573c8d10d8859a50ea2dec01eea372031929871368c09fa103478", size = 46688, upload-time = "2025-06-09T23:00:44.793Z" }, + { url = "https://files.pythonhosted.org/packages/d6/a2/a910bafe29c86997363fb4c02069df4ff0b5bc39d33c5198b4e9dd42d8f8/frozenlist-1.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa57daa5917f1738064f302bf2626281a1cb01920c32f711fbc7bc36111058a8", size = 243084, upload-time = "2025-06-09T23:00:46.125Z" }, + { url = "https://files.pythonhosted.org/packages/64/3e/5036af9d5031374c64c387469bfcc3af537fc0f5b1187d83a1cf6fab1639/frozenlist-1.7.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c193dda2b6d49f4c4398962810fa7d7c78f032bf45572b3e04dd5249dff27e08", size = 233524, upload-time = "2025-06-09T23:00:47.73Z" }, + { url = "https://files.pythonhosted.org/packages/06/39/6a17b7c107a2887e781a48ecf20ad20f1c39d94b2a548c83615b5b879f28/frozenlist-1.7.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe2b675cf0aaa6d61bf8fbffd3c274b3c9b7b1623beb3809df8a81399a4a9c4", size = 248493, upload-time = "2025-06-09T23:00:49.742Z" }, + { url = "https://files.pythonhosted.org/packages/be/00/711d1337c7327d88c44d91dd0f556a1c47fb99afc060ae0ef66b4d24793d/frozenlist-1.7.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8fc5d5cda37f62b262405cf9652cf0856839c4be8ee41be0afe8858f17f4c94b", size = 244116, upload-time = "2025-06-09T23:00:51.352Z" }, + { url = "https://files.pythonhosted.org/packages/24/fe/74e6ec0639c115df13d5850e75722750adabdc7de24e37e05a40527ca539/frozenlist-1.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0d5ce521d1dd7d620198829b87ea002956e4319002ef0bc8d3e6d045cb4646e", size = 224557, upload-time = "2025-06-09T23:00:52.855Z" }, + { url = "https://files.pythonhosted.org/packages/8d/db/48421f62a6f77c553575201e89048e97198046b793f4a089c79a6e3268bd/frozenlist-1.7.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:488d0a7d6a0008ca0db273c542098a0fa9e7dfaa7e57f70acef43f32b3f69dca", size = 241820, upload-time = "2025-06-09T23:00:54.43Z" }, + { url = "https://files.pythonhosted.org/packages/1d/fa/cb4a76bea23047c8462976ea7b7a2bf53997a0ca171302deae9d6dd12096/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:15a7eaba63983d22c54d255b854e8108e7e5f3e89f647fc854bd77a237e767df", size = 236542, upload-time = "2025-06-09T23:00:56.409Z" }, + { url = "https://files.pythonhosted.org/packages/5d/32/476a4b5cfaa0ec94d3f808f193301debff2ea42288a099afe60757ef6282/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:1eaa7e9c6d15df825bf255649e05bd8a74b04a4d2baa1ae46d9c2d00b2ca2cb5", size = 249350, upload-time = "2025-06-09T23:00:58.468Z" }, + { url = "https://files.pythonhosted.org/packages/8d/ba/9a28042f84a6bf8ea5dbc81cfff8eaef18d78b2a1ad9d51c7bc5b029ad16/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e4389e06714cfa9d47ab87f784a7c5be91d3934cd6e9a7b85beef808297cc025", size = 225093, upload-time = "2025-06-09T23:01:00.015Z" }, + { url = "https://files.pythonhosted.org/packages/bc/29/3a32959e68f9cf000b04e79ba574527c17e8842e38c91d68214a37455786/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:73bd45e1488c40b63fe5a7df892baf9e2a4d4bb6409a2b3b78ac1c6236178e01", size = 245482, upload-time = "2025-06-09T23:01:01.474Z" }, + { url = "https://files.pythonhosted.org/packages/80/e8/edf2f9e00da553f07f5fa165325cfc302dead715cab6ac8336a5f3d0adc2/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:99886d98e1643269760e5fe0df31e5ae7050788dd288947f7f007209b8c33f08", size = 249590, upload-time = "2025-06-09T23:01:02.961Z" }, + { url = "https://files.pythonhosted.org/packages/1c/80/9a0eb48b944050f94cc51ee1c413eb14a39543cc4f760ed12657a5a3c45a/frozenlist-1.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:290a172aae5a4c278c6da8a96222e6337744cd9c77313efe33d5670b9f65fc43", size = 237785, upload-time = "2025-06-09T23:01:05.095Z" }, + { url = "https://files.pythonhosted.org/packages/f3/74/87601e0fb0369b7a2baf404ea921769c53b7ae00dee7dcfe5162c8c6dbf0/frozenlist-1.7.0-cp312-cp312-win32.whl", hash = "sha256:426c7bc70e07cfebc178bc4c2bf2d861d720c4fff172181eeb4a4c41d4ca2ad3", size = 39487, upload-time = "2025-06-09T23:01:06.54Z" }, + { url = "https://files.pythonhosted.org/packages/0b/15/c026e9a9fc17585a9d461f65d8593d281fedf55fbf7eb53f16c6df2392f9/frozenlist-1.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:563b72efe5da92e02eb68c59cb37205457c977aa7a449ed1b37e6939e5c47c6a", size = 43874, upload-time = "2025-06-09T23:01:07.752Z" }, + { url = "https://files.pythonhosted.org/packages/ee/45/b82e3c16be2182bff01179db177fe144d58b5dc787a7d4492c6ed8b9317f/frozenlist-1.7.0-py3-none-any.whl", hash = "sha256:9a5af342e34f7e97caf8c995864c7a396418ae2859cc6fdf1b1073020d516a7e", size = 13106, upload-time = "2025-06-09T23:02:34.204Z" }, ] [[package]] name = "fsspec" version = "2024.10.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a0/52/f16a068ebadae42526484c31f4398e62962504e5724a8ba5dc3409483df2/fsspec-2024.10.0.tar.gz", hash = "sha256:eda2d8a4116d4f2429db8550f2457da57279247dd930bb12f821b58391359493", size = 286853 } +sdist = { url = "https://files.pythonhosted.org/packages/a0/52/f16a068ebadae42526484c31f4398e62962504e5724a8ba5dc3409483df2/fsspec-2024.10.0.tar.gz", hash = "sha256:eda2d8a4116d4f2429db8550f2457da57279247dd930bb12f821b58391359493", size = 286853, upload-time = "2024-10-21T01:21:16.969Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/b2/454d6e7f0158951d8a78c2e1eb4f69ae81beb8dca5fee9809c6c99e9d0d0/fsspec-2024.10.0-py3-none-any.whl", hash = "sha256:03b9a6785766a4de40368b88906366755e2819e758b83705c88cd7cb5fe81871", size = 179641 }, + { url = "https://files.pythonhosted.org/packages/c6/b2/454d6e7f0158951d8a78c2e1eb4f69ae81beb8dca5fee9809c6c99e9d0d0/fsspec-2024.10.0-py3-none-any.whl", hash = "sha256:03b9a6785766a4de40368b88906366755e2819e758b83705c88cd7cb5fe81871", size = 179641, upload-time = "2024-10-21T01:21:14.793Z" }, ] [package.optional-dependencies] @@ -938,9 +938,9 @@ sftp = [ name = "funcy" version = "2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/70/b8/c6081521ff70afdff55cd9512b2220bbf4fa88804dae51d1b57b4b58ef32/funcy-2.0.tar.gz", hash = "sha256:3963315d59d41c6f30c04bc910e10ab50a3ac4a225868bfa96feed133df075cb", size = 537931 } +sdist = { url = "https://files.pythonhosted.org/packages/70/b8/c6081521ff70afdff55cd9512b2220bbf4fa88804dae51d1b57b4b58ef32/funcy-2.0.tar.gz", hash = "sha256:3963315d59d41c6f30c04bc910e10ab50a3ac4a225868bfa96feed133df075cb", size = 537931, upload-time = "2023-03-28T06:22:46.764Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/08/c2409cb01d5368dcfedcbaffa7d044cc8957d57a9d0855244a5eb4709d30/funcy-2.0-py2.py3-none-any.whl", hash = "sha256:53df23c8bb1651b12f095df764bfb057935d49537a56de211b098f4c79614bb0", size = 30891 }, + { url = "https://files.pythonhosted.org/packages/d5/08/c2409cb01d5368dcfedcbaffa7d044cc8957d57a9d0855244a5eb4709d30/funcy-2.0-py2.py3-none-any.whl", hash = "sha256:53df23c8bb1651b12f095df764bfb057935d49537a56de211b098f4c79614bb0", size = 30891, upload-time = "2023-03-28T06:22:42.576Z" }, ] [[package]] @@ -956,9 +956,9 @@ dependencies = [ { name = "google-cloud-storage" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e5/1e/1d8c4593d9e2eb04918fec43253ab152823d67ad51ad9e3ab6b3a78c431a/gcsfs-2024.10.0.tar.gz", hash = "sha256:5df54cfe568e8fdeea5aafa7fed695cdc69a9a674e991ca8c1ce634f5df1d314", size = 79588 } +sdist = { url = "https://files.pythonhosted.org/packages/e5/1e/1d8c4593d9e2eb04918fec43253ab152823d67ad51ad9e3ab6b3a78c431a/gcsfs-2024.10.0.tar.gz", hash = "sha256:5df54cfe568e8fdeea5aafa7fed695cdc69a9a674e991ca8c1ce634f5df1d314", size = 79588, upload-time = "2024-10-21T13:43:26.163Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/dc/96/d60e835fb7d10166c77aef0c1fa30e634153c03a0f486786977b95f88fde/gcsfs-2024.10.0-py2.py3-none-any.whl", hash = "sha256:bb2d23547e61203ea2dda5fa6c4b91a0c34b74ebe8bb6ab1926f6c33381bceb2", size = 34953 }, + { url = "https://files.pythonhosted.org/packages/dc/96/d60e835fb7d10166c77aef0c1fa30e634153c03a0f486786977b95f88fde/gcsfs-2024.10.0-py2.py3-none-any.whl", hash = "sha256:bb2d23547e61203ea2dda5fa6c4b91a0c34b74ebe8bb6ab1926f6c33381bceb2", size = 34953, upload-time = "2024-10-21T13:43:24.951Z" }, ] [[package]] @@ -972,9 +972,9 @@ dependencies = [ { name = "protobuf" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dc/21/e9d043e88222317afdbdb567165fdbc3b0aad90064c7e0c9eb0ad9955ad8/google_api_core-2.25.1.tar.gz", hash = "sha256:d2aaa0b13c78c61cb3f4282c464c046e45fbd75755683c9c525e6e8f7ed0a5e8", size = 165443 } +sdist = { url = "https://files.pythonhosted.org/packages/dc/21/e9d043e88222317afdbdb567165fdbc3b0aad90064c7e0c9eb0ad9955ad8/google_api_core-2.25.1.tar.gz", hash = "sha256:d2aaa0b13c78c61cb3f4282c464c046e45fbd75755683c9c525e6e8f7ed0a5e8", size = 165443, upload-time = "2025-06-12T20:52:20.439Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/14/4b/ead00905132820b623732b175d66354e9d3e69fcf2a5dcdab780664e7896/google_api_core-2.25.1-py3-none-any.whl", hash = "sha256:8a2a56c1fef82987a524371f99f3bd0143702fecc670c72e600c1cda6bf8dbb7", size = 160807 }, + { url = "https://files.pythonhosted.org/packages/14/4b/ead00905132820b623732b175d66354e9d3e69fcf2a5dcdab780664e7896/google_api_core-2.25.1-py3-none-any.whl", hash = "sha256:8a2a56c1fef82987a524371f99f3bd0143702fecc670c72e600c1cda6bf8dbb7", size = 160807, upload-time = "2025-06-12T20:52:19.334Z" }, ] [package.optional-dependencies] @@ -994,9 +994,9 @@ dependencies = [ { name = "httplib2" }, { name = "uritemplate" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fa/1f/49a2c83fc6dcd8b127cc9efbecf7d5fc36109c2028ba22ed6cb4d072fca4/google_api_python_client-2.183.0.tar.gz", hash = "sha256:abae37e04fecf719388e5c02f707ed9cdf952f10b217c79a3e76c636762e3ea9", size = 13645623 } +sdist = { url = "https://files.pythonhosted.org/packages/fa/1f/49a2c83fc6dcd8b127cc9efbecf7d5fc36109c2028ba22ed6cb4d072fca4/google_api_python_client-2.183.0.tar.gz", hash = "sha256:abae37e04fecf719388e5c02f707ed9cdf952f10b217c79a3e76c636762e3ea9", size = 13645623, upload-time = "2025-09-23T22:27:00.854Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/06/1974f937172854bc7622eff5c2390f33542ceb843f305922922c8f5f7f17/google_api_python_client-2.183.0-py3-none-any.whl", hash = "sha256:2005b6e86c27be1db1a43f43e047a0f8e004159f3cceddecb08cf1624bddba31", size = 14214837 }, + { url = "https://files.pythonhosted.org/packages/ab/06/1974f937172854bc7622eff5c2390f33542ceb843f305922922c8f5f7f17/google_api_python_client-2.183.0-py3-none-any.whl", hash = "sha256:2005b6e86c27be1db1a43f43e047a0f8e004159f3cceddecb08cf1624bddba31", size = 14214837, upload-time = "2025-09-23T22:26:57.758Z" }, ] [[package]] @@ -1010,9 +1010,9 @@ dependencies = [ { name = "six" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4b/e0/d2c96098280f17eb626d4da0b7e553b8e5648d57514c8cefec851c16920c/google-auth-2.20.0.tar.gz", hash = "sha256:030af34138909ccde0fbce611afc178f1d65d32fbff281f25738b1fe1c6f3eaa", size = 229669 } +sdist = { url = "https://files.pythonhosted.org/packages/4b/e0/d2c96098280f17eb626d4da0b7e553b8e5648d57514c8cefec851c16920c/google-auth-2.20.0.tar.gz", hash = "sha256:030af34138909ccde0fbce611afc178f1d65d32fbff281f25738b1fe1c6f3eaa", size = 229669, upload-time = "2023-06-13T17:50:38.754Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/1a/5866a7c6e16abc1df395e6d2b9808984d0905c747d75f5e20f1a052421d1/google_auth-2.20.0-py2.py3-none-any.whl", hash = "sha256:23b7b0950fcda519bfb6692bf0d5289d2ea49fc143717cc7188458ec620e63fa", size = 181456 }, + { url = "https://files.pythonhosted.org/packages/9a/1a/5866a7c6e16abc1df395e6d2b9808984d0905c747d75f5e20f1a052421d1/google_auth-2.20.0-py2.py3-none-any.whl", hash = "sha256:23b7b0950fcda519bfb6692bf0d5289d2ea49fc143717cc7188458ec620e63fa", size = 181456, upload-time = "2023-06-13T17:50:36.408Z" }, ] [[package]] @@ -1023,9 +1023,9 @@ dependencies = [ { name = "google-auth" }, { name = "httplib2" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/56/be/217a598a818567b28e859ff087f347475c807a5649296fb5a817c58dacef/google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05", size = 10842 } +sdist = { url = "https://files.pythonhosted.org/packages/56/be/217a598a818567b28e859ff087f347475c807a5649296fb5a817c58dacef/google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05", size = 10842, upload-time = "2023-12-12T17:40:30.722Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d", size = 9253 }, + { url = "https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d", size = 9253, upload-time = "2023-12-12T17:40:13.055Z" }, ] [[package]] @@ -1036,9 +1036,9 @@ dependencies = [ { name = "google-auth" }, { name = "requests-oauthlib" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = "sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955 } +sdist = { url = "https://files.pythonhosted.org/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = "sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955, upload-time = "2025-04-22T16:40:29.172Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/84/40ee070be95771acd2f4418981edb834979424565c3eec3cd88b6aa09d24/google_auth_oauthlib-1.2.2-py3-none-any.whl", hash = "sha256:fd619506f4b3908b5df17b65f39ca8d66ea56986e5472eb5978fd8f3786f00a2", size = 19072 }, + { url = "https://files.pythonhosted.org/packages/ac/84/40ee070be95771acd2f4418981edb834979424565c3eec3cd88b6aa09d24/google_auth_oauthlib-1.2.2-py3-none-any.whl", hash = "sha256:fd619506f4b3908b5df17b65f39ca8d66ea56986e5472eb5978fd8f3786f00a2", size = 19072, upload-time = "2025-04-22T16:40:28.174Z" }, ] [[package]] @@ -1056,9 +1056,9 @@ dependencies = [ { name = "python-dateutil" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/62/ff/2c520952db184dec31e2ee988cfa37fa9e7776935a3f2eccc44252ecab5f/google-cloud-bigquery-3.11.4.tar.gz", hash = "sha256:697df117241a2283bcbb93b21e10badc14e51c9a90800d2a7e1a3e1c7d842974", size = 410777 } +sdist = { url = "https://files.pythonhosted.org/packages/62/ff/2c520952db184dec31e2ee988cfa37fa9e7776935a3f2eccc44252ecab5f/google-cloud-bigquery-3.11.4.tar.gz", hash = "sha256:697df117241a2283bcbb93b21e10badc14e51c9a90800d2a7e1a3e1c7d842974", size = 410777, upload-time = "2023-07-19T23:12:12.7Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/6a/d0ef792288f2fa2cfea80899a82de302b3332dfda41984fe114e2cfbf700/google_cloud_bigquery-3.11.4-py2.py3-none-any.whl", hash = "sha256:5fa7897743a0ed949ade25a0942fc9e7557d8fce307c6f8a76d1b604cf27f1b1", size = 219607 }, + { url = "https://files.pythonhosted.org/packages/cc/6a/d0ef792288f2fa2cfea80899a82de302b3332dfda41984fe114e2cfbf700/google_cloud_bigquery-3.11.4-py2.py3-none-any.whl", hash = "sha256:5fa7897743a0ed949ade25a0942fc9e7557d8fce307c6f8a76d1b604cf27f1b1", size = 219607, upload-time = "2023-07-19T23:12:09.449Z" }, ] [[package]] @@ -1069,9 +1069,9 @@ dependencies = [ { name = "google-api-core" }, { name = "google-auth" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d6/b8/2b53838d2acd6ec6168fd284a990c76695e84c65deee79c9f3a4276f6b4f/google_cloud_core-2.4.3.tar.gz", hash = "sha256:1fab62d7102844b278fe6dead3af32408b1df3eb06f5c7e8634cbd40edc4da53", size = 35861 } +sdist = { url = "https://files.pythonhosted.org/packages/d6/b8/2b53838d2acd6ec6168fd284a990c76695e84c65deee79c9f3a4276f6b4f/google_cloud_core-2.4.3.tar.gz", hash = "sha256:1fab62d7102844b278fe6dead3af32408b1df3eb06f5c7e8634cbd40edc4da53", size = 35861, upload-time = "2025-03-10T21:05:38.948Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/86/bda7241a8da2d28a754aad2ba0f6776e35b67e37c36ae0c45d49370f1014/google_cloud_core-2.4.3-py2.py3-none-any.whl", hash = "sha256:5130f9f4c14b4fafdff75c79448f9495cfade0d8775facf1b09c3bf67e027f6e", size = 29348 }, + { url = "https://files.pythonhosted.org/packages/40/86/bda7241a8da2d28a754aad2ba0f6776e35b67e37c36ae0c45d49370f1014/google_cloud_core-2.4.3-py2.py3-none-any.whl", hash = "sha256:5130f9f4c14b4fafdff75c79448f9495cfade0d8775facf1b09c3bf67e027f6e", size = 29348, upload-time = "2025-03-10T21:05:37.785Z" }, ] [[package]] @@ -1084,9 +1084,9 @@ dependencies = [ { name = "proto-plus" }, { name = "protobuf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/48/6b/92b705f408c1d928526b65d1259be4254ef1f45e620f01f8665156b4d781/google-cloud-secret-manager-2.16.1.tar.gz", hash = "sha256:149d11ce9be7ea81d4ac3544d3fcd4c716a9edb2cb775d9c075231570b079fbb", size = 128884 } +sdist = { url = "https://files.pythonhosted.org/packages/48/6b/92b705f408c1d928526b65d1259be4254ef1f45e620f01f8665156b4d781/google-cloud-secret-manager-2.16.1.tar.gz", hash = "sha256:149d11ce9be7ea81d4ac3544d3fcd4c716a9edb2cb775d9c075231570b079fbb", size = 128884, upload-time = "2023-03-27T14:51:09.684Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6c/e3/c3aade516eaf544bd7d86694178de9c2da8eff8fc40326d0265acc65991d/google_cloud_secret_manager-2.16.1-py2.py3-none-any.whl", hash = "sha256:dad28c24921fb62961aafe808be0e7935a99096f03ac29eeeefa04b85534c1f3", size = 116749 }, + { url = "https://files.pythonhosted.org/packages/6c/e3/c3aade516eaf544bd7d86694178de9c2da8eff8fc40326d0265acc65991d/google_cloud_secret_manager-2.16.1-py2.py3-none-any.whl", hash = "sha256:dad28c24921fb62961aafe808be0e7935a99096f03ac29eeeefa04b85534c1f3", size = 116749, upload-time = "2023-03-27T14:51:07.661Z" }, ] [[package]] @@ -1100,22 +1100,22 @@ dependencies = [ { name = "google-resumable-media" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fc/50/c9998f84fd8ce8799d7f8020466bbc5c9e3b1126b04a09fdb02378d451b0/google-cloud-storage-2.9.0.tar.gz", hash = "sha256:9b6ae7b509fc294bdacb84d0f3ea8e20e2c54a8b4bbe39c5707635fec214eff3", size = 5498811 } +sdist = { url = "https://files.pythonhosted.org/packages/fc/50/c9998f84fd8ce8799d7f8020466bbc5c9e3b1126b04a09fdb02378d451b0/google-cloud-storage-2.9.0.tar.gz", hash = "sha256:9b6ae7b509fc294bdacb84d0f3ea8e20e2c54a8b4bbe39c5707635fec214eff3", size = 5498811, upload-time = "2023-05-04T17:56:46.265Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/74/fb/3770e7f44cf6133f502e1b8503b6739351b53272cf8313b47f1de6cf4960/google_cloud_storage-2.9.0-py2.py3-none-any.whl", hash = "sha256:83a90447f23d5edd045e0037982c270302e3aeb45fc1288d2c2ca713d27bad94", size = 113512 }, + { url = "https://files.pythonhosted.org/packages/74/fb/3770e7f44cf6133f502e1b8503b6739351b53272cf8313b47f1de6cf4960/google_cloud_storage-2.9.0-py2.py3-none-any.whl", hash = "sha256:83a90447f23d5edd045e0037982c270302e3aeb45fc1288d2c2ca713d27bad94", size = 113512, upload-time = "2023-05-04T17:56:43.929Z" }, ] [[package]] name = "google-crc32c" version = "1.7.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/19/ae/87802e6d9f9d69adfaedfcfd599266bf386a54d0be058b532d04c794f76d/google_crc32c-1.7.1.tar.gz", hash = "sha256:2bff2305f98846f3e825dbeec9ee406f89da7962accdb29356e4eadc251bd472", size = 14495 } +sdist = { url = "https://files.pythonhosted.org/packages/19/ae/87802e6d9f9d69adfaedfcfd599266bf386a54d0be058b532d04c794f76d/google_crc32c-1.7.1.tar.gz", hash = "sha256:2bff2305f98846f3e825dbeec9ee406f89da7962accdb29356e4eadc251bd472", size = 14495, upload-time = "2025-03-26T14:29:13.32Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/dd/b7/787e2453cf8639c94b3d06c9d61f512234a82e1d12d13d18584bd3049904/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2d73a68a653c57281401871dd4aeebbb6af3191dcac751a76ce430df4d403194", size = 30470 }, - { url = "https://files.pythonhosted.org/packages/ed/b4/6042c2b0cbac3ec3a69bb4c49b28d2f517b7a0f4a0232603c42c58e22b44/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:22beacf83baaf59f9d3ab2bbb4db0fb018da8e5aebdce07ef9f09fce8220285e", size = 30315 }, - { url = "https://files.pythonhosted.org/packages/29/ad/01e7a61a5d059bc57b702d9ff6a18b2585ad97f720bd0a0dbe215df1ab0e/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19eafa0e4af11b0a4eb3974483d55d2d77ad1911e6cf6f832e1574f6781fd337", size = 33180 }, - { url = "https://files.pythonhosted.org/packages/3b/a5/7279055cf004561894ed3a7bfdf5bf90a53f28fadd01af7cd166e88ddf16/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6d86616faaea68101195c6bdc40c494e4d76f41e07a37ffdef270879c15fb65", size = 32794 }, - { url = "https://files.pythonhosted.org/packages/0f/d6/77060dbd140c624e42ae3ece3df53b9d811000729a5c821b9fd671ceaac6/google_crc32c-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:b7491bdc0c7564fcf48c0179d2048ab2f7c7ba36b84ccd3a3e1c3f7a72d3bba6", size = 33477 }, + { url = "https://files.pythonhosted.org/packages/dd/b7/787e2453cf8639c94b3d06c9d61f512234a82e1d12d13d18584bd3049904/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2d73a68a653c57281401871dd4aeebbb6af3191dcac751a76ce430df4d403194", size = 30470, upload-time = "2025-03-26T14:34:31.655Z" }, + { url = "https://files.pythonhosted.org/packages/ed/b4/6042c2b0cbac3ec3a69bb4c49b28d2f517b7a0f4a0232603c42c58e22b44/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:22beacf83baaf59f9d3ab2bbb4db0fb018da8e5aebdce07ef9f09fce8220285e", size = 30315, upload-time = "2025-03-26T15:01:54.634Z" }, + { url = "https://files.pythonhosted.org/packages/29/ad/01e7a61a5d059bc57b702d9ff6a18b2585ad97f720bd0a0dbe215df1ab0e/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19eafa0e4af11b0a4eb3974483d55d2d77ad1911e6cf6f832e1574f6781fd337", size = 33180, upload-time = "2025-03-26T14:41:32.168Z" }, + { url = "https://files.pythonhosted.org/packages/3b/a5/7279055cf004561894ed3a7bfdf5bf90a53f28fadd01af7cd166e88ddf16/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6d86616faaea68101195c6bdc40c494e4d76f41e07a37ffdef270879c15fb65", size = 32794, upload-time = "2025-03-26T14:41:33.264Z" }, + { url = "https://files.pythonhosted.org/packages/0f/d6/77060dbd140c624e42ae3ece3df53b9d811000729a5c821b9fd671ceaac6/google_crc32c-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:b7491bdc0c7564fcf48c0179d2048ab2f7c7ba36b84ccd3a3e1c3f7a72d3bba6", size = 33477, upload-time = "2025-03-26T14:29:10.94Z" }, ] [[package]] @@ -1125,9 +1125,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "google-crc32c" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/58/5a/0efdc02665dca14e0837b62c8a1a93132c264bd02054a15abb2218afe0ae/google_resumable_media-2.7.2.tar.gz", hash = "sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0", size = 2163099 } +sdist = { url = "https://files.pythonhosted.org/packages/58/5a/0efdc02665dca14e0837b62c8a1a93132c264bd02054a15abb2218afe0ae/google_resumable_media-2.7.2.tar.gz", hash = "sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0", size = 2163099, upload-time = "2024-08-07T22:20:38.555Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/82/35/b8d3baf8c46695858cb9d8835a53baa1eeb9906ddaf2f728a5f5b640fd1e/google_resumable_media-2.7.2-py2.py3-none-any.whl", hash = "sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa", size = 81251 }, + { url = "https://files.pythonhosted.org/packages/82/35/b8d3baf8c46695858cb9d8835a53baa1eeb9906ddaf2f728a5f5b640fd1e/google_resumable_media-2.7.2-py2.py3-none-any.whl", hash = "sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa", size = 81251, upload-time = "2024-08-07T22:20:36.409Z" }, ] [[package]] @@ -1137,9 +1137,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "protobuf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/39/24/33db22342cf4a2ea27c9955e6713140fedd51e8b141b5ce5260897020f1a/googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257", size = 145903 } +sdist = { url = "https://files.pythonhosted.org/packages/39/24/33db22342cf4a2ea27c9955e6713140fedd51e8b141b5ce5260897020f1a/googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257", size = 145903, upload-time = "2025-04-14T10:17:02.924Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/86/f1/62a193f0227cf15a920390abe675f386dec35f7ae3ffe6da582d3ade42c7/googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8", size = 294530 }, + { url = "https://files.pythonhosted.org/packages/86/f1/62a193f0227cf15a920390abe675f386dec35f7ae3ffe6da582d3ade42c7/googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8", size = 294530, upload-time = "2025-04-14T10:17:01.271Z" }, ] [package.optional-dependencies] @@ -1151,19 +1151,19 @@ grpc = [ name = "greenlet" version = "3.2.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/03/b8/704d753a5a45507a7aab61f18db9509302ed3d0a27ac7e0359ec2905b1a6/greenlet-3.2.4.tar.gz", hash = "sha256:0dca0d95ff849f9a364385f36ab49f50065d76964944638be9691e1832e9f86d", size = 188260 } +sdist = { url = "https://files.pythonhosted.org/packages/03/b8/704d753a5a45507a7aab61f18db9509302ed3d0a27ac7e0359ec2905b1a6/greenlet-3.2.4.tar.gz", hash = "sha256:0dca0d95ff849f9a364385f36ab49f50065d76964944638be9691e1832e9f86d", size = 188260, upload-time = "2025-08-07T13:24:33.51Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079 }, - { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997 }, - { url = "https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185 }, - { url = "https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926 }, - { url = "https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839 }, - { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586 }, - { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281 }, - { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142 }, - { url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846 }, - { url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814 }, - { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899 }, + { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, + { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, + { url = "https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185, upload-time = "2025-08-07T13:45:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926, upload-time = "2025-08-07T13:53:15.251Z" }, + { url = "https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839, upload-time = "2025-08-07T13:18:30.281Z" }, + { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, + { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" }, + { url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" }, + { url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" }, + { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" }, ] [[package]] @@ -1173,9 +1173,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ec/d7/6c09dd7ce4c7837e4cdb11dce980cb45ae3cd87677298dc3b781b6bce7d3/griffe-1.14.0.tar.gz", hash = "sha256:9d2a15c1eca966d68e00517de5d69dd1bc5c9f2335ef6c1775362ba5b8651a13", size = 424684 } +sdist = { url = "https://files.pythonhosted.org/packages/ec/d7/6c09dd7ce4c7837e4cdb11dce980cb45ae3cd87677298dc3b781b6bce7d3/griffe-1.14.0.tar.gz", hash = "sha256:9d2a15c1eca966d68e00517de5d69dd1bc5c9f2335ef6c1775362ba5b8651a13", size = 424684, upload-time = "2025-09-05T15:02:29.167Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/b1/9ff6578d789a89812ff21e4e0f80ffae20a65d5dd84e7a17873fe3b365be/griffe-1.14.0-py3-none-any.whl", hash = "sha256:0e9d52832cccf0f7188cfe585ba962d2674b241c01916d780925df34873bceb0", size = 144439 }, + { url = "https://files.pythonhosted.org/packages/2a/b1/9ff6578d789a89812ff21e4e0f80ffae20a65d5dd84e7a17873fe3b365be/griffe-1.14.0-py3-none-any.whl", hash = "sha256:0e9d52832cccf0f7188cfe585ba962d2674b241c01916d780925df34873bceb0", size = 144439, upload-time = "2025-09-05T15:02:27.511Z" }, ] [[package]] @@ -1187,9 +1187,9 @@ dependencies = [ { name = "grpcio" }, { name = "protobuf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b9/4e/8d0ca3b035e41fe0b3f31ebbb638356af720335e5a11154c330169b40777/grpc_google_iam_v1-0.14.2.tar.gz", hash = "sha256:b3e1fc387a1a329e41672197d0ace9de22c78dd7d215048c4c78712073f7bd20", size = 16259 } +sdist = { url = "https://files.pythonhosted.org/packages/b9/4e/8d0ca3b035e41fe0b3f31ebbb638356af720335e5a11154c330169b40777/grpc_google_iam_v1-0.14.2.tar.gz", hash = "sha256:b3e1fc387a1a329e41672197d0ace9de22c78dd7d215048c4c78712073f7bd20", size = 16259, upload-time = "2025-03-17T11:40:23.586Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/66/6f/dd9b178aee7835b96c2e63715aba6516a9d50f6bebbd1cc1d32c82a2a6c3/grpc_google_iam_v1-0.14.2-py3-none-any.whl", hash = "sha256:a3171468459770907926d56a440b2bb643eec1d7ba215f48f3ecece42b4d8351", size = 19242 }, + { url = "https://files.pythonhosted.org/packages/66/6f/dd9b178aee7835b96c2e63715aba6516a9d50f6bebbd1cc1d32c82a2a6c3/grpc_google_iam_v1-0.14.2-py3-none-any.whl", hash = "sha256:a3171468459770907926d56a440b2bb643eec1d7ba215f48f3ecece42b4d8351", size = 19242, upload-time = "2025-03-17T11:40:22.648Z" }, ] [[package]] @@ -1199,18 +1199,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b6/e0/318c1ce3ae5a17894d5791e87aea147587c9e702f24122cc7a5c8bbaeeb1/grpcio-1.76.0.tar.gz", hash = "sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73", size = 12785182 } +sdist = { url = "https://files.pythonhosted.org/packages/b6/e0/318c1ce3ae5a17894d5791e87aea147587c9e702f24122cc7a5c8bbaeeb1/grpcio-1.76.0.tar.gz", hash = "sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73", size = 12785182, upload-time = "2025-10-21T16:23:12.106Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/05/8e29121994b8d959ffa0afd28996d452f291b48cfc0875619de0bde2c50c/grpcio-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8", size = 5799718 }, - { url = "https://files.pythonhosted.org/packages/d9/75/11d0e66b3cdf998c996489581bdad8900db79ebd83513e45c19548f1cba4/grpcio-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280", size = 11825627 }, - { url = "https://files.pythonhosted.org/packages/28/50/2f0aa0498bc188048f5d9504dcc5c2c24f2eb1a9337cd0fa09a61a2e75f0/grpcio-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4", size = 6359167 }, - { url = "https://files.pythonhosted.org/packages/66/e5/bbf0bb97d29ede1d59d6588af40018cfc345b17ce979b7b45424628dc8bb/grpcio-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11", size = 7044267 }, - { url = "https://files.pythonhosted.org/packages/f5/86/f6ec2164f743d9609691115ae8ece098c76b894ebe4f7c94a655c6b03e98/grpcio-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6", size = 6573963 }, - { url = "https://files.pythonhosted.org/packages/60/bc/8d9d0d8505feccfdf38a766d262c71e73639c165b311c9457208b56d92ae/grpcio-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8", size = 7164484 }, - { url = "https://files.pythonhosted.org/packages/67/e6/5d6c2fc10b95edf6df9b8f19cf10a34263b7fd48493936fffd5085521292/grpcio-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980", size = 8127777 }, - { url = "https://files.pythonhosted.org/packages/3f/c8/dce8ff21c86abe025efe304d9e31fdb0deaaa3b502b6a78141080f206da0/grpcio-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882", size = 7594014 }, - { url = "https://files.pythonhosted.org/packages/e0/42/ad28191ebf983a5d0ecef90bab66baa5a6b18f2bfdef9d0a63b1973d9f75/grpcio-1.76.0-cp312-cp312-win32.whl", hash = "sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958", size = 3984750 }, - { url = "https://files.pythonhosted.org/packages/9e/00/7bd478cbb851c04a48baccaa49b75abaa8e4122f7d86da797500cccdd771/grpcio-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347", size = 4704003 }, + { url = "https://files.pythonhosted.org/packages/bf/05/8e29121994b8d959ffa0afd28996d452f291b48cfc0875619de0bde2c50c/grpcio-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8", size = 5799718, upload-time = "2025-10-21T16:21:17.939Z" }, + { url = "https://files.pythonhosted.org/packages/d9/75/11d0e66b3cdf998c996489581bdad8900db79ebd83513e45c19548f1cba4/grpcio-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280", size = 11825627, upload-time = "2025-10-21T16:21:20.466Z" }, + { url = "https://files.pythonhosted.org/packages/28/50/2f0aa0498bc188048f5d9504dcc5c2c24f2eb1a9337cd0fa09a61a2e75f0/grpcio-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4", size = 6359167, upload-time = "2025-10-21T16:21:23.122Z" }, + { url = "https://files.pythonhosted.org/packages/66/e5/bbf0bb97d29ede1d59d6588af40018cfc345b17ce979b7b45424628dc8bb/grpcio-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11", size = 7044267, upload-time = "2025-10-21T16:21:25.995Z" }, + { url = "https://files.pythonhosted.org/packages/f5/86/f6ec2164f743d9609691115ae8ece098c76b894ebe4f7c94a655c6b03e98/grpcio-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6", size = 6573963, upload-time = "2025-10-21T16:21:28.631Z" }, + { url = "https://files.pythonhosted.org/packages/60/bc/8d9d0d8505feccfdf38a766d262c71e73639c165b311c9457208b56d92ae/grpcio-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8", size = 7164484, upload-time = "2025-10-21T16:21:30.837Z" }, + { url = "https://files.pythonhosted.org/packages/67/e6/5d6c2fc10b95edf6df9b8f19cf10a34263b7fd48493936fffd5085521292/grpcio-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980", size = 8127777, upload-time = "2025-10-21T16:21:33.577Z" }, + { url = "https://files.pythonhosted.org/packages/3f/c8/dce8ff21c86abe025efe304d9e31fdb0deaaa3b502b6a78141080f206da0/grpcio-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882", size = 7594014, upload-time = "2025-10-21T16:21:41.882Z" }, + { url = "https://files.pythonhosted.org/packages/e0/42/ad28191ebf983a5d0ecef90bab66baa5a6b18f2bfdef9d0a63b1973d9f75/grpcio-1.76.0-cp312-cp312-win32.whl", hash = "sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958", size = 3984750, upload-time = "2025-10-21T16:21:44.006Z" }, + { url = "https://files.pythonhosted.org/packages/9e/00/7bd478cbb851c04a48baccaa49b75abaa8e4122f7d86da797500cccdd771/grpcio-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347", size = 4704003, upload-time = "2025-10-21T16:21:46.244Z" }, ] [[package]] @@ -1222,9 +1222,9 @@ dependencies = [ { name = "grpcio" }, { name = "protobuf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2a/38/0cd65d29f8fe0b5efaef60a0664885b5457a566b1a531d3e6b76a8bb0f21/grpcio-status-1.60.0.tar.gz", hash = "sha256:f10e0b6db3adc0fdc244b71962814ee982996ef06186446b5695b9fa635aa1ab", size = 13546 } +sdist = { url = "https://files.pythonhosted.org/packages/2a/38/0cd65d29f8fe0b5efaef60a0664885b5457a566b1a531d3e6b76a8bb0f21/grpcio-status-1.60.0.tar.gz", hash = "sha256:f10e0b6db3adc0fdc244b71962814ee982996ef06186446b5695b9fa635aa1ab", size = 13546, upload-time = "2023-12-07T19:00:28.884Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/bd/f46d6511088f314cfedc880721fd32d387b8513b22da01cf4771d7439a2b/grpcio_status-1.60.0-py3-none-any.whl", hash = "sha256:7d383fa36e59c1e61d380d91350badd4d12ac56e4de2c2b831b050362c3c572e", size = 14448 }, + { url = "https://files.pythonhosted.org/packages/d9/bd/f46d6511088f314cfedc880721fd32d387b8513b22da01cf4771d7439a2b/grpcio_status-1.60.0-py3-none-any.whl", hash = "sha256:7d383fa36e59c1e61d380d91350badd4d12ac56e4de2c2b831b050362c3c572e", size = 14448, upload-time = "2023-12-07T18:56:58.47Z" }, ] [[package]] @@ -1236,25 +1236,25 @@ dependencies = [ { name = "protobuf" }, { name = "setuptools" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/54/fa/b69bd8040eafc09b88bb0ec0fea59e8aacd1a801e688af087cead213b0d0/grpcio-tools-1.62.3.tar.gz", hash = "sha256:7c7136015c3d62c3eef493efabaf9e3380e3e66d24ee8e94c01cb71377f57833", size = 4538520 } +sdist = { url = "https://files.pythonhosted.org/packages/54/fa/b69bd8040eafc09b88bb0ec0fea59e8aacd1a801e688af087cead213b0d0/grpcio-tools-1.62.3.tar.gz", hash = "sha256:7c7136015c3d62c3eef493efabaf9e3380e3e66d24ee8e94c01cb71377f57833", size = 4538520, upload-time = "2024-08-06T00:37:11.035Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/a5/d6887eba415ce318ae5005e8dfac3fa74892400b54b6d37b79e8b4f14f5e/grpcio_tools-1.62.3-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:d102b9b21c4e1e40af9a2ab3c6d41afba6bd29c0aa50ca013bf85c99cdc44ac5", size = 5147690 }, - { url = "https://files.pythonhosted.org/packages/8a/7c/3cde447a045e83ceb4b570af8afe67ffc86896a2fe7f59594dc8e5d0a645/grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:0a52cc9444df978438b8d2332c0ca99000521895229934a59f94f37ed896b133", size = 2720538 }, - { url = "https://files.pythonhosted.org/packages/88/07/f83f2750d44ac4f06c07c37395b9c1383ef5c994745f73c6bfaf767f0944/grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141d028bf5762d4a97f981c501da873589df3f7e02f4c1260e1921e565b376fa", size = 3071571 }, - { url = "https://files.pythonhosted.org/packages/37/74/40175897deb61e54aca716bc2e8919155b48f33aafec8043dda9592d8768/grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47a5c093ab256dec5714a7a345f8cc89315cb57c298b276fa244f37a0ba507f0", size = 2806207 }, - { url = "https://files.pythonhosted.org/packages/ec/ee/d8de915105a217cbcb9084d684abdc032030dcd887277f2ef167372287fe/grpcio_tools-1.62.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f6831fdec2b853c9daa3358535c55eed3694325889aa714070528cf8f92d7d6d", size = 3685815 }, - { url = "https://files.pythonhosted.org/packages/fd/d9/4360a6c12be3d7521b0b8c39e5d3801d622fbb81cc2721dbd3eee31e28c8/grpcio_tools-1.62.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e02d7c1a02e3814c94ba0cfe43d93e872c758bd8fd5c2797f894d0c49b4a1dfc", size = 3298378 }, - { url = "https://files.pythonhosted.org/packages/29/3b/7cdf4a9e5a3e0a35a528b48b111355cd14da601413a4f887aa99b6da468f/grpcio_tools-1.62.3-cp312-cp312-win32.whl", hash = "sha256:b881fd9505a84457e9f7e99362eeedd86497b659030cf57c6f0070df6d9c2b9b", size = 910416 }, - { url = "https://files.pythonhosted.org/packages/6c/66/dd3ec249e44c1cc15e902e783747819ed41ead1336fcba72bf841f72c6e9/grpcio_tools-1.62.3-cp312-cp312-win_amd64.whl", hash = "sha256:11c625eebefd1fd40a228fc8bae385e448c7e32a6ae134e43cf13bbc23f902b7", size = 1052856 }, + { url = "https://files.pythonhosted.org/packages/2a/a5/d6887eba415ce318ae5005e8dfac3fa74892400b54b6d37b79e8b4f14f5e/grpcio_tools-1.62.3-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:d102b9b21c4e1e40af9a2ab3c6d41afba6bd29c0aa50ca013bf85c99cdc44ac5", size = 5147690, upload-time = "2024-08-06T00:31:16.436Z" }, + { url = "https://files.pythonhosted.org/packages/8a/7c/3cde447a045e83ceb4b570af8afe67ffc86896a2fe7f59594dc8e5d0a645/grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:0a52cc9444df978438b8d2332c0ca99000521895229934a59f94f37ed896b133", size = 2720538, upload-time = "2024-08-06T00:31:18.905Z" }, + { url = "https://files.pythonhosted.org/packages/88/07/f83f2750d44ac4f06c07c37395b9c1383ef5c994745f73c6bfaf767f0944/grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141d028bf5762d4a97f981c501da873589df3f7e02f4c1260e1921e565b376fa", size = 3071571, upload-time = "2024-08-06T00:31:21.684Z" }, + { url = "https://files.pythonhosted.org/packages/37/74/40175897deb61e54aca716bc2e8919155b48f33aafec8043dda9592d8768/grpcio_tools-1.62.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47a5c093ab256dec5714a7a345f8cc89315cb57c298b276fa244f37a0ba507f0", size = 2806207, upload-time = "2024-08-06T00:31:24.208Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ee/d8de915105a217cbcb9084d684abdc032030dcd887277f2ef167372287fe/grpcio_tools-1.62.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f6831fdec2b853c9daa3358535c55eed3694325889aa714070528cf8f92d7d6d", size = 3685815, upload-time = "2024-08-06T00:31:26.917Z" }, + { url = "https://files.pythonhosted.org/packages/fd/d9/4360a6c12be3d7521b0b8c39e5d3801d622fbb81cc2721dbd3eee31e28c8/grpcio_tools-1.62.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e02d7c1a02e3814c94ba0cfe43d93e872c758bd8fd5c2797f894d0c49b4a1dfc", size = 3298378, upload-time = "2024-08-06T00:31:30.401Z" }, + { url = "https://files.pythonhosted.org/packages/29/3b/7cdf4a9e5a3e0a35a528b48b111355cd14da601413a4f887aa99b6da468f/grpcio_tools-1.62.3-cp312-cp312-win32.whl", hash = "sha256:b881fd9505a84457e9f7e99362eeedd86497b659030cf57c6f0070df6d9c2b9b", size = 910416, upload-time = "2024-08-06T00:31:33.118Z" }, + { url = "https://files.pythonhosted.org/packages/6c/66/dd3ec249e44c1cc15e902e783747819ed41ead1336fcba72bf841f72c6e9/grpcio_tools-1.62.3-cp312-cp312-win_amd64.whl", hash = "sha256:11c625eebefd1fd40a228fc8bae385e448c7e32a6ae134e43cf13bbc23f902b7", size = 1052856, upload-time = "2024-08-06T00:31:36.519Z" }, ] [[package]] name = "h11" version = "0.16.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] [[package]] @@ -1265,33 +1265,33 @@ dependencies = [ { name = "hpack" }, { name = "hyperframe" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026 } +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779 }, + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, ] [[package]] name = "hf-xet" version = "1.1.10" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/74/31/feeddfce1748c4a233ec1aa5b7396161c07ae1aa9b7bdbc9a72c3c7dd768/hf_xet-1.1.10.tar.gz", hash = "sha256:408aef343800a2102374a883f283ff29068055c111f003ff840733d3b715bb97", size = 487910 } +sdist = { url = "https://files.pythonhosted.org/packages/74/31/feeddfce1748c4a233ec1aa5b7396161c07ae1aa9b7bdbc9a72c3c7dd768/hf_xet-1.1.10.tar.gz", hash = "sha256:408aef343800a2102374a883f283ff29068055c111f003ff840733d3b715bb97", size = 487910, upload-time = "2025-09-12T20:10:27.12Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/a2/343e6d05de96908366bdc0081f2d8607d61200be2ac802769c4284cc65bd/hf_xet-1.1.10-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:686083aca1a6669bc85c21c0563551cbcdaa5cf7876a91f3d074a030b577231d", size = 2761466 }, - { url = "https://files.pythonhosted.org/packages/31/f9/6215f948ac8f17566ee27af6430ea72045e0418ce757260248b483f4183b/hf_xet-1.1.10-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71081925383b66b24eedff3013f8e6bbd41215c3338be4b94ba75fd75b21513b", size = 2623807 }, - { url = "https://files.pythonhosted.org/packages/15/07/86397573efefff941e100367bbda0b21496ffcdb34db7ab51912994c32a2/hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b6bceb6361c80c1cc42b5a7b4e3efd90e64630bcf11224dcac50ef30a47e435", size = 3186960 }, - { url = "https://files.pythonhosted.org/packages/01/a7/0b2e242b918cc30e1f91980f3c4b026ff2eedaf1e2ad96933bca164b2869/hf_xet-1.1.10-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eae7c1fc8a664e54753ffc235e11427ca61f4b0477d757cc4eb9ae374b69f09c", size = 3087167 }, - { url = "https://files.pythonhosted.org/packages/4a/25/3e32ab61cc7145b11eee9d745988e2f0f4fafda81b25980eebf97d8cff15/hf_xet-1.1.10-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0a0005fd08f002180f7a12d4e13b22be277725bc23ed0529f8add5c7a6309c06", size = 3248612 }, - { url = "https://files.pythonhosted.org/packages/2c/3d/ab7109e607ed321afaa690f557a9ada6d6d164ec852fd6bf9979665dc3d6/hf_xet-1.1.10-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f900481cf6e362a6c549c61ff77468bd59d6dd082f3170a36acfef2eb6a6793f", size = 3353360 }, - { url = "https://files.pythonhosted.org/packages/ee/0e/471f0a21db36e71a2f1752767ad77e92d8cde24e974e03d662931b1305ec/hf_xet-1.1.10-cp37-abi3-win_amd64.whl", hash = "sha256:5f54b19cc347c13235ae7ee98b330c26dd65ef1df47e5316ffb1e87713ca7045", size = 2804691 }, + { url = "https://files.pythonhosted.org/packages/f7/a2/343e6d05de96908366bdc0081f2d8607d61200be2ac802769c4284cc65bd/hf_xet-1.1.10-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:686083aca1a6669bc85c21c0563551cbcdaa5cf7876a91f3d074a030b577231d", size = 2761466, upload-time = "2025-09-12T20:10:22.836Z" }, + { url = "https://files.pythonhosted.org/packages/31/f9/6215f948ac8f17566ee27af6430ea72045e0418ce757260248b483f4183b/hf_xet-1.1.10-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71081925383b66b24eedff3013f8e6bbd41215c3338be4b94ba75fd75b21513b", size = 2623807, upload-time = "2025-09-12T20:10:21.118Z" }, + { url = "https://files.pythonhosted.org/packages/15/07/86397573efefff941e100367bbda0b21496ffcdb34db7ab51912994c32a2/hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b6bceb6361c80c1cc42b5a7b4e3efd90e64630bcf11224dcac50ef30a47e435", size = 3186960, upload-time = "2025-09-12T20:10:19.336Z" }, + { url = "https://files.pythonhosted.org/packages/01/a7/0b2e242b918cc30e1f91980f3c4b026ff2eedaf1e2ad96933bca164b2869/hf_xet-1.1.10-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eae7c1fc8a664e54753ffc235e11427ca61f4b0477d757cc4eb9ae374b69f09c", size = 3087167, upload-time = "2025-09-12T20:10:17.255Z" }, + { url = "https://files.pythonhosted.org/packages/4a/25/3e32ab61cc7145b11eee9d745988e2f0f4fafda81b25980eebf97d8cff15/hf_xet-1.1.10-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0a0005fd08f002180f7a12d4e13b22be277725bc23ed0529f8add5c7a6309c06", size = 3248612, upload-time = "2025-09-12T20:10:24.093Z" }, + { url = "https://files.pythonhosted.org/packages/2c/3d/ab7109e607ed321afaa690f557a9ada6d6d164ec852fd6bf9979665dc3d6/hf_xet-1.1.10-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f900481cf6e362a6c549c61ff77468bd59d6dd082f3170a36acfef2eb6a6793f", size = 3353360, upload-time = "2025-09-12T20:10:25.563Z" }, + { url = "https://files.pythonhosted.org/packages/ee/0e/471f0a21db36e71a2f1752767ad77e92d8cde24e974e03d662931b1305ec/hf_xet-1.1.10-cp37-abi3-win_amd64.whl", hash = "sha256:5f54b19cc347c13235ae7ee98b330c26dd65ef1df47e5316ffb1e87713ca7045", size = 2804691, upload-time = "2025-09-12T20:10:28.433Z" }, ] [[package]] name = "hpack" version = "4.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276 } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357 }, + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, ] [[package]] @@ -1302,9 +1302,9 @@ dependencies = [ { name = "certifi" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484 } +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784 }, + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, ] [[package]] @@ -1314,9 +1314,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pyparsing" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/52/77/6653db69c1f7ecfe5e3f9726fdadc981794656fcd7d98c4209fecfea9993/httplib2-0.31.0.tar.gz", hash = "sha256:ac7ab497c50975147d4f7b1ade44becc7df2f8954d42b38b3d69c515f531135c", size = 250759 } +sdist = { url = "https://files.pythonhosted.org/packages/52/77/6653db69c1f7ecfe5e3f9726fdadc981794656fcd7d98c4209fecfea9993/httplib2-0.31.0.tar.gz", hash = "sha256:ac7ab497c50975147d4f7b1ade44becc7df2f8954d42b38b3d69c515f531135c", size = 250759, upload-time = "2025-09-11T12:16:03.403Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/a2/0d269db0f6163be503775dc8b6a6fa15820cc9fdc866f6ba608d86b721f2/httplib2-0.31.0-py3-none-any.whl", hash = "sha256:b9cd78abea9b4e43a7714c6e0f8b6b8561a6fc1e95d5dbd367f5bf0ef35f5d24", size = 91148 }, + { url = "https://files.pythonhosted.org/packages/8c/a2/0d269db0f6163be503775dc8b6a6fa15820cc9fdc866f6ba608d86b721f2/httplib2-0.31.0-py3-none-any.whl", hash = "sha256:b9cd78abea9b4e43a7714c6e0f8b6b8561a6fc1e95d5dbd367f5bf0ef35f5d24", size = 91148, upload-time = "2025-09-11T12:16:01.803Z" }, ] [[package]] @@ -1329,9 +1329,9 @@ dependencies = [ { name = "httpcore" }, { name = "idna" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] [package.optional-dependencies] @@ -1353,36 +1353,36 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/10/7e/a0a97de7c73671863ca6b3f61fa12518caf35db37825e43d63a70956738c/huggingface_hub-0.35.3.tar.gz", hash = "sha256:350932eaa5cc6a4747efae85126ee220e4ef1b54e29d31c3b45c5612ddf0b32a", size = 461798 } +sdist = { url = "https://files.pythonhosted.org/packages/10/7e/a0a97de7c73671863ca6b3f61fa12518caf35db37825e43d63a70956738c/huggingface_hub-0.35.3.tar.gz", hash = "sha256:350932eaa5cc6a4747efae85126ee220e4ef1b54e29d31c3b45c5612ddf0b32a", size = 461798, upload-time = "2025-09-29T14:29:58.625Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/31/a0/651f93d154cb72323358bf2bbae3e642bdb5d2f1bfc874d096f7cb159fa0/huggingface_hub-0.35.3-py3-none-any.whl", hash = "sha256:0e3a01829c19d86d03793e4577816fe3bdfc1602ac62c7fb220d593d351224ba", size = 564262 }, + { url = "https://files.pythonhosted.org/packages/31/a0/651f93d154cb72323358bf2bbae3e642bdb5d2f1bfc874d096f7cb159fa0/huggingface_hub-0.35.3-py3-none-any.whl", hash = "sha256:0e3a01829c19d86d03793e4577816fe3bdfc1602ac62c7fb220d593d351224ba", size = 564262, upload-time = "2025-09-29T14:29:55.813Z" }, ] [[package]] name = "hyperframe" version = "6.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566 } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007 }, + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, ] [[package]] name = "identify" version = "2.6.14" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/52/c4/62963f25a678f6a050fb0505a65e9e726996171e6dbe1547f79619eefb15/identify-2.6.14.tar.gz", hash = "sha256:663494103b4f717cb26921c52f8751363dc89db64364cd836a9bf1535f53cd6a", size = 99283 } +sdist = { url = "https://files.pythonhosted.org/packages/52/c4/62963f25a678f6a050fb0505a65e9e726996171e6dbe1547f79619eefb15/identify-2.6.14.tar.gz", hash = "sha256:663494103b4f717cb26921c52f8751363dc89db64364cd836a9bf1535f53cd6a", size = 99283, upload-time = "2025-09-06T19:30:52.938Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/ae/2ad30f4652712c82f1c23423d79136fbce338932ad166d70c1efb86a5998/identify-2.6.14-py2.py3-none-any.whl", hash = "sha256:11a073da82212c6646b1f39bb20d4483bfb9543bd5566fec60053c4bb309bf2e", size = 99172 }, + { url = "https://files.pythonhosted.org/packages/e5/ae/2ad30f4652712c82f1c23423d79136fbce338932ad166d70c1efb86a5998/identify-2.6.14-py2.py3-none-any.whl", hash = "sha256:11a073da82212c6646b1f39bb20d4483bfb9543bd5566fec60053c4bb309bf2e", size = 99172, upload-time = "2025-09-06T19:30:51.759Z" }, ] [[package]] name = "idna" version = "3.10" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, ] [[package]] @@ -1392,45 +1392,45 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "zipp" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641 } +sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656 }, + { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, ] [[package]] name = "inflection" version = "0.5.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e1/7e/691d061b7329bc8d54edbf0ec22fbfb2afe61facb681f9aaa9bff7a27d04/inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417", size = 15091 } +sdist = { url = "https://files.pythonhosted.org/packages/e1/7e/691d061b7329bc8d54edbf0ec22fbfb2afe61facb681f9aaa9bff7a27d04/inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417", size = 15091, upload-time = "2020-08-22T08:16:29.139Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/59/91/aa6bde563e0085a02a435aa99b49ef75b0a4b062635e606dab23ce18d720/inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2", size = 9454 }, + { url = "https://files.pythonhosted.org/packages/59/91/aa6bde563e0085a02a435aa99b49ef75b0a4b062635e606dab23ce18d720/inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2", size = 9454, upload-time = "2020-08-22T08:16:27.816Z" }, ] [[package]] name = "iniconfig" version = "2.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 }, + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] [[package]] name = "invoke" version = "2.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f9/42/127e6d792884ab860defc3f4d80a8f9812e48ace584ffc5a346de58cdc6c/invoke-2.2.0.tar.gz", hash = "sha256:ee6cbb101af1a859c7fe84f2a264c059020b0cb7fe3535f9424300ab568f6bd5", size = 299835 } +sdist = { url = "https://files.pythonhosted.org/packages/f9/42/127e6d792884ab860defc3f4d80a8f9812e48ace584ffc5a346de58cdc6c/invoke-2.2.0.tar.gz", hash = "sha256:ee6cbb101af1a859c7fe84f2a264c059020b0cb7fe3535f9424300ab568f6bd5", size = 299835, upload-time = "2023-07-12T18:05:17.998Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/66/7f8c48009c72d73bc6bbe6eb87ac838d6a526146f7dab14af671121eb379/invoke-2.2.0-py3-none-any.whl", hash = "sha256:6ea924cc53d4f78e3d98bc436b08069a03077e6f85ad1ddaa8a116d7dad15820", size = 160274 }, + { url = "https://files.pythonhosted.org/packages/0a/66/7f8c48009c72d73bc6bbe6eb87ac838d6a526146f7dab14af671121eb379/invoke-2.2.0-py3-none-any.whl", hash = "sha256:6ea924cc53d4f78e3d98bc436b08069a03077e6f85ad1ddaa8a116d7dad15820", size = 160274, upload-time = "2023-07-12T18:05:16.294Z" }, ] [[package]] name = "isodate" version = "0.7.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 } +sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705, upload-time = "2024-10-08T23:04:11.5Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 }, + { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320, upload-time = "2024-10-08T23:04:09.501Z" }, ] [[package]] @@ -1440,47 +1440,47 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markupsafe" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 } +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 }, + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] [[package]] name = "jiter" version = "0.11.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9d/c0/a3bb4cc13aced219dd18191ea66e874266bd8aa7b96744e495e1c733aa2d/jiter-0.11.0.tar.gz", hash = "sha256:1d9637eaf8c1d6a63d6562f2a6e5ab3af946c66037eb1b894e8fad75422266e4", size = 167094 } +sdist = { url = "https://files.pythonhosted.org/packages/9d/c0/a3bb4cc13aced219dd18191ea66e874266bd8aa7b96744e495e1c733aa2d/jiter-0.11.0.tar.gz", hash = "sha256:1d9637eaf8c1d6a63d6562f2a6e5ab3af946c66037eb1b894e8fad75422266e4", size = 167094, upload-time = "2025-09-15T09:20:38.212Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/b5/3009b112b8f673e568ef79af9863d8309a15f0a8cdcc06ed6092051f377e/jiter-0.11.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:2fb7b377688cc3850bbe5c192a6bd493562a0bc50cbc8b047316428fbae00ada", size = 305510 }, - { url = "https://files.pythonhosted.org/packages/fe/82/15514244e03b9e71e086bbe2a6de3e4616b48f07d5f834200c873956fb8c/jiter-0.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a1b7cbe3f25bd0d8abb468ba4302a5d45617ee61b2a7a638f63fee1dc086be99", size = 316521 }, - { url = "https://files.pythonhosted.org/packages/92/94/7a2e905f40ad2d6d660e00b68d818f9e29fb87ffe82774f06191e93cbe4a/jiter-0.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0a7f0ec81d5b7588c5cade1eb1925b91436ae6726dc2df2348524aeabad5de6", size = 338214 }, - { url = "https://files.pythonhosted.org/packages/a8/9c/5791ed5bdc76f12110158d3316a7a3ec0b1413d018b41c5ed399549d3ad5/jiter-0.11.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07630bb46ea2a6b9c6ed986c6e17e35b26148cce2c535454b26ee3f0e8dcaba1", size = 361280 }, - { url = "https://files.pythonhosted.org/packages/d4/7f/b7d82d77ff0d2cb06424141000176b53a9e6b16a1125525bb51ea4990c2e/jiter-0.11.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7764f27d28cd4a9cbc61704dfcd80c903ce3aad106a37902d3270cd6673d17f4", size = 487895 }, - { url = "https://files.pythonhosted.org/packages/42/44/10a1475d46f1fc1fd5cc2e82c58e7bca0ce5852208e0fa5df2f949353321/jiter-0.11.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1d4a6c4a737d486f77f842aeb22807edecb4a9417e6700c7b981e16d34ba7c72", size = 378421 }, - { url = "https://files.pythonhosted.org/packages/9a/5f/0dc34563d8164d31d07bc09d141d3da08157a68dcd1f9b886fa4e917805b/jiter-0.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf408d2a0abd919b60de8c2e7bc5eeab72d4dafd18784152acc7c9adc3291591", size = 347932 }, - { url = "https://files.pythonhosted.org/packages/f7/de/b68f32a4fcb7b4a682b37c73a0e5dae32180140cd1caf11aef6ad40ddbf2/jiter-0.11.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cdef53eda7d18e799625023e1e250dbc18fbc275153039b873ec74d7e8883e09", size = 386959 }, - { url = "https://files.pythonhosted.org/packages/76/0a/c08c92e713b6e28972a846a81ce374883dac2f78ec6f39a0dad9f2339c3a/jiter-0.11.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:53933a38ef7b551dd9c7f1064f9d7bb235bb3168d0fa5f14f0798d1b7ea0d9c5", size = 517187 }, - { url = "https://files.pythonhosted.org/packages/89/b5/4a283bec43b15aad54fcae18d951f06a2ec3f78db5708d3b59a48e9c3fbd/jiter-0.11.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:11840d2324c9ab5162fc1abba23bc922124fedcff0d7b7f85fffa291e2f69206", size = 509461 }, - { url = "https://files.pythonhosted.org/packages/34/a5/f8bad793010534ea73c985caaeef8cc22dfb1fedb15220ecdf15c623c07a/jiter-0.11.0-cp312-cp312-win32.whl", hash = "sha256:4f01a744d24a5f2bb4a11657a1b27b61dc038ae2e674621a74020406e08f749b", size = 206664 }, - { url = "https://files.pythonhosted.org/packages/ed/42/5823ec2b1469395a160b4bf5f14326b4a098f3b6898fbd327366789fa5d3/jiter-0.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:29fff31190ab3a26de026da2f187814f4b9c6695361e20a9ac2123e4d4378a4c", size = 203520 }, + { url = "https://files.pythonhosted.org/packages/ba/b5/3009b112b8f673e568ef79af9863d8309a15f0a8cdcc06ed6092051f377e/jiter-0.11.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:2fb7b377688cc3850bbe5c192a6bd493562a0bc50cbc8b047316428fbae00ada", size = 305510, upload-time = "2025-09-15T09:19:25.893Z" }, + { url = "https://files.pythonhosted.org/packages/fe/82/15514244e03b9e71e086bbe2a6de3e4616b48f07d5f834200c873956fb8c/jiter-0.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a1b7cbe3f25bd0d8abb468ba4302a5d45617ee61b2a7a638f63fee1dc086be99", size = 316521, upload-time = "2025-09-15T09:19:27.525Z" }, + { url = "https://files.pythonhosted.org/packages/92/94/7a2e905f40ad2d6d660e00b68d818f9e29fb87ffe82774f06191e93cbe4a/jiter-0.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0a7f0ec81d5b7588c5cade1eb1925b91436ae6726dc2df2348524aeabad5de6", size = 338214, upload-time = "2025-09-15T09:19:28.727Z" }, + { url = "https://files.pythonhosted.org/packages/a8/9c/5791ed5bdc76f12110158d3316a7a3ec0b1413d018b41c5ed399549d3ad5/jiter-0.11.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07630bb46ea2a6b9c6ed986c6e17e35b26148cce2c535454b26ee3f0e8dcaba1", size = 361280, upload-time = "2025-09-15T09:19:30.013Z" }, + { url = "https://files.pythonhosted.org/packages/d4/7f/b7d82d77ff0d2cb06424141000176b53a9e6b16a1125525bb51ea4990c2e/jiter-0.11.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7764f27d28cd4a9cbc61704dfcd80c903ce3aad106a37902d3270cd6673d17f4", size = 487895, upload-time = "2025-09-15T09:19:31.424Z" }, + { url = "https://files.pythonhosted.org/packages/42/44/10a1475d46f1fc1fd5cc2e82c58e7bca0ce5852208e0fa5df2f949353321/jiter-0.11.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1d4a6c4a737d486f77f842aeb22807edecb4a9417e6700c7b981e16d34ba7c72", size = 378421, upload-time = "2025-09-15T09:19:32.746Z" }, + { url = "https://files.pythonhosted.org/packages/9a/5f/0dc34563d8164d31d07bc09d141d3da08157a68dcd1f9b886fa4e917805b/jiter-0.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf408d2a0abd919b60de8c2e7bc5eeab72d4dafd18784152acc7c9adc3291591", size = 347932, upload-time = "2025-09-15T09:19:34.612Z" }, + { url = "https://files.pythonhosted.org/packages/f7/de/b68f32a4fcb7b4a682b37c73a0e5dae32180140cd1caf11aef6ad40ddbf2/jiter-0.11.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cdef53eda7d18e799625023e1e250dbc18fbc275153039b873ec74d7e8883e09", size = 386959, upload-time = "2025-09-15T09:19:35.994Z" }, + { url = "https://files.pythonhosted.org/packages/76/0a/c08c92e713b6e28972a846a81ce374883dac2f78ec6f39a0dad9f2339c3a/jiter-0.11.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:53933a38ef7b551dd9c7f1064f9d7bb235bb3168d0fa5f14f0798d1b7ea0d9c5", size = 517187, upload-time = "2025-09-15T09:19:37.426Z" }, + { url = "https://files.pythonhosted.org/packages/89/b5/4a283bec43b15aad54fcae18d951f06a2ec3f78db5708d3b59a48e9c3fbd/jiter-0.11.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:11840d2324c9ab5162fc1abba23bc922124fedcff0d7b7f85fffa291e2f69206", size = 509461, upload-time = "2025-09-15T09:19:38.761Z" }, + { url = "https://files.pythonhosted.org/packages/34/a5/f8bad793010534ea73c985caaeef8cc22dfb1fedb15220ecdf15c623c07a/jiter-0.11.0-cp312-cp312-win32.whl", hash = "sha256:4f01a744d24a5f2bb4a11657a1b27b61dc038ae2e674621a74020406e08f749b", size = 206664, upload-time = "2025-09-15T09:19:40.096Z" }, + { url = "https://files.pythonhosted.org/packages/ed/42/5823ec2b1469395a160b4bf5f14326b4a098f3b6898fbd327366789fa5d3/jiter-0.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:29fff31190ab3a26de026da2f187814f4b9c6695361e20a9ac2123e4d4378a4c", size = 203520, upload-time = "2025-09-15T09:19:41.798Z" }, ] [[package]] name = "jmespath" version = "1.0.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843 } +sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843, upload-time = "2022-06-17T18:00:12.224Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256 }, + { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" }, ] [[package]] name = "joblib" version = "1.5.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e8/5d/447af5ea094b9e4c4054f82e223ada074c552335b9b4b2d14bd9b35a67c4/joblib-1.5.2.tar.gz", hash = "sha256:3faa5c39054b2f03ca547da9b2f52fde67c06240c31853f306aea97f13647b55", size = 331077 } +sdist = { url = "https://files.pythonhosted.org/packages/e8/5d/447af5ea094b9e4c4054f82e223ada074c552335b9b4b2d14bd9b35a67c4/joblib-1.5.2.tar.gz", hash = "sha256:3faa5c39054b2f03ca547da9b2f52fde67c06240c31853f306aea97f13647b55", size = 331077, upload-time = "2025-08-27T12:15:46.575Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/e8/685f47e0d754320684db4425a0967f7d3fa70126bffd76110b7009a0090f/joblib-1.5.2-py3-none-any.whl", hash = "sha256:4e1f0bdbb987e6d843c70cf43714cb276623def372df3c22fe5266b2670bc241", size = 308396 }, + { url = "https://files.pythonhosted.org/packages/1e/e8/685f47e0d754320684db4425a0967f7d3fa70126bffd76110b7009a0090f/joblib-1.5.2-py3-none-any.whl", hash = "sha256:4e1f0bdbb987e6d843c70cf43714cb276623def372df3c22fe5266b2670bc241", size = 308396, upload-time = "2025-08-27T12:15:45.188Z" }, ] [[package]] @@ -1493,9 +1493,9 @@ dependencies = [ { name = "referencing" }, { name = "rpds-py" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342 } +sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040 }, + { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, ] [[package]] @@ -1505,9 +1505,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "referencing" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855 } +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437 }, + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, ] [[package]] @@ -1520,9 +1520,9 @@ dependencies = [ { name = "tzdata" }, { name = "vine" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0f/d3/5ff936d8319ac86b9c409f1501b07c426e6ad41966fedace9ef1b966e23f/kombu-5.5.4.tar.gz", hash = "sha256:886600168275ebeada93b888e831352fe578168342f0d1d5833d88ba0d847363", size = 461992 } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d3/5ff936d8319ac86b9c409f1501b07c426e6ad41966fedace9ef1b966e23f/kombu-5.5.4.tar.gz", hash = "sha256:886600168275ebeada93b888e831352fe578168342f0d1d5833d88ba0d847363", size = 461992, upload-time = "2025-06-01T10:19:22.281Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/70/a07dcf4f62598c8ad579df241af55ced65bed76e42e45d3c368a6d82dbc1/kombu-5.5.4-py3-none-any.whl", hash = "sha256:a12ed0557c238897d8e518f1d1fdf84bd1516c5e305af2dacd85c2015115feb8", size = 210034 }, + { url = "https://files.pythonhosted.org/packages/ef/70/a07dcf4f62598c8ad579df241af55ced65bed76e42e45d3c368a6d82dbc1/kombu-5.5.4-py3-none-any.whl", hash = "sha256:a12ed0557c238897d8e518f1d1fdf84bd1516c5e305af2dacd85c2015115feb8", size = 210034, upload-time = "2025-06-01T10:19:20.436Z" }, ] [[package]] @@ -1532,16 +1532,16 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pyyaml" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5c/55/ca4552d7fe79a91b2a7b4fa39991e8a45a17c8bfbcaf264597d95903c777/libcst-1.8.5.tar.gz", hash = "sha256:e72e1816eed63f530668e93a4c22ff1cf8b91ddce0ec53e597d3f6c53e103ec7", size = 884582 } +sdist = { url = "https://files.pythonhosted.org/packages/5c/55/ca4552d7fe79a91b2a7b4fa39991e8a45a17c8bfbcaf264597d95903c777/libcst-1.8.5.tar.gz", hash = "sha256:e72e1816eed63f530668e93a4c22ff1cf8b91ddce0ec53e597d3f6c53e103ec7", size = 884582, upload-time = "2025-09-26T05:29:44.101Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/13/bb/c7abe0654fcf00292d6959256948ce4ae07785c4f65a45c3e25cc4637074/libcst-1.8.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:27c7733aba7b43239157661207b1e3a9f3711a7fc061a0eca6a33f0716fdfd21", size = 2196690 }, - { url = "https://files.pythonhosted.org/packages/49/25/e7c02209e8ce66e7b75a66d132118f6f812a8b03cd31ee7d96de56c733a1/libcst-1.8.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b8c3cfbbf6049e3c587713652e4b3c88cfbf7df7878b2eeefaa8dd20a48dc607", size = 2082616 }, - { url = "https://files.pythonhosted.org/packages/32/68/a4f49d99e3130256e225d639722440ba2682c12812a30ebd7ba64fd0fd31/libcst-1.8.5-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:31d86025d8997c853f85c4b5d494f04a157fb962e24f187b4af70c7755c9b27d", size = 2229037 }, - { url = "https://files.pythonhosted.org/packages/b2/62/4fa21600a0bf3eb9f4d4f8bbb50ef120fb0b2990195eabba997b0b889566/libcst-1.8.5-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ff9c535cfe99f0be79ac3024772b288570751fc69fc472b44fca12d1912d1561", size = 2292806 }, - { url = "https://files.pythonhosted.org/packages/14/df/a01e8d54b62060698e37e3e28f77559ecb70c7b93ffee00d17e40221f419/libcst-1.8.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e8204607504563d3606bbaea2b9b04e0cef2b3bdc14c89171a702c1e09b9318a", size = 2294836 }, - { url = "https://files.pythonhosted.org/packages/75/4f/c410e7f7ceda0558f688c1ca5dfb3a40ff8dfc527f8e6015fa749e11a650/libcst-1.8.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5e6cd3df72d47701b205fa3349ba8899566df82cef248c2fdf5f575d640419c4", size = 2396004 }, - { url = "https://files.pythonhosted.org/packages/f0/07/bb77dcb94badad0ad3e5a1e992a4318dbdf40632eac3b5cf18299858ad7d/libcst-1.8.5-cp312-cp312-win_amd64.whl", hash = "sha256:197c2f86dd0ca5c6464184ddef7f6440d64c8da39b78d16fc053da6701ed1209", size = 2107301 }, - { url = "https://files.pythonhosted.org/packages/79/70/e688e6d99d6920c3f97bf8bbaec33ac2c71a947730772a1d32dd899dbbf1/libcst-1.8.5-cp312-cp312-win_arm64.whl", hash = "sha256:c5ca109c9a81dff3d947dceba635a08f9c3dfeb7f61b0b824a175ef0a98ea69b", size = 1990870 }, + { url = "https://files.pythonhosted.org/packages/13/bb/c7abe0654fcf00292d6959256948ce4ae07785c4f65a45c3e25cc4637074/libcst-1.8.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:27c7733aba7b43239157661207b1e3a9f3711a7fc061a0eca6a33f0716fdfd21", size = 2196690, upload-time = "2025-09-26T05:28:17.839Z" }, + { url = "https://files.pythonhosted.org/packages/49/25/e7c02209e8ce66e7b75a66d132118f6f812a8b03cd31ee7d96de56c733a1/libcst-1.8.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b8c3cfbbf6049e3c587713652e4b3c88cfbf7df7878b2eeefaa8dd20a48dc607", size = 2082616, upload-time = "2025-09-26T05:28:19.794Z" }, + { url = "https://files.pythonhosted.org/packages/32/68/a4f49d99e3130256e225d639722440ba2682c12812a30ebd7ba64fd0fd31/libcst-1.8.5-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:31d86025d8997c853f85c4b5d494f04a157fb962e24f187b4af70c7755c9b27d", size = 2229037, upload-time = "2025-09-26T05:28:21.459Z" }, + { url = "https://files.pythonhosted.org/packages/b2/62/4fa21600a0bf3eb9f4d4f8bbb50ef120fb0b2990195eabba997b0b889566/libcst-1.8.5-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ff9c535cfe99f0be79ac3024772b288570751fc69fc472b44fca12d1912d1561", size = 2292806, upload-time = "2025-09-26T05:28:23.033Z" }, + { url = "https://files.pythonhosted.org/packages/14/df/a01e8d54b62060698e37e3e28f77559ecb70c7b93ffee00d17e40221f419/libcst-1.8.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e8204607504563d3606bbaea2b9b04e0cef2b3bdc14c89171a702c1e09b9318a", size = 2294836, upload-time = "2025-09-26T05:28:24.937Z" }, + { url = "https://files.pythonhosted.org/packages/75/4f/c410e7f7ceda0558f688c1ca5dfb3a40ff8dfc527f8e6015fa749e11a650/libcst-1.8.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5e6cd3df72d47701b205fa3349ba8899566df82cef248c2fdf5f575d640419c4", size = 2396004, upload-time = "2025-09-26T05:28:26.582Z" }, + { url = "https://files.pythonhosted.org/packages/f0/07/bb77dcb94badad0ad3e5a1e992a4318dbdf40632eac3b5cf18299858ad7d/libcst-1.8.5-cp312-cp312-win_amd64.whl", hash = "sha256:197c2f86dd0ca5c6464184ddef7f6440d64c8da39b78d16fc053da6701ed1209", size = 2107301, upload-time = "2025-09-26T05:28:28.235Z" }, + { url = "https://files.pythonhosted.org/packages/79/70/e688e6d99d6920c3f97bf8bbaec33ac2c71a947730772a1d32dd899dbbf1/libcst-1.8.5-cp312-cp312-win_arm64.whl", hash = "sha256:c5ca109c9a81dff3d947dceba635a08f9c3dfeb7f61b0b824a175ef0a98ea69b", size = 1990870, upload-time = "2025-09-26T05:28:29.858Z" }, ] [[package]] @@ -1576,9 +1576,9 @@ dependencies = [ { name = "httpx" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9b/72/816e6e900448e1b4a8137d90e65876b296c5264a23db6ae888bd3e6660ba/llama_cloud-0.1.35.tar.gz", hash = "sha256:200349d5d57424d7461f304cdb1355a58eea3e6ca1e6b0d75c66b2e937216983", size = 106403 } +sdist = { url = "https://files.pythonhosted.org/packages/9b/72/816e6e900448e1b4a8137d90e65876b296c5264a23db6ae888bd3e6660ba/llama_cloud-0.1.35.tar.gz", hash = "sha256:200349d5d57424d7461f304cdb1355a58eea3e6ca1e6b0d75c66b2e937216983", size = 106403, upload-time = "2025-07-28T17:22:06.41Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/d2/8d18a021ab757cea231428404f21fe3186bf1ebaac3f57a73c379483fd3f/llama_cloud-0.1.35-py3-none-any.whl", hash = "sha256:b7abab4423118e6f638d2f326749e7a07c6426543bea6da99b623c715b22af71", size = 303280 }, + { url = "https://files.pythonhosted.org/packages/1d/d2/8d18a021ab757cea231428404f21fe3186bf1ebaac3f57a73c379483fd3f/llama_cloud-0.1.35-py3-none-any.whl", hash = "sha256:b7abab4423118e6f638d2f326749e7a07c6426543bea6da99b623c715b22af71", size = 303280, upload-time = "2025-07-28T17:22:04.946Z" }, ] [[package]] @@ -1595,9 +1595,9 @@ dependencies = [ { name = "llama-index-readers-llama-parse" }, { name = "nltk" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/40/27/5fffc57b98e753eff580184b6260b47d8d2fff4fc91edf75352402f33881/llama_index-0.13.2.tar.gz", hash = "sha256:110e5e8e077aab7643eecb0962bcdb927bdea6a2c9897606b4b26e498d93dd5b", size = 8029 } +sdist = { url = "https://files.pythonhosted.org/packages/40/27/5fffc57b98e753eff580184b6260b47d8d2fff4fc91edf75352402f33881/llama_index-0.13.2.tar.gz", hash = "sha256:110e5e8e077aab7643eecb0962bcdb927bdea6a2c9897606b4b26e498d93dd5b", size = 8029, upload-time = "2025-08-14T22:04:03.732Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/3a/de1a7d6cf24c41082464fa4bda82dba014acee0f438ef0cec606ba43ed28/llama_index-0.13.2-py3-none-any.whl", hash = "sha256:8de8eefffcfa64a9225267d7813fcb55b8ea12181d4044efe5b22642d91d2294", size = 7027 }, + { url = "https://files.pythonhosted.org/packages/5c/3a/de1a7d6cf24c41082464fa4bda82dba014acee0f438ef0cec606ba43ed28/llama_index-0.13.2-py3-none-any.whl", hash = "sha256:8de8eefffcfa64a9225267d7813fcb55b8ea12181d4044efe5b22642d91d2294", size = 7027, upload-time = "2025-08-14T22:04:02.408Z" }, ] [[package]] @@ -1609,9 +1609,9 @@ dependencies = [ { name = "llama-index-embeddings-openai" }, { name = "llama-index-llms-openai" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d2/e3/ac6928586e20cfd327a2a38a00781cbc8fae923edcd0316c23e38aae1537/llama_index_cli-0.5.1.tar.gz", hash = "sha256:0446159d85c56c29022c1c830c9886f670d5f59d69343c3c029a3b20eda1a9d8", size = 24821 } +sdist = { url = "https://files.pythonhosted.org/packages/d2/e3/ac6928586e20cfd327a2a38a00781cbc8fae923edcd0316c23e38aae1537/llama_index_cli-0.5.1.tar.gz", hash = "sha256:0446159d85c56c29022c1c830c9886f670d5f59d69343c3c029a3b20eda1a9d8", size = 24821, upload-time = "2025-09-12T15:22:44.064Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/16/b53af5b23921d1e18f57b7a79d557b34554df295c63f5c59d5bee1f5fb47/llama_index_cli-0.5.1-py3-none-any.whl", hash = "sha256:5429b2fd7960df7724c2955b6e6901f6fa910b7b5ecef411c979a8b545a6b7e2", size = 28179 }, + { url = "https://files.pythonhosted.org/packages/b3/16/b53af5b23921d1e18f57b7a79d557b34554df295c63f5c59d5bee1f5fb47/llama_index_cli-0.5.1-py3-none-any.whl", hash = "sha256:5429b2fd7960df7724c2955b6e6901f6fa910b7b5ecef411c979a8b545a6b7e2", size = 28179, upload-time = "2025-09-12T15:22:43.169Z" }, ] [[package]] @@ -1647,9 +1647,9 @@ dependencies = [ { name = "typing-inspect" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2d/f8/4f6e2bbc34ec6586456727a644960a1ff2d9db60b92071e213ad9d160456/llama_index_core-0.13.6.tar.gz", hash = "sha256:80315a6bd1f9804f48c1870eff1a0315bf9fe5a413747d53eb88a8ebb2602b97", size = 7232179 } +sdist = { url = "https://files.pythonhosted.org/packages/2d/f8/4f6e2bbc34ec6586456727a644960a1ff2d9db60b92071e213ad9d160456/llama_index_core-0.13.6.tar.gz", hash = "sha256:80315a6bd1f9804f48c1870eff1a0315bf9fe5a413747d53eb88a8ebb2602b97", size = 7232179, upload-time = "2025-09-07T03:27:26.544Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/35/23/7e497216ece6e041c6a271f2b7952e5609729da0dcdf09dd3f25a4efc1b9/llama_index_core-0.13.6-py3-none-any.whl", hash = "sha256:67bec3c06a8105cd82d83db0f8c3122f4e4d8a4b9c7a2768cced6a2686ddb331", size = 7575324 }, + { url = "https://files.pythonhosted.org/packages/35/23/7e497216ece6e041c6a271f2b7952e5609729da0dcdf09dd3f25a4efc1b9/llama_index_core-0.13.6-py3-none-any.whl", hash = "sha256:67bec3c06a8105cd82d83db0f8c3122f4e4d8a4b9c7a2768cced6a2686ddb331", size = 7575324, upload-time = "2025-09-07T03:27:19.243Z" }, ] [[package]] @@ -1660,9 +1660,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "openai" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/26/6a/80ed46993c6827786cdec4f6b553f3f4e5fc8741c31e8903c694833d24bf/llama_index_embeddings_openai-0.5.0.tar.gz", hash = "sha256:ac587839a111089ea8a6255f9214016d7a813b383bbbbf9207799be1100758eb", size = 7019 } +sdist = { url = "https://files.pythonhosted.org/packages/26/6a/80ed46993c6827786cdec4f6b553f3f4e5fc8741c31e8903c694833d24bf/llama_index_embeddings_openai-0.5.0.tar.gz", hash = "sha256:ac587839a111089ea8a6255f9214016d7a813b383bbbbf9207799be1100758eb", size = 7019, upload-time = "2025-07-30T19:55:05.699Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/01/21/65f13a385292d7d573dfde472da7daff5f779345d60c5c3e274142ec8ba2/llama_index_embeddings_openai-0.5.0-py3-none-any.whl", hash = "sha256:d817edb22e3ff475e8cd1833faf1147028986bc1d688f7894ef947558864b728", size = 7009 }, + { url = "https://files.pythonhosted.org/packages/01/21/65f13a385292d7d573dfde472da7daff5f779345d60c5c3e274142ec8ba2/llama_index_embeddings_openai-0.5.0-py3-none-any.whl", hash = "sha256:d817edb22e3ff475e8cd1833faf1147028986bc1d688f7894ef947558864b728", size = 7009, upload-time = "2025-07-30T19:55:04.86Z" }, ] [[package]] @@ -1674,9 +1674,9 @@ dependencies = [ { name = "llama-cloud" }, { name = "llama-index-core" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/61/4a/79044fcb3209583d1ffe0c2a7c19dddfb657a03faeb9fe0cf5a74027e646/llama_index_indices_managed_llama_cloud-0.9.4.tar.gz", hash = "sha256:b5e00752ab30564abf19c57595a2107f5697c3b03b085817b4fca84a38ebbd59", size = 15146 } +sdist = { url = "https://files.pythonhosted.org/packages/61/4a/79044fcb3209583d1ffe0c2a7c19dddfb657a03faeb9fe0cf5a74027e646/llama_index_indices_managed_llama_cloud-0.9.4.tar.gz", hash = "sha256:b5e00752ab30564abf19c57595a2107f5697c3b03b085817b4fca84a38ebbd59", size = 15146, upload-time = "2025-09-08T20:29:58.673Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a6/6a/0e33245df06afc9766c46a1fe92687be8a09da5d0d0128bc08d84a9f5efa/llama_index_indices_managed_llama_cloud-0.9.4-py3-none-any.whl", hash = "sha256:535a08811046803ca6ab7f8e9d510e926aa5306608b02201ad3d9d21701383bc", size = 17005 }, + { url = "https://files.pythonhosted.org/packages/a6/6a/0e33245df06afc9766c46a1fe92687be8a09da5d0d0128bc08d84a9f5efa/llama_index_indices_managed_llama_cloud-0.9.4-py3-none-any.whl", hash = "sha256:535a08811046803ca6ab7f8e9d510e926aa5306608b02201ad3d9d21701383bc", size = 17005, upload-time = "2025-09-08T20:29:57.876Z" }, ] [[package]] @@ -1687,9 +1687,9 @@ dependencies = [ { name = "deprecated" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/70/e5/a3628da5d716d6bbc2c0a8d39b629dff81b33d5625c5b934e1456370064f/llama_index_instrumentation-0.4.1.tar.gz", hash = "sha256:a79d0dd2baba34f05ff4354d63a99b212322635b8afa6cc96ed00a7e11ebfdc3", size = 45788 } +sdist = { url = "https://files.pythonhosted.org/packages/70/e5/a3628da5d716d6bbc2c0a8d39b629dff81b33d5625c5b934e1456370064f/llama_index_instrumentation-0.4.1.tar.gz", hash = "sha256:a79d0dd2baba34f05ff4354d63a99b212322635b8afa6cc96ed00a7e11ebfdc3", size = 45788, upload-time = "2025-09-15T03:53:00.219Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/7a/c414f4dc9a7dd90d050c387489436bab2d678a566b704ede2f5b62f82ad7/llama_index_instrumentation-0.4.1-py3-none-any.whl", hash = "sha256:0d3ac926d0db3d39c0ec34ee72da5322d61e06b87fe956407e4a1e7a2708b936", size = 15063 }, + { url = "https://files.pythonhosted.org/packages/3a/7a/c414f4dc9a7dd90d050c387489436bab2d678a566b704ede2f5b62f82ad7/llama_index_instrumentation-0.4.1-py3-none-any.whl", hash = "sha256:0d3ac926d0db3d39c0ec34ee72da5322d61e06b87fe956407e4a1e7a2708b936", size = 15063, upload-time = "2025-09-15T03:52:59.098Z" }, ] [[package]] @@ -1700,9 +1700,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "openai" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/20/59/4c414d79a21189d9db6de58ecbc297cd0f5ea121803b836bd134c67dd7a3/llama_index_llms_openai-0.5.4.tar.gz", hash = "sha256:9e36b6d2fc5f056b00ee655901b3bb7e7060b23f7b19439889fb78d696340f54", size = 24230 } +sdist = { url = "https://files.pythonhosted.org/packages/20/59/4c414d79a21189d9db6de58ecbc297cd0f5ea121803b836bd134c67dd7a3/llama_index_llms_openai-0.5.4.tar.gz", hash = "sha256:9e36b6d2fc5f056b00ee655901b3bb7e7060b23f7b19439889fb78d696340f54", size = 24230, upload-time = "2025-08-16T22:41:17.408Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/62/aec65450b8d7ba723fa557884ac34d94b2b8f3876a54249c05d240a2be6c/llama_index_llms_openai-0.5.4-py3-none-any.whl", hash = "sha256:8d42fbfa56b5f281ad0dfcb2915916c188b5876625f9f8d27016b7dc4366cc24", size = 25357 }, + { url = "https://files.pythonhosted.org/packages/ad/62/aec65450b8d7ba723fa557884ac34d94b2b8f3876a54249c05d240a2be6c/llama_index_llms_openai-0.5.4-py3-none-any.whl", hash = "sha256:8d42fbfa56b5f281ad0dfcb2915916c188b5876625f9f8d27016b7dc4366cc24", size = 25357, upload-time = "2025-08-16T22:41:16.472Z" }, ] [[package]] @@ -1717,9 +1717,9 @@ dependencies = [ { name = "pypdf" }, { name = "striprtf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/01/d9/c67ad2b9cba8dacf1d4a55fe5432357b6eceaecfb096a0de5c1cbd959b98/llama_index_readers_file-0.5.4.tar.gz", hash = "sha256:5e766f32597622e66529464101914548ad683770a0a5d2bdc9ee84eb3a110332", size = 32565 } +sdist = { url = "https://files.pythonhosted.org/packages/01/d9/c67ad2b9cba8dacf1d4a55fe5432357b6eceaecfb096a0de5c1cbd959b98/llama_index_readers_file-0.5.4.tar.gz", hash = "sha256:5e766f32597622e66529464101914548ad683770a0a5d2bdc9ee84eb3a110332", size = 32565, upload-time = "2025-09-08T20:39:40.287Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ea/e3/76d72a7281b9c88d488908731c9034e1ee1a2cad5aa1dead76b051eca989/llama_index_readers_file-0.5.4-py3-none-any.whl", hash = "sha256:135be5ddda66c5b35883911918b2d99f67a2ab010d180af5630c872ea9509d45", size = 51827 }, + { url = "https://files.pythonhosted.org/packages/ea/e3/76d72a7281b9c88d488908731c9034e1ee1a2cad5aa1dead76b051eca989/llama_index_readers_file-0.5.4-py3-none-any.whl", hash = "sha256:135be5ddda66c5b35883911918b2d99f67a2ab010d180af5630c872ea9509d45", size = 51827, upload-time = "2025-09-08T20:39:39.408Z" }, ] [[package]] @@ -1730,9 +1730,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "llama-parse" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b3/77/5bfaab20e6ec8428dbf2352e18be550c957602723d69383908176b5686cd/llama_index_readers_llama_parse-0.5.1.tar.gz", hash = "sha256:2b78b73faa933e30e6c69df351e4e9f36dfe2ae142e2ab3969ddd2ac48930e37", size = 3858 } +sdist = { url = "https://files.pythonhosted.org/packages/b3/77/5bfaab20e6ec8428dbf2352e18be550c957602723d69383908176b5686cd/llama_index_readers_llama_parse-0.5.1.tar.gz", hash = "sha256:2b78b73faa933e30e6c69df351e4e9f36dfe2ae142e2ab3969ddd2ac48930e37", size = 3858, upload-time = "2025-09-08T20:41:29.201Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/68/81/52410c7245dcbf1a54756a9ce3892cdd167ec0b884d696de1304ca3f452e/llama_index_readers_llama_parse-0.5.1-py3-none-any.whl", hash = "sha256:0d41450ed29b0c49c024e206ef6c8e662b1854e77a1c5faefed3b958be54f880", size = 3203 }, + { url = "https://files.pythonhosted.org/packages/68/81/52410c7245dcbf1a54756a9ce3892cdd167ec0b884d696de1304ca3f452e/llama_index_readers_llama_parse-0.5.1-py3-none-any.whl", hash = "sha256:0d41450ed29b0c49c024e206ef6c8e662b1854e77a1c5faefed3b958be54f880", size = 3203, upload-time = "2025-09-08T20:41:28.438Z" }, ] [[package]] @@ -1743,9 +1743,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "pymilvus" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/92/50/428b4af2d65b3f0ec0b41638579a5d67c027d64f46c2e11769975737f0ef/llama_index_vector_stores_milvus-0.9.0.tar.gz", hash = "sha256:938f002aa0817c3afc85f233791fdeefd87093e806c5108411f07d8d616b3d30", size = 15284 } +sdist = { url = "https://files.pythonhosted.org/packages/92/50/428b4af2d65b3f0ec0b41638579a5d67c027d64f46c2e11769975737f0ef/llama_index_vector_stores_milvus-0.9.0.tar.gz", hash = "sha256:938f002aa0817c3afc85f233791fdeefd87093e806c5108411f07d8d616b3d30", size = 15284, upload-time = "2025-07-30T21:12:38.4Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/fa/56b1d6626a4fcd968a940b13683b181cfd14bdb8b348772bedfa82b7e71d/llama_index_vector_stores_milvus-0.9.0-py3-none-any.whl", hash = "sha256:a08e20e72816c7b81cb82d27211e63ca175e4683b07e954adef1bae7a2c844f7", size = 15563 }, + { url = "https://files.pythonhosted.org/packages/ac/fa/56b1d6626a4fcd968a940b13683b181cfd14bdb8b348772bedfa82b7e71d/llama_index_vector_stores_milvus-0.9.0-py3-none-any.whl", hash = "sha256:a08e20e72816c7b81cb82d27211e63ca175e4683b07e954adef1bae7a2c844f7", size = 15563, upload-time = "2025-07-30T21:12:37.465Z" }, ] [[package]] @@ -1756,9 +1756,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "pinecone" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/13/31/9be27780523a4784bea5cf7910004b0e805b9fef09a4a5ed3af38757cb2b/llama_index_vector_stores_pinecone-0.7.0.tar.gz", hash = "sha256:72f4828115d5857249fc7d7a0753a6b1c2644c929687d86f5bed41274e5b7e76", size = 7852 } +sdist = { url = "https://files.pythonhosted.org/packages/13/31/9be27780523a4784bea5cf7910004b0e805b9fef09a4a5ed3af38757cb2b/llama_index_vector_stores_pinecone-0.7.0.tar.gz", hash = "sha256:72f4828115d5857249fc7d7a0753a6b1c2644c929687d86f5bed41274e5b7e76", size = 7852, upload-time = "2025-07-30T20:54:28.213Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/54/de/901d76d42474cce0aa8c054ee76e4dc9967d8df84907797ab99b3423d988/llama_index_vector_stores_pinecone-0.7.0-py3-none-any.whl", hash = "sha256:023ac4cde067f7154cc90534b72388c0b6905eaa41f30c7ef1446f67e3549b25", size = 8039 }, + { url = "https://files.pythonhosted.org/packages/54/de/901d76d42474cce0aa8c054ee76e4dc9967d8df84907797ab99b3423d988/llama_index_vector_stores_pinecone-0.7.0-py3-none-any.whl", hash = "sha256:023ac4cde067f7154cc90534b72388c0b6905eaa41f30c7ef1446f67e3549b25", size = 8039, upload-time = "2025-07-30T20:54:27.487Z" }, ] [[package]] @@ -1772,9 +1772,9 @@ dependencies = [ { name = "psycopg2-binary" }, { name = "sqlalchemy", extra = ["asyncio"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a3/2e/ddd8accef30a39f8ffb7bae9f5a5c91ba5f1f45ede1d55c73ba78e61e23a/llama_index_vector_stores_postgres-0.6.3.tar.gz", hash = "sha256:b15d2e7c3bf2a0b18754934a84cf5324403b9401e2b31bcdb00418ed2d03770c", size = 11316 } +sdist = { url = "https://files.pythonhosted.org/packages/a3/2e/ddd8accef30a39f8ffb7bae9f5a5c91ba5f1f45ede1d55c73ba78e61e23a/llama_index_vector_stores_postgres-0.6.3.tar.gz", hash = "sha256:b15d2e7c3bf2a0b18754934a84cf5324403b9401e2b31bcdb00418ed2d03770c", size = 11316, upload-time = "2025-08-12T12:36:35.281Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/cd/0aa5189615f33e805d8bc306d8a0f646892b55245e88fe6fb8df61059f66/llama_index_vector_stores_postgres-0.6.3-py3-none-any.whl", hash = "sha256:6086b7d450bf1204eb5523cd924c8395fc9cbd212f337d1caef18ce41cefc198", size = 11042 }, + { url = "https://files.pythonhosted.org/packages/a0/cd/0aa5189615f33e805d8bc306d8a0f646892b55245e88fe6fb8df61059f66/llama_index_vector_stores_postgres-0.6.3-py3-none-any.whl", hash = "sha256:6086b7d450bf1204eb5523cd924c8395fc9cbd212f337d1caef18ce41cefc198", size = 11042, upload-time = "2025-08-12T12:36:33.019Z" }, ] [[package]] @@ -1786,9 +1786,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "qdrant-client" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/88/84/441a41a34dea214c89e3cabc177f07615ba4b434d46a70ba810c8c3c5bcd/llama_index_vector_stores_qdrant-0.7.1.tar.gz", hash = "sha256:d51a561dc5aad270c4bbed72370cea9002e4b72d0038ec5b465f6bcdb67b1213", size = 13013 } +sdist = { url = "https://files.pythonhosted.org/packages/88/84/441a41a34dea214c89e3cabc177f07615ba4b434d46a70ba810c8c3c5bcd/llama_index_vector_stores_qdrant-0.7.1.tar.gz", hash = "sha256:d51a561dc5aad270c4bbed72370cea9002e4b72d0038ec5b465f6bcdb67b1213", size = 13013, upload-time = "2025-07-31T18:18:55.931Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/44/b3/623615e44ff4c19ca593a620eef670cad9bed78fe6e4d364753415b71aa0/llama_index_vector_stores_qdrant-0.7.1-py3-none-any.whl", hash = "sha256:f48eeb9228f7dc7e4d41a55d76dcf6d93b8bfbea1c943c09140a09252018f577", size = 13204 }, + { url = "https://files.pythonhosted.org/packages/44/b3/623615e44ff4c19ca593a620eef670cad9bed78fe6e4d364753415b71aa0/llama_index_vector_stores_qdrant-0.7.1-py3-none-any.whl", hash = "sha256:f48eeb9228f7dc7e4d41a55d76dcf6d93b8bfbea1c943c09140a09252018f577", size = 13204, upload-time = "2025-07-31T18:18:54.364Z" }, ] [[package]] @@ -1799,9 +1799,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "weaviate-client" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5e/ab/6da9ec13e8c1a6dc2a00eb17074388a4720d66252f9b784b725f2282ca5e/llama_index_vector_stores_weaviate-1.4.0.tar.gz", hash = "sha256:c5374406b90b4f27455c623a84f56c6df3d71408ffac8984cab39edc8f6a748e", size = 8535 } +sdist = { url = "https://files.pythonhosted.org/packages/5e/ab/6da9ec13e8c1a6dc2a00eb17074388a4720d66252f9b784b725f2282ca5e/llama_index_vector_stores_weaviate-1.4.0.tar.gz", hash = "sha256:c5374406b90b4f27455c623a84f56c6df3d71408ffac8984cab39edc8f6a748e", size = 8535, upload-time = "2025-07-30T20:57:22.275Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e6/70/aef7524a6ed14f79dca84685559045b303cb43f11a38b9f790e6274115e2/llama_index_vector_stores_weaviate-1.4.0-py3-none-any.whl", hash = "sha256:5e3ac7e499e20988f8165c7dfa223b64714572164114e5818c3d51ff273a0c53", size = 9326 }, + { url = "https://files.pythonhosted.org/packages/e6/70/aef7524a6ed14f79dca84685559045b303cb43f11a38b9f790e6274115e2/llama_index_vector_stores_weaviate-1.4.0-py3-none-any.whl", hash = "sha256:5e3ac7e499e20988f8165c7dfa223b64714572164114e5818c3d51ff273a0c53", size = 9326, upload-time = "2025-07-30T20:57:21.207Z" }, ] [[package]] @@ -1813,9 +1813,9 @@ dependencies = [ { name = "pydantic" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/00/dc/54fd5dec0ad3c65f3e8a520db7a3024141b71cd41660d0baca3cd6b18707/llama_index_workflows-1.3.0.tar.gz", hash = "sha256:9c1688e237efad384f16485af71c6f9456a2eb6d85bf61ff49e5717f10ff286d", size = 1040839 } +sdist = { url = "https://files.pythonhosted.org/packages/00/dc/54fd5dec0ad3c65f3e8a520db7a3024141b71cd41660d0baca3cd6b18707/llama_index_workflows-1.3.0.tar.gz", hash = "sha256:9c1688e237efad384f16485af71c6f9456a2eb6d85bf61ff49e5717f10ff286d", size = 1040839, upload-time = "2025-08-07T09:11:00.307Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/cf/0c50bc6e5c4fb7913f5682a0d26a60b976533dd8a87a5dbd84f617c6f1ab/llama_index_workflows-1.3.0-py3-none-any.whl", hash = "sha256:328cc25d92b014ef527f105a2f2088c0924fff0494e53d93decb951f14fbfe47", size = 42527 }, + { url = "https://files.pythonhosted.org/packages/3b/cf/0c50bc6e5c4fb7913f5682a0d26a60b976533dd8a87a5dbd84f617c6f1ab/llama_index_workflows-1.3.0-py3-none-any.whl", hash = "sha256:328cc25d92b014ef527f105a2f2088c0924fff0494e53d93decb951f14fbfe47", size = 42527, upload-time = "2025-08-07T09:10:59.155Z" }, ] [[package]] @@ -1827,9 +1827,9 @@ dependencies = [ { name = "llama-index-core" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3b/02/63839a55f6f207110400c4f394152fd0290e9f8e450226b02a87cfdbd835/llama_parse-0.5.19.tar.gz", hash = "sha256:db69da70e199a2664705eb983a70fa92b7cee19dd6cff175af7692a0b8a4dd53", size = 16100 } +sdist = { url = "https://files.pythonhosted.org/packages/3b/02/63839a55f6f207110400c4f394152fd0290e9f8e450226b02a87cfdbd835/llama_parse-0.5.19.tar.gz", hash = "sha256:db69da70e199a2664705eb983a70fa92b7cee19dd6cff175af7692a0b8a4dd53", size = 16100, upload-time = "2024-12-27T19:08:43.051Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/38/b7/3ff106e8199992bb62e72f195c8f6f2f2fe4a185f5f92746f0ed9db5c5d2/llama_parse-0.5.19-py3-none-any.whl", hash = "sha256:715cc895d183531b4299359d4f4004089b2e522f5f137f316084e7aa04035b62", size = 15421 }, + { url = "https://files.pythonhosted.org/packages/38/b7/3ff106e8199992bb62e72f195c8f6f2f2fe4a185f5f92746f0ed9db5c5d2/llama_parse-0.5.19-py3-none-any.whl", hash = "sha256:715cc895d183531b4299359d4f4004089b2e522f5f137f316084e7aa04035b62", size = 15421, upload-time = "2024-12-27T19:08:41.974Z" }, ] [[package]] @@ -1839,9 +1839,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ac/2d/3657ff470c491c3778ae519e51756b2aee8b8ba25bf4d0fd0a77662146f5/llmwhisperer_client-2.5.0.tar.gz", hash = "sha256:8d08df695ca74513ca904ddb42620ecf70a1eb8b432872ba15fbf238529245ac", size = 3261186 } +sdist = { url = "https://files.pythonhosted.org/packages/ac/2d/3657ff470c491c3778ae519e51756b2aee8b8ba25bf4d0fd0a77662146f5/llmwhisperer_client-2.5.0.tar.gz", hash = "sha256:8d08df695ca74513ca904ddb42620ecf70a1eb8b432872ba15fbf238529245ac", size = 3261186, upload-time = "2025-11-04T12:50:58.969Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/dc/f99fb0903d244066ef1207b7b465e3acfd68df575b129e45e397f05bc1f4/llmwhisperer_client-2.5.0-py3-none-any.whl", hash = "sha256:b637aa914875a25b76de60cb0e92be9237d554967380d2aeeab70ecadcff9bab", size = 9733 }, + { url = "https://files.pythonhosted.org/packages/0b/dc/f99fb0903d244066ef1207b7b465e3acfd68df575b129e45e397f05bc1f4/llmwhisperer_client-2.5.0-py3-none-any.whl", hash = "sha256:b637aa914875a25b76de60cb0e92be9237d554967380d2aeeab70ecadcff9bab", size = 9733, upload-time = "2025-11-04T12:50:57.671Z" }, ] [[package]] @@ -1851,28 +1851,28 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mdurl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070 } +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321 }, + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, ] [[package]] name = "markupsafe" version = "3.0.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313 } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615 }, - { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020 }, - { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332 }, - { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947 }, - { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962 }, - { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760 }, - { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529 }, - { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015 }, - { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540 }, - { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105 }, - { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906 }, + { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, + { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, + { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, + { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, + { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, + { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, + { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, ] [[package]] @@ -1882,18 +1882,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ab/5e/5e53d26b42ab75491cda89b871dab9e97c840bf12c63ec58a1919710cd06/marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6", size = 221825 } +sdist = { url = "https://files.pythonhosted.org/packages/ab/5e/5e53d26b42ab75491cda89b871dab9e97c840bf12c63ec58a1919710cd06/marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6", size = 221825, upload-time = "2025-02-03T15:32:25.093Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/34/75/51952c7b2d3873b44a0028b1bd26a25078c18f92f256608e8d1dc61b39fd/marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c", size = 50878 }, + { url = "https://files.pythonhosted.org/packages/34/75/51952c7b2d3873b44a0028b1bd26a25078c18f92f256608e8d1dc61b39fd/marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c", size = 50878, upload-time = "2025-02-03T15:32:22.295Z" }, ] [[package]] name = "mdurl" version = "0.1.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] [[package]] @@ -1904,10 +1904,10 @@ dependencies = [ { name = "tqdm" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/b2/acc5024c8e8b6a0b034670b8e8af306ebd633ede777dcbf557eac4785937/milvus_lite-2.5.1-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:6b014453200ba977be37ba660cb2d021030375fa6a35bc53c2e1d92980a0c512", size = 27934713 }, - { url = "https://files.pythonhosted.org/packages/9b/2e/746f5bb1d6facd1e73eb4af6dd5efda11125b0f29d7908a097485ca6cad9/milvus_lite-2.5.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a2e031088bf308afe5f8567850412d618cfb05a65238ed1a6117f60decccc95a", size = 24421451 }, - { url = "https://files.pythonhosted.org/packages/2e/cf/3d1fee5c16c7661cf53977067a34820f7269ed8ba99fe9cf35efc1700866/milvus_lite-2.5.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:a13277e9bacc6933dea172e42231f7e6135bd3bdb073dd2688ee180418abd8d9", size = 45337093 }, - { url = "https://files.pythonhosted.org/packages/d3/82/41d9b80f09b82e066894d9b508af07b7b0fa325ce0322980674de49106a0/milvus_lite-2.5.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:25ce13f4b8d46876dd2b7ac8563d7d8306da7ff3999bb0d14b116b30f71d706c", size = 55263911 }, + { url = "https://files.pythonhosted.org/packages/a9/b2/acc5024c8e8b6a0b034670b8e8af306ebd633ede777dcbf557eac4785937/milvus_lite-2.5.1-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:6b014453200ba977be37ba660cb2d021030375fa6a35bc53c2e1d92980a0c512", size = 27934713, upload-time = "2025-06-30T04:23:37.028Z" }, + { url = "https://files.pythonhosted.org/packages/9b/2e/746f5bb1d6facd1e73eb4af6dd5efda11125b0f29d7908a097485ca6cad9/milvus_lite-2.5.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a2e031088bf308afe5f8567850412d618cfb05a65238ed1a6117f60decccc95a", size = 24421451, upload-time = "2025-06-30T04:23:51.747Z" }, + { url = "https://files.pythonhosted.org/packages/2e/cf/3d1fee5c16c7661cf53977067a34820f7269ed8ba99fe9cf35efc1700866/milvus_lite-2.5.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:a13277e9bacc6933dea172e42231f7e6135bd3bdb073dd2688ee180418abd8d9", size = 45337093, upload-time = "2025-06-30T04:24:06.706Z" }, + { url = "https://files.pythonhosted.org/packages/d3/82/41d9b80f09b82e066894d9b508af07b7b0fa325ce0322980674de49106a0/milvus_lite-2.5.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:25ce13f4b8d46876dd2b7ac8563d7d8306da7ff3999bb0d14b116b30f71d706c", size = 55263911, upload-time = "2025-06-30T04:24:19.434Z" }, ] [[package]] @@ -1919,9 +1919,9 @@ dependencies = [ { name = "pyjwt", extra = ["crypto"] }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cf/0e/c857c46d653e104019a84f22d4494f2119b4fe9f896c92b4b864b3b045cc/msal-1.34.0.tar.gz", hash = "sha256:76ba83b716ea5a6d75b0279c0ac353a0e05b820ca1f6682c0eb7f45190c43c2f", size = 153961 } +sdist = { url = "https://files.pythonhosted.org/packages/cf/0e/c857c46d653e104019a84f22d4494f2119b4fe9f896c92b4b864b3b045cc/msal-1.34.0.tar.gz", hash = "sha256:76ba83b716ea5a6d75b0279c0ac353a0e05b820ca1f6682c0eb7f45190c43c2f", size = 153961, upload-time = "2025-09-22T23:05:48.989Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/dc/18d48843499e278538890dc709e9ee3dea8375f8be8e82682851df1b48b5/msal-1.34.0-py3-none-any.whl", hash = "sha256:f669b1644e4950115da7a176441b0e13ec2975c29528d8b9e81316023676d6e1", size = 116987 }, + { url = "https://files.pythonhosted.org/packages/c2/dc/18d48843499e278538890dc709e9ee3dea8375f8be8e82682851df1b48b5/msal-1.34.0-py3-none-any.whl", hash = "sha256:f669b1644e4950115da7a176441b0e13ec2975c29528d8b9e81316023676d6e1", size = 116987, upload-time = "2025-09-22T23:05:47.294Z" }, ] [[package]] @@ -1931,36 +1931,36 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "msal" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315 } +sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315, upload-time = "2025-03-14T23:51:03.902Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583 }, + { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583, upload-time = "2025-03-14T23:51:03.016Z" }, ] [[package]] name = "multidict" version = "6.6.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/69/7f/0652e6ed47ab288e3756ea9c0df8b14950781184d4bd7883f4d87dd41245/multidict-6.6.4.tar.gz", hash = "sha256:d2d4e4787672911b48350df02ed3fa3fffdc2f2e8ca06dd6afdf34189b76a9dd", size = 101843 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/f6/512ffd8fd8b37fb2680e5ac35d788f1d71bbaf37789d21a820bdc441e565/multidict-6.6.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0ffb87be160942d56d7b87b0fdf098e81ed565add09eaa1294268c7f3caac4c8", size = 76516 }, - { url = "https://files.pythonhosted.org/packages/99/58/45c3e75deb8855c36bd66cc1658007589662ba584dbf423d01df478dd1c5/multidict-6.6.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d191de6cbab2aff5de6c5723101705fd044b3e4c7cfd587a1929b5028b9714b3", size = 45394 }, - { url = "https://files.pythonhosted.org/packages/fd/ca/e8c4472a93a26e4507c0b8e1f0762c0d8a32de1328ef72fd704ef9cc5447/multidict-6.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:38a0956dd92d918ad5feff3db8fcb4a5eb7dba114da917e1a88475619781b57b", size = 43591 }, - { url = "https://files.pythonhosted.org/packages/05/51/edf414f4df058574a7265034d04c935aa84a89e79ce90fcf4df211f47b16/multidict-6.6.4-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:6865f6d3b7900ae020b495d599fcf3765653bc927951c1abb959017f81ae8287", size = 237215 }, - { url = "https://files.pythonhosted.org/packages/c8/45/8b3d6dbad8cf3252553cc41abea09ad527b33ce47a5e199072620b296902/multidict-6.6.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a2088c126b6f72db6c9212ad827d0ba088c01d951cee25e758c450da732c138", size = 258299 }, - { url = "https://files.pythonhosted.org/packages/3c/e8/8ca2e9a9f5a435fc6db40438a55730a4bf4956b554e487fa1b9ae920f825/multidict-6.6.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0f37bed7319b848097085d7d48116f545985db988e2256b2e6f00563a3416ee6", size = 242357 }, - { url = "https://files.pythonhosted.org/packages/0f/84/80c77c99df05a75c28490b2af8f7cba2a12621186e0a8b0865d8e745c104/multidict-6.6.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:01368e3c94032ba6ca0b78e7ccb099643466cf24f8dc8eefcfdc0571d56e58f9", size = 268369 }, - { url = "https://files.pythonhosted.org/packages/0d/e9/920bfa46c27b05fb3e1ad85121fd49f441492dca2449c5bcfe42e4565d8a/multidict-6.6.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8fe323540c255db0bffee79ad7f048c909f2ab0edb87a597e1c17da6a54e493c", size = 269341 }, - { url = "https://files.pythonhosted.org/packages/af/65/753a2d8b05daf496f4a9c367fe844e90a1b2cac78e2be2c844200d10cc4c/multidict-6.6.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8eb3025f17b0a4c3cd08cda49acf312a19ad6e8a4edd9dbd591e6506d999402", size = 256100 }, - { url = "https://files.pythonhosted.org/packages/09/54/655be13ae324212bf0bc15d665a4e34844f34c206f78801be42f7a0a8aaa/multidict-6.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bbc14f0365534d35a06970d6a83478b249752e922d662dc24d489af1aa0d1be7", size = 253584 }, - { url = "https://files.pythonhosted.org/packages/5c/74/ab2039ecc05264b5cec73eb018ce417af3ebb384ae9c0e9ed42cb33f8151/multidict-6.6.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:75aa52fba2d96bf972e85451b99d8e19cc37ce26fd016f6d4aa60da9ab2b005f", size = 251018 }, - { url = "https://files.pythonhosted.org/packages/af/0a/ccbb244ac848e56c6427f2392741c06302bbfba49c0042f1eb3c5b606497/multidict-6.6.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4fefd4a815e362d4f011919d97d7b4a1e566f1dde83dc4ad8cfb5b41de1df68d", size = 251477 }, - { url = "https://files.pythonhosted.org/packages/0e/b0/0ed49bba775b135937f52fe13922bc64a7eaf0a3ead84a36e8e4e446e096/multidict-6.6.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:db9801fe021f59a5b375ab778973127ca0ac52429a26e2fd86aa9508f4d26eb7", size = 263575 }, - { url = "https://files.pythonhosted.org/packages/3e/d9/7fb85a85e14de2e44dfb6a24f03c41e2af8697a6df83daddb0e9b7569f73/multidict-6.6.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a650629970fa21ac1fb06ba25dabfc5b8a2054fcbf6ae97c758aa956b8dba802", size = 259649 }, - { url = "https://files.pythonhosted.org/packages/03/9e/b3a459bcf9b6e74fa461a5222a10ff9b544cb1cd52fd482fb1b75ecda2a2/multidict-6.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:452ff5da78d4720d7516a3a2abd804957532dd69296cb77319c193e3ffb87e24", size = 251505 }, - { url = "https://files.pythonhosted.org/packages/86/a2/8022f78f041dfe6d71e364001a5cf987c30edfc83c8a5fb7a3f0974cff39/multidict-6.6.4-cp312-cp312-win32.whl", hash = "sha256:8c2fcb12136530ed19572bbba61b407f655e3953ba669b96a35036a11a485793", size = 41888 }, - { url = "https://files.pythonhosted.org/packages/c7/eb/d88b1780d43a56db2cba24289fa744a9d216c1a8546a0dc3956563fd53ea/multidict-6.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:047d9425860a8c9544fed1b9584f0c8bcd31bcde9568b047c5e567a1025ecd6e", size = 46072 }, - { url = "https://files.pythonhosted.org/packages/9f/16/b929320bf5750e2d9d4931835a4c638a19d2494a5b519caaaa7492ebe105/multidict-6.6.4-cp312-cp312-win_arm64.whl", hash = "sha256:14754eb72feaa1e8ae528468f24250dd997b8e2188c3d2f593f9eba259e4b364", size = 43222 }, - { url = "https://files.pythonhosted.org/packages/fd/69/b547032297c7e63ba2af494edba695d781af8a0c6e89e4d06cf848b21d80/multidict-6.6.4-py3-none-any.whl", hash = "sha256:27d8f8e125c07cb954e54d75d04905a9bba8a439c1d84aca94949d4d03d8601c", size = 12313 }, +sdist = { url = "https://files.pythonhosted.org/packages/69/7f/0652e6ed47ab288e3756ea9c0df8b14950781184d4bd7883f4d87dd41245/multidict-6.6.4.tar.gz", hash = "sha256:d2d4e4787672911b48350df02ed3fa3fffdc2f2e8ca06dd6afdf34189b76a9dd", size = 101843, upload-time = "2025-08-11T12:08:48.217Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/f6/512ffd8fd8b37fb2680e5ac35d788f1d71bbaf37789d21a820bdc441e565/multidict-6.6.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0ffb87be160942d56d7b87b0fdf098e81ed565add09eaa1294268c7f3caac4c8", size = 76516, upload-time = "2025-08-11T12:06:53.393Z" }, + { url = "https://files.pythonhosted.org/packages/99/58/45c3e75deb8855c36bd66cc1658007589662ba584dbf423d01df478dd1c5/multidict-6.6.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d191de6cbab2aff5de6c5723101705fd044b3e4c7cfd587a1929b5028b9714b3", size = 45394, upload-time = "2025-08-11T12:06:54.555Z" }, + { url = "https://files.pythonhosted.org/packages/fd/ca/e8c4472a93a26e4507c0b8e1f0762c0d8a32de1328ef72fd704ef9cc5447/multidict-6.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:38a0956dd92d918ad5feff3db8fcb4a5eb7dba114da917e1a88475619781b57b", size = 43591, upload-time = "2025-08-11T12:06:55.672Z" }, + { url = "https://files.pythonhosted.org/packages/05/51/edf414f4df058574a7265034d04c935aa84a89e79ce90fcf4df211f47b16/multidict-6.6.4-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:6865f6d3b7900ae020b495d599fcf3765653bc927951c1abb959017f81ae8287", size = 237215, upload-time = "2025-08-11T12:06:57.213Z" }, + { url = "https://files.pythonhosted.org/packages/c8/45/8b3d6dbad8cf3252553cc41abea09ad527b33ce47a5e199072620b296902/multidict-6.6.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a2088c126b6f72db6c9212ad827d0ba088c01d951cee25e758c450da732c138", size = 258299, upload-time = "2025-08-11T12:06:58.946Z" }, + { url = "https://files.pythonhosted.org/packages/3c/e8/8ca2e9a9f5a435fc6db40438a55730a4bf4956b554e487fa1b9ae920f825/multidict-6.6.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0f37bed7319b848097085d7d48116f545985db988e2256b2e6f00563a3416ee6", size = 242357, upload-time = "2025-08-11T12:07:00.301Z" }, + { url = "https://files.pythonhosted.org/packages/0f/84/80c77c99df05a75c28490b2af8f7cba2a12621186e0a8b0865d8e745c104/multidict-6.6.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:01368e3c94032ba6ca0b78e7ccb099643466cf24f8dc8eefcfdc0571d56e58f9", size = 268369, upload-time = "2025-08-11T12:07:01.638Z" }, + { url = "https://files.pythonhosted.org/packages/0d/e9/920bfa46c27b05fb3e1ad85121fd49f441492dca2449c5bcfe42e4565d8a/multidict-6.6.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8fe323540c255db0bffee79ad7f048c909f2ab0edb87a597e1c17da6a54e493c", size = 269341, upload-time = "2025-08-11T12:07:02.943Z" }, + { url = "https://files.pythonhosted.org/packages/af/65/753a2d8b05daf496f4a9c367fe844e90a1b2cac78e2be2c844200d10cc4c/multidict-6.6.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8eb3025f17b0a4c3cd08cda49acf312a19ad6e8a4edd9dbd591e6506d999402", size = 256100, upload-time = "2025-08-11T12:07:04.564Z" }, + { url = "https://files.pythonhosted.org/packages/09/54/655be13ae324212bf0bc15d665a4e34844f34c206f78801be42f7a0a8aaa/multidict-6.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bbc14f0365534d35a06970d6a83478b249752e922d662dc24d489af1aa0d1be7", size = 253584, upload-time = "2025-08-11T12:07:05.914Z" }, + { url = "https://files.pythonhosted.org/packages/5c/74/ab2039ecc05264b5cec73eb018ce417af3ebb384ae9c0e9ed42cb33f8151/multidict-6.6.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:75aa52fba2d96bf972e85451b99d8e19cc37ce26fd016f6d4aa60da9ab2b005f", size = 251018, upload-time = "2025-08-11T12:07:08.301Z" }, + { url = "https://files.pythonhosted.org/packages/af/0a/ccbb244ac848e56c6427f2392741c06302bbfba49c0042f1eb3c5b606497/multidict-6.6.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4fefd4a815e362d4f011919d97d7b4a1e566f1dde83dc4ad8cfb5b41de1df68d", size = 251477, upload-time = "2025-08-11T12:07:10.248Z" }, + { url = "https://files.pythonhosted.org/packages/0e/b0/0ed49bba775b135937f52fe13922bc64a7eaf0a3ead84a36e8e4e446e096/multidict-6.6.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:db9801fe021f59a5b375ab778973127ca0ac52429a26e2fd86aa9508f4d26eb7", size = 263575, upload-time = "2025-08-11T12:07:11.928Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d9/7fb85a85e14de2e44dfb6a24f03c41e2af8697a6df83daddb0e9b7569f73/multidict-6.6.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a650629970fa21ac1fb06ba25dabfc5b8a2054fcbf6ae97c758aa956b8dba802", size = 259649, upload-time = "2025-08-11T12:07:13.244Z" }, + { url = "https://files.pythonhosted.org/packages/03/9e/b3a459bcf9b6e74fa461a5222a10ff9b544cb1cd52fd482fb1b75ecda2a2/multidict-6.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:452ff5da78d4720d7516a3a2abd804957532dd69296cb77319c193e3ffb87e24", size = 251505, upload-time = "2025-08-11T12:07:14.57Z" }, + { url = "https://files.pythonhosted.org/packages/86/a2/8022f78f041dfe6d71e364001a5cf987c30edfc83c8a5fb7a3f0974cff39/multidict-6.6.4-cp312-cp312-win32.whl", hash = "sha256:8c2fcb12136530ed19572bbba61b407f655e3953ba669b96a35036a11a485793", size = 41888, upload-time = "2025-08-11T12:07:15.904Z" }, + { url = "https://files.pythonhosted.org/packages/c7/eb/d88b1780d43a56db2cba24289fa744a9d216c1a8546a0dc3956563fd53ea/multidict-6.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:047d9425860a8c9544fed1b9584f0c8bcd31bcde9568b047c5e567a1025ecd6e", size = 46072, upload-time = "2025-08-11T12:07:17.045Z" }, + { url = "https://files.pythonhosted.org/packages/9f/16/b929320bf5750e2d9d4931835a4c638a19d2494a5b519caaaa7492ebe105/multidict-6.6.4-cp312-cp312-win_arm64.whl", hash = "sha256:14754eb72feaa1e8ae528468f24250dd997b8e2188c3d2f593f9eba259e4b364", size = 43222, upload-time = "2025-08-11T12:07:18.328Z" }, + { url = "https://files.pythonhosted.org/packages/fd/69/b547032297c7e63ba2af494edba695d781af8a0c6e89e4d06cf848b21d80/multidict-6.6.4-py3-none-any.whl", hash = "sha256:27d8f8e125c07cb954e54d75d04905a9bba8a439c1d84aca94949d4d03d8601c", size = 12313, upload-time = "2025-08-11T12:08:46.891Z" }, ] [[package]] @@ -1971,41 +1971,41 @@ dependencies = [ { name = "mypy-extensions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/72/1e/a587a862c766a755a58b62d8c00aed11b74a15dc415c1bf5da7b607b0efd/mypy-1.9.0.tar.gz", hash = "sha256:3cc5da0127e6a478cddd906068496a97a7618a21ce9b54bde5bf7e539c7af974", size = 2995901 } +sdist = { url = "https://files.pythonhosted.org/packages/72/1e/a587a862c766a755a58b62d8c00aed11b74a15dc415c1bf5da7b607b0efd/mypy-1.9.0.tar.gz", hash = "sha256:3cc5da0127e6a478cddd906068496a97a7618a21ce9b54bde5bf7e539c7af974", size = 2995901, upload-time = "2024-03-08T16:10:12.412Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/96/40f0f605b1d4e2ad1fb11d21988ce3a3e205886c0fcbd35c9789a214de9a/mypy-1.9.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aceb1db093b04db5cd390821464504111b8ec3e351eb85afd1433490163d60cd", size = 10725390 }, - { url = "https://files.pythonhosted.org/packages/d7/d2/072e40384b53051106b4fcf03537fb88e2a6ad0757d2ab7f6c8c2f188a69/mypy-1.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0235391f1c6f6ce487b23b9dbd1327b4ec33bb93934aa986efe8a9563d9349e6", size = 9731292 }, - { url = "https://files.pythonhosted.org/packages/85/a5/b7dc7eb69eda899fd07e71403b51b598a1f4df0f452d1da5844374082bcd/mypy-1.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4d5ddc13421ba3e2e082a6c2d74c2ddb3979c39b582dacd53dd5d9431237185", size = 12455450 }, - { url = "https://files.pythonhosted.org/packages/1c/1b/3e962a201d2f0f57c9fa1990e0dd6076f4f2f94954ab56e4a701ec3cc070/mypy-1.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:190da1ee69b427d7efa8aa0d5e5ccd67a4fb04038c380237a0d96829cb157913", size = 12530368 }, - { url = "https://files.pythonhosted.org/packages/72/1f/8b214b69d08cc5e4bd8c3769ac55a43318f3529362ea55e5957774b69924/mypy-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:fe28657de3bfec596bbeef01cb219833ad9d38dd5393fc649f4b366840baefe6", size = 9319112 }, - { url = "https://files.pythonhosted.org/packages/60/db/0ba2eaedca52bf5276275e8489951c26206030b3d31bf06f00875ae75d5d/mypy-1.9.0-py3-none-any.whl", hash = "sha256:a260627a570559181a9ea5de61ac6297aa5af202f06fd7ab093ce74e7181e43e", size = 2555887 }, + { url = "https://files.pythonhosted.org/packages/6e/96/40f0f605b1d4e2ad1fb11d21988ce3a3e205886c0fcbd35c9789a214de9a/mypy-1.9.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aceb1db093b04db5cd390821464504111b8ec3e351eb85afd1433490163d60cd", size = 10725390, upload-time = "2024-03-08T16:10:01.099Z" }, + { url = "https://files.pythonhosted.org/packages/d7/d2/072e40384b53051106b4fcf03537fb88e2a6ad0757d2ab7f6c8c2f188a69/mypy-1.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0235391f1c6f6ce487b23b9dbd1327b4ec33bb93934aa986efe8a9563d9349e6", size = 9731292, upload-time = "2024-03-08T16:08:48.463Z" }, + { url = "https://files.pythonhosted.org/packages/85/a5/b7dc7eb69eda899fd07e71403b51b598a1f4df0f452d1da5844374082bcd/mypy-1.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4d5ddc13421ba3e2e082a6c2d74c2ddb3979c39b582dacd53dd5d9431237185", size = 12455450, upload-time = "2024-03-08T16:08:57.375Z" }, + { url = "https://files.pythonhosted.org/packages/1c/1b/3e962a201d2f0f57c9fa1990e0dd6076f4f2f94954ab56e4a701ec3cc070/mypy-1.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:190da1ee69b427d7efa8aa0d5e5ccd67a4fb04038c380237a0d96829cb157913", size = 12530368, upload-time = "2024-03-08T16:09:17.061Z" }, + { url = "https://files.pythonhosted.org/packages/72/1f/8b214b69d08cc5e4bd8c3769ac55a43318f3529362ea55e5957774b69924/mypy-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:fe28657de3bfec596bbeef01cb219833ad9d38dd5393fc649f4b366840baefe6", size = 9319112, upload-time = "2024-03-08T16:09:07.961Z" }, + { url = "https://files.pythonhosted.org/packages/60/db/0ba2eaedca52bf5276275e8489951c26206030b3d31bf06f00875ae75d5d/mypy-1.9.0-py3-none-any.whl", hash = "sha256:a260627a570559181a9ea5de61ac6297aa5af202f06fd7ab093ce74e7181e43e", size = 2555887, upload-time = "2024-03-08T16:09:48.584Z" }, ] [[package]] name = "mypy-extensions" version = "1.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343 } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963 }, + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] [[package]] name = "nest-asyncio" version = "1.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418 } +sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418, upload-time = "2024-01-21T14:25:19.227Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195 }, + { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" }, ] [[package]] name = "networkx" version = "3.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065 } +sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406 }, + { url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406, upload-time = "2025-05-29T11:35:04.961Z" }, ] [[package]] @@ -2018,37 +2018,37 @@ dependencies = [ { name = "regex" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3c/87/db8be88ad32c2d042420b6fd9ffd4a149f9a0d7f0e86b3f543be2eeeedd2/nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868", size = 2904691 } +sdist = { url = "https://files.pythonhosted.org/packages/3c/87/db8be88ad32c2d042420b6fd9ffd4a149f9a0d7f0e86b3f543be2eeeedd2/nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868", size = 2904691, upload-time = "2024-08-18T19:48:37.769Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/66/7d9e26593edda06e8cb531874633f7c2372279c3b0f46235539fe546df8b/nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1", size = 1505442 }, + { url = "https://files.pythonhosted.org/packages/4d/66/7d9e26593edda06e8cb531874633f7c2372279c3b0f46235539fe546df8b/nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1", size = 1505442, upload-time = "2024-08-18T19:48:21.909Z" }, ] [[package]] name = "nodeenv" version = "1.9.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437 } +sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314 }, + { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, ] [[package]] name = "numpy" version = "2.3.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d0/19/95b3d357407220ed24c139018d2518fab0a61a948e68286a25f1a4d049ff/numpy-2.3.3.tar.gz", hash = "sha256:ddc7c39727ba62b80dfdbedf400d1c10ddfa8eefbd7ec8dcb118be8b56d31029", size = 20576648 } +sdist = { url = "https://files.pythonhosted.org/packages/d0/19/95b3d357407220ed24c139018d2518fab0a61a948e68286a25f1a4d049ff/numpy-2.3.3.tar.gz", hash = "sha256:ddc7c39727ba62b80dfdbedf400d1c10ddfa8eefbd7ec8dcb118be8b56d31029", size = 20576648, upload-time = "2025-09-09T16:54:12.543Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/51/5d/bb7fc075b762c96329147799e1bcc9176ab07ca6375ea976c475482ad5b3/numpy-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cfdd09f9c84a1a934cde1eec2267f0a43a7cd44b2cca4ff95b7c0d14d144b0bf", size = 20957014 }, - { url = "https://files.pythonhosted.org/packages/6b/0e/c6211bb92af26517acd52125a237a92afe9c3124c6a68d3b9f81b62a0568/numpy-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb32e3cf0f762aee47ad1ddc6672988f7f27045b0783c887190545baba73aa25", size = 14185220 }, - { url = "https://files.pythonhosted.org/packages/22/f2/07bb754eb2ede9073f4054f7c0286b0d9d2e23982e090a80d478b26d35ca/numpy-2.3.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:396b254daeb0a57b1fe0ecb5e3cff6fa79a380fa97c8f7781a6d08cd429418fe", size = 5113918 }, - { url = "https://files.pythonhosted.org/packages/81/0a/afa51697e9fb74642f231ea36aca80fa17c8fb89f7a82abd5174023c3960/numpy-2.3.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:067e3d7159a5d8f8a0b46ee11148fc35ca9b21f61e3c49fbd0a027450e65a33b", size = 6647922 }, - { url = "https://files.pythonhosted.org/packages/5d/f5/122d9cdb3f51c520d150fef6e87df9279e33d19a9611a87c0d2cf78a89f4/numpy-2.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c02d0629d25d426585fb2e45a66154081b9fa677bc92a881ff1d216bc9919a8", size = 14281991 }, - { url = "https://files.pythonhosted.org/packages/51/64/7de3c91e821a2debf77c92962ea3fe6ac2bc45d0778c1cbe15d4fce2fd94/numpy-2.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9192da52b9745f7f0766531dcfa978b7763916f158bb63bdb8a1eca0068ab20", size = 16641643 }, - { url = "https://files.pythonhosted.org/packages/30/e4/961a5fa681502cd0d68907818b69f67542695b74e3ceaa513918103b7e80/numpy-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cd7de500a5b66319db419dc3c345244404a164beae0d0937283b907d8152e6ea", size = 16056787 }, - { url = "https://files.pythonhosted.org/packages/99/26/92c912b966e47fbbdf2ad556cb17e3a3088e2e1292b9833be1dfa5361a1a/numpy-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:93d4962d8f82af58f0b2eb85daaf1b3ca23fe0a85d0be8f1f2b7bb46034e56d7", size = 18579598 }, - { url = "https://files.pythonhosted.org/packages/17/b6/fc8f82cb3520768718834f310c37d96380d9dc61bfdaf05fe5c0b7653e01/numpy-2.3.3-cp312-cp312-win32.whl", hash = "sha256:5534ed6b92f9b7dca6c0a19d6df12d41c68b991cef051d108f6dbff3babc4ebf", size = 6320800 }, - { url = "https://files.pythonhosted.org/packages/32/ee/de999f2625b80d043d6d2d628c07d0d5555a677a3cf78fdf868d409b8766/numpy-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:497d7cad08e7092dba36e3d296fe4c97708c93daf26643a1ae4b03f6294d30eb", size = 12786615 }, - { url = "https://files.pythonhosted.org/packages/49/6e/b479032f8a43559c383acb20816644f5f91c88f633d9271ee84f3b3a996c/numpy-2.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:ca0309a18d4dfea6fc6262a66d06c26cfe4640c3926ceec90e57791a82b6eee5", size = 10195936 }, + { url = "https://files.pythonhosted.org/packages/51/5d/bb7fc075b762c96329147799e1bcc9176ab07ca6375ea976c475482ad5b3/numpy-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cfdd09f9c84a1a934cde1eec2267f0a43a7cd44b2cca4ff95b7c0d14d144b0bf", size = 20957014, upload-time = "2025-09-09T15:56:29.966Z" }, + { url = "https://files.pythonhosted.org/packages/6b/0e/c6211bb92af26517acd52125a237a92afe9c3124c6a68d3b9f81b62a0568/numpy-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb32e3cf0f762aee47ad1ddc6672988f7f27045b0783c887190545baba73aa25", size = 14185220, upload-time = "2025-09-09T15:56:32.175Z" }, + { url = "https://files.pythonhosted.org/packages/22/f2/07bb754eb2ede9073f4054f7c0286b0d9d2e23982e090a80d478b26d35ca/numpy-2.3.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:396b254daeb0a57b1fe0ecb5e3cff6fa79a380fa97c8f7781a6d08cd429418fe", size = 5113918, upload-time = "2025-09-09T15:56:34.175Z" }, + { url = "https://files.pythonhosted.org/packages/81/0a/afa51697e9fb74642f231ea36aca80fa17c8fb89f7a82abd5174023c3960/numpy-2.3.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:067e3d7159a5d8f8a0b46ee11148fc35ca9b21f61e3c49fbd0a027450e65a33b", size = 6647922, upload-time = "2025-09-09T15:56:36.149Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f5/122d9cdb3f51c520d150fef6e87df9279e33d19a9611a87c0d2cf78a89f4/numpy-2.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c02d0629d25d426585fb2e45a66154081b9fa677bc92a881ff1d216bc9919a8", size = 14281991, upload-time = "2025-09-09T15:56:40.548Z" }, + { url = "https://files.pythonhosted.org/packages/51/64/7de3c91e821a2debf77c92962ea3fe6ac2bc45d0778c1cbe15d4fce2fd94/numpy-2.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9192da52b9745f7f0766531dcfa978b7763916f158bb63bdb8a1eca0068ab20", size = 16641643, upload-time = "2025-09-09T15:56:43.343Z" }, + { url = "https://files.pythonhosted.org/packages/30/e4/961a5fa681502cd0d68907818b69f67542695b74e3ceaa513918103b7e80/numpy-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cd7de500a5b66319db419dc3c345244404a164beae0d0937283b907d8152e6ea", size = 16056787, upload-time = "2025-09-09T15:56:46.141Z" }, + { url = "https://files.pythonhosted.org/packages/99/26/92c912b966e47fbbdf2ad556cb17e3a3088e2e1292b9833be1dfa5361a1a/numpy-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:93d4962d8f82af58f0b2eb85daaf1b3ca23fe0a85d0be8f1f2b7bb46034e56d7", size = 18579598, upload-time = "2025-09-09T15:56:49.844Z" }, + { url = "https://files.pythonhosted.org/packages/17/b6/fc8f82cb3520768718834f310c37d96380d9dc61bfdaf05fe5c0b7653e01/numpy-2.3.3-cp312-cp312-win32.whl", hash = "sha256:5534ed6b92f9b7dca6c0a19d6df12d41c68b991cef051d108f6dbff3babc4ebf", size = 6320800, upload-time = "2025-09-09T15:56:52.499Z" }, + { url = "https://files.pythonhosted.org/packages/32/ee/de999f2625b80d043d6d2d628c07d0d5555a677a3cf78fdf868d409b8766/numpy-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:497d7cad08e7092dba36e3d296fe4c97708c93daf26643a1ae4b03f6294d30eb", size = 12786615, upload-time = "2025-09-09T15:56:54.422Z" }, + { url = "https://files.pythonhosted.org/packages/49/6e/b479032f8a43559c383acb20816644f5f91c88f633d9271ee84f3b3a996c/numpy-2.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:ca0309a18d4dfea6fc6262a66d06c26cfe4640c3926ceec90e57791a82b6eee5", size = 10195936, upload-time = "2025-09-09T15:56:56.541Z" }, ] [[package]] @@ -2062,18 +2062,33 @@ dependencies = [ { name = "rsa" }, { name = "six" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a6/7b/17244b1083e8e604bf154cf9b716aecd6388acd656dd01893d0d244c94d9/oauth2client-4.1.3.tar.gz", hash = "sha256:d486741e451287f69568a4d26d70d9acd73a2bbfa275746c535b4209891cccc6", size = 155910 } +sdist = { url = "https://files.pythonhosted.org/packages/a6/7b/17244b1083e8e604bf154cf9b716aecd6388acd656dd01893d0d244c94d9/oauth2client-4.1.3.tar.gz", hash = "sha256:d486741e451287f69568a4d26d70d9acd73a2bbfa275746c535b4209891cccc6", size = 155910, upload-time = "2018-09-07T21:38:18.036Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/95/a9/4f25a14d23f0786b64875b91784607c2277eff25d48f915e39ff0cff505a/oauth2client-4.1.3-py2.py3-none-any.whl", hash = "sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac", size = 98206 }, + { url = "https://files.pythonhosted.org/packages/95/a9/4f25a14d23f0786b64875b91784607c2277eff25d48f915e39ff0cff505a/oauth2client-4.1.3-py2.py3-none-any.whl", hash = "sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac", size = 98206, upload-time = "2018-09-07T21:38:16.742Z" }, ] [[package]] name = "oauthlib" version = "3.3.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0b/5f/19930f824ffeb0ad4372da4812c50edbd1434f678c90c2733e1188edfc63/oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9", size = 185918 } +sdist = { url = "https://files.pythonhosted.org/packages/0b/5f/19930f824ffeb0ad4372da4812c50edbd1434f678c90c2733e1188edfc63/oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9", size = 185918, upload-time = "2025-06-19T22:48:08.269Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065 }, + { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, +] + +[[package]] +name = "office365-rest-python-client" +version = "2.6.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "msal" }, + { name = "pytz" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/04/6dce2d581c54a8e55a3b128cf79a93821a68a62bb9a956e65476c5bb247e/office365_rest_python_client-2.6.2.tar.gz", hash = "sha256:ce27f5a1c0cc3ff97041ccd9b386145692be4c64739f243f7d6ac3edbe0a3c46", size = 659460, upload-time = "2025-05-11T10:24:21.895Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/a4/611155711f8af347875c15b8b83f5fd9e978bd4de45f90085b9a583b684d/Office365_REST_Python_Client-2.6.2-py3-none-any.whl", hash = "sha256:06fc6829c39b503897caa9d881db419d7f97a8e4f1c95c4c2d12db36ea6c955d", size = 1337139, upload-time = "2025-05-11T10:24:18.926Z" }, ] [[package]] @@ -2105,9 +2120,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c6/a1/a303104dc55fc546a3f6914c842d3da471c64eec92043aef8f652eb6c524/openai-1.109.1.tar.gz", hash = "sha256:d173ed8dbca665892a6db099b4a2dfac624f94d20a93f46eb0b56aae940ed869", size = 564133 } +sdist = { url = "https://files.pythonhosted.org/packages/c6/a1/a303104dc55fc546a3f6914c842d3da471c64eec92043aef8f652eb6c524/openai-1.109.1.tar.gz", hash = "sha256:d173ed8dbca665892a6db099b4a2dfac624f94d20a93f46eb0b56aae940ed869", size = 564133, upload-time = "2025-09-24T13:00:53.075Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/2a/7dd3d207ec669cacc1f186fd856a0f61dbc255d24f6fdc1a6715d6051b0f/openai-1.109.1-py3-none-any.whl", hash = "sha256:6bcaf57086cf59159b8e27447e4e7dd019db5d29a438072fbd49c290c7e65315", size = 948627 }, + { url = "https://files.pythonhosted.org/packages/1d/2a/7dd3d207ec669cacc1f186fd856a0f61dbc255d24f6fdc1a6715d6051b0f/openai-1.109.1-py3-none-any.whl", hash = "sha256:6bcaf57086cf59159b8e27447e4e7dd019db5d29a438072fbd49c290c7e65315", size = 948627, upload-time = "2025-09-24T13:00:50.754Z" }, ] [[package]] @@ -2117,22 +2132,22 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ed/c7/12632c03022aa5059ce9b6738397cda682dfda9d9afe7008b8a4f98c6ee5/oracledb-2.4.0.tar.gz", hash = "sha256:bdd61a9d5077448b5f1c58af6a14accc287bf8032846c351a3cdde5cf64fe95b", size = 614809 } +sdist = { url = "https://files.pythonhosted.org/packages/ed/c7/12632c03022aa5059ce9b6738397cda682dfda9d9afe7008b8a4f98c6ee5/oracledb-2.4.0.tar.gz", hash = "sha256:bdd61a9d5077448b5f1c58af6a14accc287bf8032846c351a3cdde5cf64fe95b", size = 614809, upload-time = "2024-08-20T21:02:35.362Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/5b/5125e0a74a58717ac094d953ddaa4c61cfefcd926850c0ecc081e0c209f3/oracledb-2.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:03d1072db83e3f95a8792b8452c78940141902ef97f31223f1d96bfeb8ff830b", size = 3769983 }, - { url = "https://files.pythonhosted.org/packages/17/22/81eb81e15a86989acd21220480a87a3891a27b3f2d64b249098e09e002eb/oracledb-2.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fda77ace54379ad70187627ed02329f9ef4f35c1cc1052e4d27fe4ec68d38fc", size = 2081340 }, - { url = "https://files.pythonhosted.org/packages/6f/56/9cd84f67a573cc6066589d8264ab13f710a128197977205b9c4b177ee85e/oracledb-2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bed34cdd5037277424bb5a38987e00cbb6eea3670ce9c4fcc3cab5971fab5348", size = 2234827 }, - { url = "https://files.pythonhosted.org/packages/f5/ca/4406cfe3400735bf4a1eee951eb174c6cd8573e74d43c1aba9448066a3d2/oracledb-2.4.0-cp312-cp312-win32.whl", hash = "sha256:02e1eea36de371d7719ca02d20a8900fab767e5db71aa59be101405060cf2cfa", size = 1373933 }, - { url = "https://files.pythonhosted.org/packages/a8/e9/1a8afdbe4aaba030476c91284d7599f54fce2879232d28797a4a71d5cfe2/oracledb-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:0b81ec1e20d4d20b0f95a673bb73923d24673e8739d3a25a746113519612c057", size = 1681666 }, + { url = "https://files.pythonhosted.org/packages/9e/5b/5125e0a74a58717ac094d953ddaa4c61cfefcd926850c0ecc081e0c209f3/oracledb-2.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:03d1072db83e3f95a8792b8452c78940141902ef97f31223f1d96bfeb8ff830b", size = 3769983, upload-time = "2024-08-20T21:03:08.186Z" }, + { url = "https://files.pythonhosted.org/packages/17/22/81eb81e15a86989acd21220480a87a3891a27b3f2d64b249098e09e002eb/oracledb-2.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fda77ace54379ad70187627ed02329f9ef4f35c1cc1052e4d27fe4ec68d38fc", size = 2081340, upload-time = "2024-08-20T21:03:10.988Z" }, + { url = "https://files.pythonhosted.org/packages/6f/56/9cd84f67a573cc6066589d8264ab13f710a128197977205b9c4b177ee85e/oracledb-2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bed34cdd5037277424bb5a38987e00cbb6eea3670ce9c4fcc3cab5971fab5348", size = 2234827, upload-time = "2024-08-20T21:03:13.716Z" }, + { url = "https://files.pythonhosted.org/packages/f5/ca/4406cfe3400735bf4a1eee951eb174c6cd8573e74d43c1aba9448066a3d2/oracledb-2.4.0-cp312-cp312-win32.whl", hash = "sha256:02e1eea36de371d7719ca02d20a8900fab767e5db71aa59be101405060cf2cfa", size = 1373933, upload-time = "2024-08-20T21:03:15.514Z" }, + { url = "https://files.pythonhosted.org/packages/a8/e9/1a8afdbe4aaba030476c91284d7599f54fce2879232d28797a4a71d5cfe2/oracledb-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:0b81ec1e20d4d20b0f95a673bb73923d24673e8739d3a25a746113519612c057", size = 1681666, upload-time = "2024-08-20T21:03:17.366Z" }, ] [[package]] name = "packaging" version = "25.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] [[package]] @@ -2145,15 +2160,15 @@ dependencies = [ { name = "pytz" }, { name = "tzdata" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213 } +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213, upload-time = "2024-09-20T13:10:04.827Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893 }, - { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475 }, - { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645 }, - { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445 }, - { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235 }, - { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756 }, - { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 }, + { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893, upload-time = "2024-09-20T13:09:09.655Z" }, + { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475, upload-time = "2024-09-20T13:09:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645, upload-time = "2024-09-20T19:02:03.88Z" }, + { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445, upload-time = "2024-09-20T13:09:17.621Z" }, + { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235, upload-time = "2024-09-20T19:02:07.094Z" }, + { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756, upload-time = "2024-09-20T13:09:20.474Z" }, + { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248, upload-time = "2024-09-20T13:09:23.137Z" }, ] [[package]] @@ -2166,18 +2181,18 @@ dependencies = [ { name = "invoke" }, { name = "pynacl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1f/e7/81fdcbc7f190cdb058cffc9431587eb289833bdd633e2002455ca9bb13d4/paramiko-4.0.0.tar.gz", hash = "sha256:6a25f07b380cc9c9a88d2b920ad37167ac4667f8d9886ccebd8f90f654b5d69f", size = 1630743 } +sdist = { url = "https://files.pythonhosted.org/packages/1f/e7/81fdcbc7f190cdb058cffc9431587eb289833bdd633e2002455ca9bb13d4/paramiko-4.0.0.tar.gz", hash = "sha256:6a25f07b380cc9c9a88d2b920ad37167ac4667f8d9886ccebd8f90f654b5d69f", size = 1630743, upload-time = "2025-08-04T01:02:03.711Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/90/a744336f5af32c433bd09af7854599682a383b37cfd78f7de263de6ad6cb/paramiko-4.0.0-py3-none-any.whl", hash = "sha256:0e20e00ac666503bf0b4eda3b6d833465a2b7aff2e2b3d79a8bba5ef144ee3b9", size = 223932 }, + { url = "https://files.pythonhosted.org/packages/a9/90/a744336f5af32c433bd09af7854599682a383b37cfd78f7de263de6ad6cb/paramiko-4.0.0-py3-none-any.whl", hash = "sha256:0e20e00ac666503bf0b4eda3b6d833465a2b7aff2e2b3d79a8bba5ef144ee3b9", size = 223932, upload-time = "2025-08-04T01:02:02.029Z" }, ] [[package]] name = "pathspec" version = "0.12.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043 } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, ] [[package]] @@ -2188,9 +2203,9 @@ dependencies = [ { name = "charset-normalizer" }, { name = "cryptography" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/78/46/5223d613ac4963e1f7c07b2660fe0e9e770102ec6bda8c038400113fb215/pdfminer_six-20250506.tar.gz", hash = "sha256:b03cc8df09cf3c7aba8246deae52e0bca7ebb112a38895b5e1d4f5dd2b8ca2e7", size = 7387678 } +sdist = { url = "https://files.pythonhosted.org/packages/78/46/5223d613ac4963e1f7c07b2660fe0e9e770102ec6bda8c038400113fb215/pdfminer_six-20250506.tar.gz", hash = "sha256:b03cc8df09cf3c7aba8246deae52e0bca7ebb112a38895b5e1d4f5dd2b8ca2e7", size = 7387678, upload-time = "2025-05-06T16:17:00.787Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/73/16/7a432c0101fa87457e75cb12c879e1749c5870a786525e2e0f42871d6462/pdfminer_six-20250506-py3-none-any.whl", hash = "sha256:d81ad173f62e5f841b53a8ba63af1a4a355933cfc0ffabd608e568b9193909e3", size = 5620187 }, + { url = "https://files.pythonhosted.org/packages/73/16/7a432c0101fa87457e75cb12c879e1749c5870a786525e2e0f42871d6462/pdfminer_six-20250506-py3-none-any.whl", hash = "sha256:d81ad173f62e5f841b53a8ba63af1a4a355933cfc0ffabd608e568b9193909e3", size = 5620187, upload-time = "2025-05-06T16:16:58.669Z" }, ] [[package]] @@ -2202,9 +2217,9 @@ dependencies = [ { name = "pillow" }, { name = "pypdfium2" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6d/0d/4135821aa7b1a0b77a29fac881ef0890b46b0b002290d04915ed7acc0043/pdfplumber-0.11.7.tar.gz", hash = "sha256:fa67773e5e599de1624255e9b75d1409297c5e1d7493b386ce63648637c67368", size = 115518 } +sdist = { url = "https://files.pythonhosted.org/packages/6d/0d/4135821aa7b1a0b77a29fac881ef0890b46b0b002290d04915ed7acc0043/pdfplumber-0.11.7.tar.gz", hash = "sha256:fa67773e5e599de1624255e9b75d1409297c5e1d7493b386ce63648637c67368", size = 115518, upload-time = "2025-06-12T11:30:49.864Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/db/e0/52b67d4f00e09e497aec4f71bc44d395605e8ebcea52543242ed34c25ef9/pdfplumber-0.11.7-py3-none-any.whl", hash = "sha256:edd2195cca68bd770da479cf528a737e362968ec2351e62a6c0b71ff612ac25e", size = 60029 }, + { url = "https://files.pythonhosted.org/packages/db/e0/52b67d4f00e09e497aec4f71bc44d395605e8ebcea52543242ed34c25ef9/pdfplumber-0.11.7-py3-none-any.whl", hash = "sha256:edd2195cca68bd770da479cf528a737e362968ec2351e62a6c0b71ff612ac25e", size = 60029, upload-time = "2025-06-12T11:30:48.89Z" }, ] [[package]] @@ -2214,28 +2229,28 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/44/43/9a0fb552ab4fd980680c2037962e331820f67585df740bedc4a2b50faf20/pgvector-0.4.1.tar.gz", hash = "sha256:83d3a1c044ff0c2f1e95d13dfb625beb0b65506cfec0941bfe81fd0ad44f4003", size = 30646 } +sdist = { url = "https://files.pythonhosted.org/packages/44/43/9a0fb552ab4fd980680c2037962e331820f67585df740bedc4a2b50faf20/pgvector-0.4.1.tar.gz", hash = "sha256:83d3a1c044ff0c2f1e95d13dfb625beb0b65506cfec0941bfe81fd0ad44f4003", size = 30646, upload-time = "2025-04-26T18:56:37.151Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/21/b5735d5982892c878ff3d01bb06e018c43fc204428361ee9fc25a1b2125c/pgvector-0.4.1-py3-none-any.whl", hash = "sha256:34bb4e99e1b13d08a2fe82dda9f860f15ddcd0166fbb25bffe15821cbfeb7362", size = 27086 }, + { url = "https://files.pythonhosted.org/packages/bf/21/b5735d5982892c878ff3d01bb06e018c43fc204428361ee9fc25a1b2125c/pgvector-0.4.1-py3-none-any.whl", hash = "sha256:34bb4e99e1b13d08a2fe82dda9f860f15ddcd0166fbb25bffe15821cbfeb7362", size = 27086, upload-time = "2025-04-26T18:56:35.956Z" }, ] [[package]] name = "pillow" version = "11.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/d0d6dea55cd152ce3d6767bb38a8fc10e33796ba4ba210cbab9354b6d238/pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523", size = 47113069 } +sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/d0d6dea55cd152ce3d6767bb38a8fc10e33796ba4ba210cbab9354b6d238/pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523", size = 47113069, upload-time = "2025-07-01T09:16:30.666Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/fe/1bc9b3ee13f68487a99ac9529968035cca2f0a51ec36892060edcc51d06a/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4", size = 5278800 }, - { url = "https://files.pythonhosted.org/packages/2c/32/7e2ac19b5713657384cec55f89065fb306b06af008cfd87e572035b27119/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69", size = 4686296 }, - { url = "https://files.pythonhosted.org/packages/8e/1e/b9e12bbe6e4c2220effebc09ea0923a07a6da1e1f1bfbc8d7d29a01ce32b/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d", size = 5871726 }, - { url = "https://files.pythonhosted.org/packages/8d/33/e9200d2bd7ba00dc3ddb78df1198a6e80d7669cce6c2bdbeb2530a74ec58/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6", size = 7644652 }, - { url = "https://files.pythonhosted.org/packages/41/f1/6f2427a26fc683e00d985bc391bdd76d8dd4e92fac33d841127eb8fb2313/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7", size = 5977787 }, - { url = "https://files.pythonhosted.org/packages/e4/c9/06dd4a38974e24f932ff5f98ea3c546ce3f8c995d3f0985f8e5ba48bba19/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024", size = 6645236 }, - { url = "https://files.pythonhosted.org/packages/40/e7/848f69fb79843b3d91241bad658e9c14f39a32f71a301bcd1d139416d1be/pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809", size = 6086950 }, - { url = "https://files.pythonhosted.org/packages/0b/1a/7cff92e695a2a29ac1958c2a0fe4c0b2393b60aac13b04a4fe2735cad52d/pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d", size = 6723358 }, - { url = "https://files.pythonhosted.org/packages/26/7d/73699ad77895f69edff76b0f332acc3d497f22f5d75e5360f78cbcaff248/pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149", size = 6275079 }, - { url = "https://files.pythonhosted.org/packages/8c/ce/e7dfc873bdd9828f3b6e5c2bbb74e47a98ec23cc5c74fc4e54462f0d9204/pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d", size = 6986324 }, - { url = "https://files.pythonhosted.org/packages/16/8f/b13447d1bf0b1f7467ce7d86f6e6edf66c0ad7cf44cf5c87a37f9bed9936/pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542", size = 2423067 }, + { url = "https://files.pythonhosted.org/packages/40/fe/1bc9b3ee13f68487a99ac9529968035cca2f0a51ec36892060edcc51d06a/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4", size = 5278800, upload-time = "2025-07-01T09:14:17.648Z" }, + { url = "https://files.pythonhosted.org/packages/2c/32/7e2ac19b5713657384cec55f89065fb306b06af008cfd87e572035b27119/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69", size = 4686296, upload-time = "2025-07-01T09:14:19.828Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1e/b9e12bbe6e4c2220effebc09ea0923a07a6da1e1f1bfbc8d7d29a01ce32b/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d", size = 5871726, upload-time = "2025-07-03T13:10:04.448Z" }, + { url = "https://files.pythonhosted.org/packages/8d/33/e9200d2bd7ba00dc3ddb78df1198a6e80d7669cce6c2bdbeb2530a74ec58/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6", size = 7644652, upload-time = "2025-07-03T13:10:10.391Z" }, + { url = "https://files.pythonhosted.org/packages/41/f1/6f2427a26fc683e00d985bc391bdd76d8dd4e92fac33d841127eb8fb2313/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7", size = 5977787, upload-time = "2025-07-01T09:14:21.63Z" }, + { url = "https://files.pythonhosted.org/packages/e4/c9/06dd4a38974e24f932ff5f98ea3c546ce3f8c995d3f0985f8e5ba48bba19/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024", size = 6645236, upload-time = "2025-07-01T09:14:23.321Z" }, + { url = "https://files.pythonhosted.org/packages/40/e7/848f69fb79843b3d91241bad658e9c14f39a32f71a301bcd1d139416d1be/pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809", size = 6086950, upload-time = "2025-07-01T09:14:25.237Z" }, + { url = "https://files.pythonhosted.org/packages/0b/1a/7cff92e695a2a29ac1958c2a0fe4c0b2393b60aac13b04a4fe2735cad52d/pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d", size = 6723358, upload-time = "2025-07-01T09:14:27.053Z" }, + { url = "https://files.pythonhosted.org/packages/26/7d/73699ad77895f69edff76b0f332acc3d497f22f5d75e5360f78cbcaff248/pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149", size = 6275079, upload-time = "2025-07-01T09:14:30.104Z" }, + { url = "https://files.pythonhosted.org/packages/8c/ce/e7dfc873bdd9828f3b6e5c2bbb74e47a98ec23cc5c74fc4e54462f0d9204/pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d", size = 6986324, upload-time = "2025-07-01T09:14:31.899Z" }, + { url = "https://files.pythonhosted.org/packages/16/8f/b13447d1bf0b1f7467ce7d86f6e6edf66c0ad7cf44cf5c87a37f9bed9936/pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542", size = 2423067, upload-time = "2025-07-01T09:14:33.709Z" }, ] [[package]] @@ -2249,45 +2264,45 @@ dependencies = [ { name = "typing-extensions" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bc/9d/07a7f2136ce04cabd21d69c057dc2915867082b0047e6873e424388d4475/pinecone-7.0.1.tar.gz", hash = "sha256:49ff7b0f5be4a2ddec5aaa709758a9f2df56baa58ad46507d081409e246a81ec", size = 207930 } +sdist = { url = "https://files.pythonhosted.org/packages/bc/9d/07a7f2136ce04cabd21d69c057dc2915867082b0047e6873e424388d4475/pinecone-7.0.1.tar.gz", hash = "sha256:49ff7b0f5be4a2ddec5aaa709758a9f2df56baa58ad46507d081409e246a81ec", size = 207930, upload-time = "2025-05-21T19:39:01.218Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/88/896221e991077d353e61991b759f46d75f3b4298eb5a4aa6534c1371f4b0/pinecone-7.0.1-py3-none-any.whl", hash = "sha256:ce7b0dab3c9f7d81e75b24c13fcbca4a51371e08021faaecaf0cd9a45ca1be6c", size = 516590 }, + { url = "https://files.pythonhosted.org/packages/81/88/896221e991077d353e61991b759f46d75f3b4298eb5a4aa6534c1371f4b0/pinecone-7.0.1-py3-none-any.whl", hash = "sha256:ce7b0dab3c9f7d81e75b24c13fcbca4a51371e08021faaecaf0cd9a45ca1be6c", size = 516590, upload-time = "2025-05-21T19:38:59.117Z" }, ] [[package]] name = "pinecone-plugin-interface" version = "0.0.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f4/fb/e8a4063264953ead9e2b24d9b390152c60f042c951c47f4592e9996e57ff/pinecone_plugin_interface-0.0.7.tar.gz", hash = "sha256:b8e6675e41847333aa13923cc44daa3f85676d7157324682dc1640588a982846", size = 3370 } +sdist = { url = "https://files.pythonhosted.org/packages/f4/fb/e8a4063264953ead9e2b24d9b390152c60f042c951c47f4592e9996e57ff/pinecone_plugin_interface-0.0.7.tar.gz", hash = "sha256:b8e6675e41847333aa13923cc44daa3f85676d7157324682dc1640588a982846", size = 3370, upload-time = "2024-06-05T01:57:52.093Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/1d/a21fdfcd6d022cb64cef5c2a29ee6691c6c103c4566b41646b080b7536a5/pinecone_plugin_interface-0.0.7-py3-none-any.whl", hash = "sha256:875857ad9c9fc8bbc074dbe780d187a2afd21f5bfe0f3b08601924a61ef1bba8", size = 6249 }, + { url = "https://files.pythonhosted.org/packages/3b/1d/a21fdfcd6d022cb64cef5c2a29ee6691c6c103c4566b41646b080b7536a5/pinecone_plugin_interface-0.0.7-py3-none-any.whl", hash = "sha256:875857ad9c9fc8bbc074dbe780d187a2afd21f5bfe0f3b08601924a61ef1bba8", size = 6249, upload-time = "2024-06-05T01:57:50.583Z" }, ] [[package]] name = "platformdirs" version = "4.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/23/e8/21db9c9987b0e728855bd57bff6984f67952bea55d6f75e055c46b5383e8/platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf", size = 21634 } +sdist = { url = "https://files.pythonhosted.org/packages/23/e8/21db9c9987b0e728855bd57bff6984f67952bea55d6f75e055c46b5383e8/platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf", size = 21634, upload-time = "2025-08-26T14:32:04.268Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/4b/2028861e724d3bd36227adfa20d3fd24c3fc6d52032f4a93c133be5d17ce/platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85", size = 18654 }, + { url = "https://files.pythonhosted.org/packages/40/4b/2028861e724d3bd36227adfa20d3fd24c3fc6d52032f4a93c133be5d17ce/platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85", size = 18654, upload-time = "2025-08-26T14:32:02.735Z" }, ] [[package]] name = "pluggy" version = "1.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] [[package]] name = "ply" version = "3.11" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e5/69/882ee5c9d017149285cab114ebeab373308ef0f874fcdac9beb90e0ac4da/ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3", size = 159130 } +sdist = { url = "https://files.pythonhosted.org/packages/e5/69/882ee5c9d017149285cab114ebeab373308ef0f874fcdac9beb90e0ac4da/ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3", size = 159130, upload-time = "2018-02-15T19:01:31.097Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567 }, + { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567, upload-time = "2018-02-15T19:01:27.172Z" }, ] [[package]] @@ -2297,9 +2312,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pywin32", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5e/77/65b857a69ed876e1951e88aaba60f5ce6120c33703f7cb61a3c894b8c1b6/portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac", size = 95644 } +sdist = { url = "https://files.pythonhosted.org/packages/5e/77/65b857a69ed876e1951e88aaba60f5ce6120c33703f7cb61a3c894b8c1b6/portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac", size = 95644, upload-time = "2025-06-14T13:20:40.03Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/a6/38c8e2f318bf67d338f4d629e93b0b4b9af331f455f0390ea8ce4a099b26/portalocker-3.2.0-py3-none-any.whl", hash = "sha256:3cdc5f565312224bc570c49337bd21428bba0ef363bbcf58b9ef4a9f11779968", size = 22424 }, + { url = "https://files.pythonhosted.org/packages/4b/a6/38c8e2f318bf67d338f4d629e93b0b4b9af331f455f0390ea8ce4a099b26/portalocker-3.2.0-py3-none-any.whl", hash = "sha256:3cdc5f565312224bc570c49337bd21428bba0ef363bbcf58b9ef4a9f11779968", size = 22424, upload-time = "2025-06-14T13:20:38.083Z" }, ] [[package]] @@ -2313,18 +2328,18 @@ dependencies = [ { name = "pyyaml" }, { name = "virtualenv" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/04/bd/8a672a86e68f542c3f2ae17a9a8fa63babf16d1107be2f5290e5aa4369ba/pre_commit-3.6.2.tar.gz", hash = "sha256:c3ef34f463045c88658c5b99f38c1e297abdcc0ff13f98d3370055fbbfabc67e", size = 177293 } +sdist = { url = "https://files.pythonhosted.org/packages/04/bd/8a672a86e68f542c3f2ae17a9a8fa63babf16d1107be2f5290e5aa4369ba/pre_commit-3.6.2.tar.gz", hash = "sha256:c3ef34f463045c88658c5b99f38c1e297abdcc0ff13f98d3370055fbbfabc67e", size = 177293, upload-time = "2024-02-18T18:19:41.431Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/7c/f7a50d07ae9fa86d2149d4acb2daf61e7c0257b56bc1a24a7fb09c1b70df/pre_commit-3.6.2-py2.py3-none-any.whl", hash = "sha256:ba637c2d7a670c10daedc059f5c49b5bd0aadbccfcd7ec15592cf9665117532c", size = 204185 }, + { url = "https://files.pythonhosted.org/packages/f8/7c/f7a50d07ae9fa86d2149d4acb2daf61e7c0257b56bc1a24a7fb09c1b70df/pre_commit-3.6.2-py2.py3-none-any.whl", hash = "sha256:ba637c2d7a670c10daedc059f5c49b5bd0aadbccfcd7ec15592cf9665117532c", size = 204185, upload-time = "2024-02-18T18:19:38.953Z" }, ] [[package]] name = "prometheus-client" version = "0.23.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/23/53/3edb5d68ecf6b38fcbcc1ad28391117d2a322d9a1a3eff04bfdb184d8c3b/prometheus_client-0.23.1.tar.gz", hash = "sha256:6ae8f9081eaaaf153a2e959d2e6c4f4fb57b12ef76c8c7980202f1e57b48b2ce", size = 80481 } +sdist = { url = "https://files.pythonhosted.org/packages/23/53/3edb5d68ecf6b38fcbcc1ad28391117d2a322d9a1a3eff04bfdb184d8c3b/prometheus_client-0.23.1.tar.gz", hash = "sha256:6ae8f9081eaaaf153a2e959d2e6c4f4fb57b12ef76c8c7980202f1e57b48b2ce", size = 80481, upload-time = "2025-09-18T20:47:25.043Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/db/14bafcb4af2139e046d03fd00dea7873e48eafe18b7d2797e73d6681f210/prometheus_client-0.23.1-py3-none-any.whl", hash = "sha256:dd1913e6e76b59cfe44e7a4b83e01afc9873c1bdfd2ed8739f1e76aeca115f99", size = 61145 }, + { url = "https://files.pythonhosted.org/packages/b8/db/14bafcb4af2139e046d03fd00dea7873e48eafe18b7d2797e73d6681f210/prometheus_client-0.23.1-py3-none-any.whl", hash = "sha256:dd1913e6e76b59cfe44e7a4b83e01afc9873c1bdfd2ed8739f1e76aeca115f99", size = 61145, upload-time = "2025-09-18T20:47:23.875Z" }, ] [[package]] @@ -2334,34 +2349,34 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "wcwidth" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a1/96/06e01a7b38dce6fe1db213e061a4602dd6032a8a97ef6c1a862537732421/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855", size = 434198 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/96/06e01a7b38dce6fe1db213e061a4602dd6032a8a97ef6c1a862537732421/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855", size = 434198, upload-time = "2025-08-27T15:24:02.057Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431 }, + { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" }, ] [[package]] name = "propcache" version = "0.3.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a6/16/43264e4a779dd8588c21a70f0709665ee8f611211bdd2c87d952cfa7c776/propcache-0.3.2.tar.gz", hash = "sha256:20d7d62e4e7ef05f221e0db2856b979540686342e7dd9973b815599c7057e168", size = 44139 } +sdist = { url = "https://files.pythonhosted.org/packages/a6/16/43264e4a779dd8588c21a70f0709665ee8f611211bdd2c87d952cfa7c776/propcache-0.3.2.tar.gz", hash = "sha256:20d7d62e4e7ef05f221e0db2856b979540686342e7dd9973b815599c7057e168", size = 44139, upload-time = "2025-06-09T22:56:06.081Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/42/9ca01b0a6f48e81615dca4765a8f1dd2c057e0540f6116a27dc5ee01dfb6/propcache-0.3.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8de106b6c84506b31c27168582cd3cb3000a6412c16df14a8628e5871ff83c10", size = 73674 }, - { url = "https://files.pythonhosted.org/packages/af/6e/21293133beb550f9c901bbece755d582bfaf2176bee4774000bd4dd41884/propcache-0.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:28710b0d3975117239c76600ea351934ac7b5ff56e60953474342608dbbb6154", size = 43570 }, - { url = "https://files.pythonhosted.org/packages/0c/c8/0393a0a3a2b8760eb3bde3c147f62b20044f0ddac81e9d6ed7318ec0d852/propcache-0.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce26862344bdf836650ed2487c3d724b00fbfec4233a1013f597b78c1cb73615", size = 43094 }, - { url = "https://files.pythonhosted.org/packages/37/2c/489afe311a690399d04a3e03b069225670c1d489eb7b044a566511c1c498/propcache-0.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bca54bd347a253af2cf4544bbec232ab982f4868de0dd684246b67a51bc6b1db", size = 226958 }, - { url = "https://files.pythonhosted.org/packages/9d/ca/63b520d2f3d418c968bf596839ae26cf7f87bead026b6192d4da6a08c467/propcache-0.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55780d5e9a2ddc59711d727226bb1ba83a22dd32f64ee15594b9392b1f544eb1", size = 234894 }, - { url = "https://files.pythonhosted.org/packages/11/60/1d0ed6fff455a028d678df30cc28dcee7af77fa2b0e6962ce1df95c9a2a9/propcache-0.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:035e631be25d6975ed87ab23153db6a73426a48db688070d925aa27e996fe93c", size = 233672 }, - { url = "https://files.pythonhosted.org/packages/37/7c/54fd5301ef38505ab235d98827207176a5c9b2aa61939b10a460ca53e123/propcache-0.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee6f22b6eaa39297c751d0e80c0d3a454f112f5c6481214fcf4c092074cecd67", size = 224395 }, - { url = "https://files.pythonhosted.org/packages/ee/1a/89a40e0846f5de05fdc6779883bf46ba980e6df4d2ff8fb02643de126592/propcache-0.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ca3aee1aa955438c4dba34fc20a9f390e4c79967257d830f137bd5a8a32ed3b", size = 212510 }, - { url = "https://files.pythonhosted.org/packages/5e/33/ca98368586c9566a6b8d5ef66e30484f8da84c0aac3f2d9aec6d31a11bd5/propcache-0.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7a4f30862869fa2b68380d677cc1c5fcf1e0f2b9ea0cf665812895c75d0ca3b8", size = 222949 }, - { url = "https://files.pythonhosted.org/packages/ba/11/ace870d0aafe443b33b2f0b7efdb872b7c3abd505bfb4890716ad7865e9d/propcache-0.3.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b77ec3c257d7816d9f3700013639db7491a434644c906a2578a11daf13176251", size = 217258 }, - { url = "https://files.pythonhosted.org/packages/5b/d2/86fd6f7adffcfc74b42c10a6b7db721d1d9ca1055c45d39a1a8f2a740a21/propcache-0.3.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cab90ac9d3f14b2d5050928483d3d3b8fb6b4018893fc75710e6aa361ecb2474", size = 213036 }, - { url = "https://files.pythonhosted.org/packages/07/94/2d7d1e328f45ff34a0a284cf5a2847013701e24c2a53117e7c280a4316b3/propcache-0.3.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0b504d29f3c47cf6b9e936c1852246c83d450e8e063d50562115a6be6d3a2535", size = 227684 }, - { url = "https://files.pythonhosted.org/packages/b7/05/37ae63a0087677e90b1d14710e532ff104d44bc1efa3b3970fff99b891dc/propcache-0.3.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:ce2ac2675a6aa41ddb2a0c9cbff53780a617ac3d43e620f8fd77ba1c84dcfc06", size = 234562 }, - { url = "https://files.pythonhosted.org/packages/a4/7c/3f539fcae630408d0bd8bf3208b9a647ccad10976eda62402a80adf8fc34/propcache-0.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b4239611205294cc433845b914131b2a1f03500ff3c1ed093ed216b82621e1", size = 222142 }, - { url = "https://files.pythonhosted.org/packages/7c/d2/34b9eac8c35f79f8a962546b3e97e9d4b990c420ee66ac8255d5d9611648/propcache-0.3.2-cp312-cp312-win32.whl", hash = "sha256:df4a81b9b53449ebc90cc4deefb052c1dd934ba85012aa912c7ea7b7e38b60c1", size = 37711 }, - { url = "https://files.pythonhosted.org/packages/19/61/d582be5d226cf79071681d1b46b848d6cb03d7b70af7063e33a2787eaa03/propcache-0.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:7046e79b989d7fe457bb755844019e10f693752d169076138abf17f31380800c", size = 41479 }, - { url = "https://files.pythonhosted.org/packages/cc/35/cc0aaecf278bb4575b8555f2b137de5ab821595ddae9da9d3cd1da4072c7/propcache-0.3.2-py3-none-any.whl", hash = "sha256:98f1ec44fb675f5052cccc8e609c46ed23a35a1cfd18545ad4e29002d858a43f", size = 12663 }, + { url = "https://files.pythonhosted.org/packages/a8/42/9ca01b0a6f48e81615dca4765a8f1dd2c057e0540f6116a27dc5ee01dfb6/propcache-0.3.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8de106b6c84506b31c27168582cd3cb3000a6412c16df14a8628e5871ff83c10", size = 73674, upload-time = "2025-06-09T22:54:30.551Z" }, + { url = "https://files.pythonhosted.org/packages/af/6e/21293133beb550f9c901bbece755d582bfaf2176bee4774000bd4dd41884/propcache-0.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:28710b0d3975117239c76600ea351934ac7b5ff56e60953474342608dbbb6154", size = 43570, upload-time = "2025-06-09T22:54:32.296Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c8/0393a0a3a2b8760eb3bde3c147f62b20044f0ddac81e9d6ed7318ec0d852/propcache-0.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce26862344bdf836650ed2487c3d724b00fbfec4233a1013f597b78c1cb73615", size = 43094, upload-time = "2025-06-09T22:54:33.929Z" }, + { url = "https://files.pythonhosted.org/packages/37/2c/489afe311a690399d04a3e03b069225670c1d489eb7b044a566511c1c498/propcache-0.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bca54bd347a253af2cf4544bbec232ab982f4868de0dd684246b67a51bc6b1db", size = 226958, upload-time = "2025-06-09T22:54:35.186Z" }, + { url = "https://files.pythonhosted.org/packages/9d/ca/63b520d2f3d418c968bf596839ae26cf7f87bead026b6192d4da6a08c467/propcache-0.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55780d5e9a2ddc59711d727226bb1ba83a22dd32f64ee15594b9392b1f544eb1", size = 234894, upload-time = "2025-06-09T22:54:36.708Z" }, + { url = "https://files.pythonhosted.org/packages/11/60/1d0ed6fff455a028d678df30cc28dcee7af77fa2b0e6962ce1df95c9a2a9/propcache-0.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:035e631be25d6975ed87ab23153db6a73426a48db688070d925aa27e996fe93c", size = 233672, upload-time = "2025-06-09T22:54:38.062Z" }, + { url = "https://files.pythonhosted.org/packages/37/7c/54fd5301ef38505ab235d98827207176a5c9b2aa61939b10a460ca53e123/propcache-0.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee6f22b6eaa39297c751d0e80c0d3a454f112f5c6481214fcf4c092074cecd67", size = 224395, upload-time = "2025-06-09T22:54:39.634Z" }, + { url = "https://files.pythonhosted.org/packages/ee/1a/89a40e0846f5de05fdc6779883bf46ba980e6df4d2ff8fb02643de126592/propcache-0.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ca3aee1aa955438c4dba34fc20a9f390e4c79967257d830f137bd5a8a32ed3b", size = 212510, upload-time = "2025-06-09T22:54:41.565Z" }, + { url = "https://files.pythonhosted.org/packages/5e/33/ca98368586c9566a6b8d5ef66e30484f8da84c0aac3f2d9aec6d31a11bd5/propcache-0.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7a4f30862869fa2b68380d677cc1c5fcf1e0f2b9ea0cf665812895c75d0ca3b8", size = 222949, upload-time = "2025-06-09T22:54:43.038Z" }, + { url = "https://files.pythonhosted.org/packages/ba/11/ace870d0aafe443b33b2f0b7efdb872b7c3abd505bfb4890716ad7865e9d/propcache-0.3.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b77ec3c257d7816d9f3700013639db7491a434644c906a2578a11daf13176251", size = 217258, upload-time = "2025-06-09T22:54:44.376Z" }, + { url = "https://files.pythonhosted.org/packages/5b/d2/86fd6f7adffcfc74b42c10a6b7db721d1d9ca1055c45d39a1a8f2a740a21/propcache-0.3.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cab90ac9d3f14b2d5050928483d3d3b8fb6b4018893fc75710e6aa361ecb2474", size = 213036, upload-time = "2025-06-09T22:54:46.243Z" }, + { url = "https://files.pythonhosted.org/packages/07/94/2d7d1e328f45ff34a0a284cf5a2847013701e24c2a53117e7c280a4316b3/propcache-0.3.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0b504d29f3c47cf6b9e936c1852246c83d450e8e063d50562115a6be6d3a2535", size = 227684, upload-time = "2025-06-09T22:54:47.63Z" }, + { url = "https://files.pythonhosted.org/packages/b7/05/37ae63a0087677e90b1d14710e532ff104d44bc1efa3b3970fff99b891dc/propcache-0.3.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:ce2ac2675a6aa41ddb2a0c9cbff53780a617ac3d43e620f8fd77ba1c84dcfc06", size = 234562, upload-time = "2025-06-09T22:54:48.982Z" }, + { url = "https://files.pythonhosted.org/packages/a4/7c/3f539fcae630408d0bd8bf3208b9a647ccad10976eda62402a80adf8fc34/propcache-0.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b4239611205294cc433845b914131b2a1f03500ff3c1ed093ed216b82621e1", size = 222142, upload-time = "2025-06-09T22:54:50.424Z" }, + { url = "https://files.pythonhosted.org/packages/7c/d2/34b9eac8c35f79f8a962546b3e97e9d4b990c420ee66ac8255d5d9611648/propcache-0.3.2-cp312-cp312-win32.whl", hash = "sha256:df4a81b9b53449ebc90cc4deefb052c1dd934ba85012aa912c7ea7b7e38b60c1", size = 37711, upload-time = "2025-06-09T22:54:52.072Z" }, + { url = "https://files.pythonhosted.org/packages/19/61/d582be5d226cf79071681d1b46b848d6cb03d7b70af7063e33a2787eaa03/propcache-0.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:7046e79b989d7fe457bb755844019e10f693752d169076138abf17f31380800c", size = 41479, upload-time = "2025-06-09T22:54:53.234Z" }, + { url = "https://files.pythonhosted.org/packages/cc/35/cc0aaecf278bb4575b8555f2b137de5ab821595ddae9da9d3cd1da4072c7/propcache-0.3.2-py3-none-any.whl", hash = "sha256:98f1ec44fb675f5052cccc8e609c46ed23a35a1cfd18545ad4e29002d858a43f", size = 12663, upload-time = "2025-06-09T22:56:04.484Z" }, ] [[package]] @@ -2371,81 +2386,81 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "protobuf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f4/ac/87285f15f7cce6d4a008f33f1757fb5a13611ea8914eb58c3d0d26243468/proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012", size = 56142 } +sdist = { url = "https://files.pythonhosted.org/packages/f4/ac/87285f15f7cce6d4a008f33f1757fb5a13611ea8914eb58c3d0d26243468/proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012", size = 56142, upload-time = "2025-03-10T15:54:38.843Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/6d/280c4c2ce28b1593a19ad5239c8b826871fc6ec275c21afc8e1820108039/proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66", size = 50163 }, + { url = "https://files.pythonhosted.org/packages/4e/6d/280c4c2ce28b1593a19ad5239c8b826871fc6ec275c21afc8e1820108039/proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66", size = 50163, upload-time = "2025-03-10T15:54:37.335Z" }, ] [[package]] name = "protobuf" version = "4.25.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/df/01/34c8d2b6354906d728703cb9d546a0e534de479e25f1b581e4094c4a85cc/protobuf-4.25.8.tar.gz", hash = "sha256:6135cf8affe1fc6f76cced2641e4ea8d3e59518d1f24ae41ba97bcad82d397cd", size = 380920 } +sdist = { url = "https://files.pythonhosted.org/packages/df/01/34c8d2b6354906d728703cb9d546a0e534de479e25f1b581e4094c4a85cc/protobuf-4.25.8.tar.gz", hash = "sha256:6135cf8affe1fc6f76cced2641e4ea8d3e59518d1f24ae41ba97bcad82d397cd", size = 380920, upload-time = "2025-05-28T14:22:25.153Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/45/ff/05f34305fe6b85bbfbecbc559d423a5985605cad5eda4f47eae9e9c9c5c5/protobuf-4.25.8-cp310-abi3-win32.whl", hash = "sha256:504435d831565f7cfac9f0714440028907f1975e4bed228e58e72ecfff58a1e0", size = 392745 }, - { url = "https://files.pythonhosted.org/packages/08/35/8b8a8405c564caf4ba835b1fdf554da869954712b26d8f2a98c0e434469b/protobuf-4.25.8-cp310-abi3-win_amd64.whl", hash = "sha256:bd551eb1fe1d7e92c1af1d75bdfa572eff1ab0e5bf1736716814cdccdb2360f9", size = 413736 }, - { url = "https://files.pythonhosted.org/packages/28/d7/ab27049a035b258dab43445eb6ec84a26277b16105b277cbe0a7698bdc6c/protobuf-4.25.8-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:ca809b42f4444f144f2115c4c1a747b9a404d590f18f37e9402422033e464e0f", size = 394537 }, - { url = "https://files.pythonhosted.org/packages/bd/6d/a4a198b61808dd3d1ee187082ccc21499bc949d639feb948961b48be9a7e/protobuf-4.25.8-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:9ad7ef62d92baf5a8654fbb88dac7fa5594cfa70fd3440488a5ca3bfc6d795a7", size = 294005 }, - { url = "https://files.pythonhosted.org/packages/d6/c6/c9deaa6e789b6fc41b88ccbdfe7a42d2b82663248b715f55aa77fbc00724/protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:83e6e54e93d2b696a92cad6e6efc924f3850f82b52e1563778dfab8b355101b0", size = 294924 }, - { url = "https://files.pythonhosted.org/packages/0c/c1/6aece0ab5209981a70cd186f164c133fdba2f51e124ff92b73de7fd24d78/protobuf-4.25.8-py3-none-any.whl", hash = "sha256:15a0af558aa3b13efef102ae6e4f3efac06f1eea11afb3a57db2901447d9fb59", size = 156757 }, + { url = "https://files.pythonhosted.org/packages/45/ff/05f34305fe6b85bbfbecbc559d423a5985605cad5eda4f47eae9e9c9c5c5/protobuf-4.25.8-cp310-abi3-win32.whl", hash = "sha256:504435d831565f7cfac9f0714440028907f1975e4bed228e58e72ecfff58a1e0", size = 392745, upload-time = "2025-05-28T14:22:10.524Z" }, + { url = "https://files.pythonhosted.org/packages/08/35/8b8a8405c564caf4ba835b1fdf554da869954712b26d8f2a98c0e434469b/protobuf-4.25.8-cp310-abi3-win_amd64.whl", hash = "sha256:bd551eb1fe1d7e92c1af1d75bdfa572eff1ab0e5bf1736716814cdccdb2360f9", size = 413736, upload-time = "2025-05-28T14:22:13.156Z" }, + { url = "https://files.pythonhosted.org/packages/28/d7/ab27049a035b258dab43445eb6ec84a26277b16105b277cbe0a7698bdc6c/protobuf-4.25.8-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:ca809b42f4444f144f2115c4c1a747b9a404d590f18f37e9402422033e464e0f", size = 394537, upload-time = "2025-05-28T14:22:14.768Z" }, + { url = "https://files.pythonhosted.org/packages/bd/6d/a4a198b61808dd3d1ee187082ccc21499bc949d639feb948961b48be9a7e/protobuf-4.25.8-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:9ad7ef62d92baf5a8654fbb88dac7fa5594cfa70fd3440488a5ca3bfc6d795a7", size = 294005, upload-time = "2025-05-28T14:22:16.052Z" }, + { url = "https://files.pythonhosted.org/packages/d6/c6/c9deaa6e789b6fc41b88ccbdfe7a42d2b82663248b715f55aa77fbc00724/protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:83e6e54e93d2b696a92cad6e6efc924f3850f82b52e1563778dfab8b355101b0", size = 294924, upload-time = "2025-05-28T14:22:17.105Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c1/6aece0ab5209981a70cd186f164c133fdba2f51e124ff92b73de7fd24d78/protobuf-4.25.8-py3-none-any.whl", hash = "sha256:15a0af558aa3b13efef102ae6e4f3efac06f1eea11afb3a57db2901447d9fb59", size = 156757, upload-time = "2025-05-28T14:22:24.135Z" }, ] [[package]] name = "psutil" version = "5.9.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/90/c7/6dc0a455d111f68ee43f27793971cf03fe29b6ef972042549db29eec39a2/psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c", size = 503247 } +sdist = { url = "https://files.pythonhosted.org/packages/90/c7/6dc0a455d111f68ee43f27793971cf03fe29b6ef972042549db29eec39a2/psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c", size = 503247, upload-time = "2024-01-19T20:47:09.517Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/e3/07ae864a636d70a8a6f58da27cb1179192f1140d5d1da10886ade9405797/psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81", size = 248702 }, - { url = "https://files.pythonhosted.org/packages/b3/bd/28c5f553667116b2598b9cc55908ec435cb7f77a34f2bff3e3ca765b0f78/psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421", size = 285242 }, - { url = "https://files.pythonhosted.org/packages/c5/4f/0e22aaa246f96d6ac87fe5ebb9c5a693fbe8877f537a1022527c47ca43c5/psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4", size = 288191 }, - { url = "https://files.pythonhosted.org/packages/6e/f5/2aa3a4acdc1e5940b59d421742356f133185667dd190b166dbcfcf5d7b43/psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0", size = 251252 }, - { url = "https://files.pythonhosted.org/packages/93/52/3e39d26feae7df0aa0fd510b14012c3678b36ed068f7d78b8d8784d61f0e/psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf", size = 255090 }, - { url = "https://files.pythonhosted.org/packages/05/33/2d74d588408caedd065c2497bdb5ef83ce6082db01289a1e1147f6639802/psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8", size = 249898 }, + { url = "https://files.pythonhosted.org/packages/e7/e3/07ae864a636d70a8a6f58da27cb1179192f1140d5d1da10886ade9405797/psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81", size = 248702, upload-time = "2024-01-19T20:47:36.303Z" }, + { url = "https://files.pythonhosted.org/packages/b3/bd/28c5f553667116b2598b9cc55908ec435cb7f77a34f2bff3e3ca765b0f78/psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421", size = 285242, upload-time = "2024-01-19T20:47:39.65Z" }, + { url = "https://files.pythonhosted.org/packages/c5/4f/0e22aaa246f96d6ac87fe5ebb9c5a693fbe8877f537a1022527c47ca43c5/psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4", size = 288191, upload-time = "2024-01-19T20:47:43.078Z" }, + { url = "https://files.pythonhosted.org/packages/6e/f5/2aa3a4acdc1e5940b59d421742356f133185667dd190b166dbcfcf5d7b43/psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0", size = 251252, upload-time = "2024-01-19T20:47:52.88Z" }, + { url = "https://files.pythonhosted.org/packages/93/52/3e39d26feae7df0aa0fd510b14012c3678b36ed068f7d78b8d8784d61f0e/psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf", size = 255090, upload-time = "2024-01-19T20:47:56.019Z" }, + { url = "https://files.pythonhosted.org/packages/05/33/2d74d588408caedd065c2497bdb5ef83ce6082db01289a1e1147f6639802/psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8", size = 249898, upload-time = "2024-01-19T20:47:59.238Z" }, ] [[package]] name = "psycopg2-binary" version = "2.9.9" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fc/07/e720e53bfab016ebcc34241695ccc06a9e3d91ba19b40ca81317afbdc440/psycopg2-binary-2.9.9.tar.gz", hash = "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c", size = 384973 } +sdist = { url = "https://files.pythonhosted.org/packages/fc/07/e720e53bfab016ebcc34241695ccc06a9e3d91ba19b40ca81317afbdc440/psycopg2-binary-2.9.9.tar.gz", hash = "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c", size = 384973, upload-time = "2023-10-03T12:48:55.128Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/d0/5f2db14e7b53552276ab613399a83f83f85b173a862d3f20580bc7231139/psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf", size = 2823784 }, - { url = "https://files.pythonhosted.org/packages/18/ca/da384fd47233e300e3e485c90e7aab5d7def896d1281239f75901faf87d4/psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d", size = 2553308 }, - { url = "https://files.pythonhosted.org/packages/50/66/fa53d2d3d92f6e1ef469d92afc6a4fe3f6e8a9a04b687aa28fb1f1d954ee/psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212", size = 2851283 }, - { url = "https://files.pythonhosted.org/packages/04/37/2429360ac5547378202db14eec0dde76edbe1f6627df5a43c7e164922859/psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493", size = 3081839 }, - { url = "https://files.pythonhosted.org/packages/62/2a/c0530b59d7e0d09824bc2102ecdcec0456b8ca4d47c0caa82e86fce3ed4c/psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996", size = 3264488 }, - { url = "https://files.pythonhosted.org/packages/19/57/9f172b900795ea37246c78b5f52e00f4779984370855b3e161600156906d/psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e6f98446430fdf41bd36d4faa6cb409f5140c1c2cf58ce0bbdaf16af7d3f119", size = 3020700 }, - { url = "https://files.pythonhosted.org/packages/94/68/1176fc14ea76861b7b8360be5176e87fb20d5091b137c76570eb4e237324/psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c77e3d1862452565875eb31bdb45ac62502feabbd53429fdc39a1cc341d681ba", size = 2355968 }, - { url = "https://files.pythonhosted.org/packages/70/bb/aec2646a705a09079d008ce88073401cd61fc9b04f92af3eb282caa3a2ec/psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07", size = 2536101 }, - { url = "https://files.pythonhosted.org/packages/14/33/12818c157e333cb9d9e6753d1b2463b6f60dbc1fade115f8e4dc5c52cac4/psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb", size = 2487064 }, - { url = "https://files.pythonhosted.org/packages/56/a2/7851c68fe8768f3c9c246198b6356ee3e4a8a7f6820cc798443faada3400/psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe", size = 2456257 }, - { url = "https://files.pythonhosted.org/packages/6f/ee/3ba07c6dc7c3294e717e94720da1597aedc82a10b1b180203ce183d4631a/psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93", size = 1024709 }, - { url = "https://files.pythonhosted.org/packages/7b/08/9c66c269b0d417a0af9fb969535f0371b8c538633535a7a6a5ca3f9231e2/psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab", size = 1163864 }, + { url = "https://files.pythonhosted.org/packages/a7/d0/5f2db14e7b53552276ab613399a83f83f85b173a862d3f20580bc7231139/psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf", size = 2823784, upload-time = "2023-10-03T12:47:00.404Z" }, + { url = "https://files.pythonhosted.org/packages/18/ca/da384fd47233e300e3e485c90e7aab5d7def896d1281239f75901faf87d4/psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d", size = 2553308, upload-time = "2023-11-01T10:40:33.984Z" }, + { url = "https://files.pythonhosted.org/packages/50/66/fa53d2d3d92f6e1ef469d92afc6a4fe3f6e8a9a04b687aa28fb1f1d954ee/psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212", size = 2851283, upload-time = "2023-10-03T12:47:02.736Z" }, + { url = "https://files.pythonhosted.org/packages/04/37/2429360ac5547378202db14eec0dde76edbe1f6627df5a43c7e164922859/psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493", size = 3081839, upload-time = "2023-10-03T12:47:05.027Z" }, + { url = "https://files.pythonhosted.org/packages/62/2a/c0530b59d7e0d09824bc2102ecdcec0456b8ca4d47c0caa82e86fce3ed4c/psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996", size = 3264488, upload-time = "2023-10-03T12:47:08.962Z" }, + { url = "https://files.pythonhosted.org/packages/19/57/9f172b900795ea37246c78b5f52e00f4779984370855b3e161600156906d/psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e6f98446430fdf41bd36d4faa6cb409f5140c1c2cf58ce0bbdaf16af7d3f119", size = 3020700, upload-time = "2023-10-03T12:47:12.23Z" }, + { url = "https://files.pythonhosted.org/packages/94/68/1176fc14ea76861b7b8360be5176e87fb20d5091b137c76570eb4e237324/psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c77e3d1862452565875eb31bdb45ac62502feabbd53429fdc39a1cc341d681ba", size = 2355968, upload-time = "2023-10-03T12:47:14.817Z" }, + { url = "https://files.pythonhosted.org/packages/70/bb/aec2646a705a09079d008ce88073401cd61fc9b04f92af3eb282caa3a2ec/psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07", size = 2536101, upload-time = "2023-10-03T12:47:17.454Z" }, + { url = "https://files.pythonhosted.org/packages/14/33/12818c157e333cb9d9e6753d1b2463b6f60dbc1fade115f8e4dc5c52cac4/psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb", size = 2487064, upload-time = "2023-10-03T12:47:20.717Z" }, + { url = "https://files.pythonhosted.org/packages/56/a2/7851c68fe8768f3c9c246198b6356ee3e4a8a7f6820cc798443faada3400/psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe", size = 2456257, upload-time = "2023-10-03T12:47:23.004Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ee/3ba07c6dc7c3294e717e94720da1597aedc82a10b1b180203ce183d4631a/psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93", size = 1024709, upload-time = "2023-10-28T09:37:24.991Z" }, + { url = "https://files.pythonhosted.org/packages/7b/08/9c66c269b0d417a0af9fb969535f0371b8c538633535a7a6a5ca3f9231e2/psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab", size = 1163864, upload-time = "2023-10-28T09:37:28.155Z" }, ] [[package]] name = "pyarrow" version = "18.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7f/7b/640785a9062bb00314caa8a387abce547d2a420cf09bd6c715fe659ccffb/pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73", size = 1118671 } +sdist = { url = "https://files.pythonhosted.org/packages/7f/7b/640785a9062bb00314caa8a387abce547d2a420cf09bd6c715fe659ccffb/pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73", size = 1118671, upload-time = "2024-11-26T02:01:48.62Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/50/12829e7111b932581e51dda51d5cb39207a056c30fe31ef43f14c63c4d7e/pyarrow-18.1.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f3a76670b263dc41d0ae877f09124ab96ce10e4e48f3e3e4257273cee61ad0d", size = 29514620 }, - { url = "https://files.pythonhosted.org/packages/d1/41/468c944eab157702e96abab3d07b48b8424927d4933541ab43788bb6964d/pyarrow-18.1.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:da31fbca07c435be88a0c321402c4e31a2ba61593ec7473630769de8346b54ee", size = 30856494 }, - { url = "https://files.pythonhosted.org/packages/68/f9/29fb659b390312a7345aeb858a9d9c157552a8852522f2c8bad437c29c0a/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:543ad8459bc438efc46d29a759e1079436290bd583141384c6f7a1068ed6f992", size = 39203624 }, - { url = "https://files.pythonhosted.org/packages/6e/f6/19360dae44200e35753c5c2889dc478154cd78e61b1f738514c9f131734d/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0743e503c55be0fdb5c08e7d44853da27f19dc854531c0570f9f394ec9671d54", size = 40139341 }, - { url = "https://files.pythonhosted.org/packages/bb/e6/9b3afbbcf10cc724312e824af94a2e993d8ace22994d823f5c35324cebf5/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d4b3d2a34780645bed6414e22dda55a92e0fcd1b8a637fba86800ad737057e33", size = 38618629 }, - { url = "https://files.pythonhosted.org/packages/3a/2e/3b99f8a3d9e0ccae0e961978a0d0089b25fb46ebbcfb5ebae3cca179a5b3/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c52f81aa6f6575058d8e2c782bf79d4f9fdc89887f16825ec3a66607a5dd8e30", size = 40078661 }, - { url = "https://files.pythonhosted.org/packages/76/52/f8da04195000099d394012b8d42c503d7041b79f778d854f410e5f05049a/pyarrow-18.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:0ad4892617e1a6c7a551cfc827e072a633eaff758fa09f21c4ee548c30bcaf99", size = 25092330 }, + { url = "https://files.pythonhosted.org/packages/6a/50/12829e7111b932581e51dda51d5cb39207a056c30fe31ef43f14c63c4d7e/pyarrow-18.1.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f3a76670b263dc41d0ae877f09124ab96ce10e4e48f3e3e4257273cee61ad0d", size = 29514620, upload-time = "2024-11-26T01:59:39.797Z" }, + { url = "https://files.pythonhosted.org/packages/d1/41/468c944eab157702e96abab3d07b48b8424927d4933541ab43788bb6964d/pyarrow-18.1.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:da31fbca07c435be88a0c321402c4e31a2ba61593ec7473630769de8346b54ee", size = 30856494, upload-time = "2024-11-26T01:59:44.725Z" }, + { url = "https://files.pythonhosted.org/packages/68/f9/29fb659b390312a7345aeb858a9d9c157552a8852522f2c8bad437c29c0a/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:543ad8459bc438efc46d29a759e1079436290bd583141384c6f7a1068ed6f992", size = 39203624, upload-time = "2024-11-26T01:59:49.189Z" }, + { url = "https://files.pythonhosted.org/packages/6e/f6/19360dae44200e35753c5c2889dc478154cd78e61b1f738514c9f131734d/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0743e503c55be0fdb5c08e7d44853da27f19dc854531c0570f9f394ec9671d54", size = 40139341, upload-time = "2024-11-26T01:59:54.849Z" }, + { url = "https://files.pythonhosted.org/packages/bb/e6/9b3afbbcf10cc724312e824af94a2e993d8ace22994d823f5c35324cebf5/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d4b3d2a34780645bed6414e22dda55a92e0fcd1b8a637fba86800ad737057e33", size = 38618629, upload-time = "2024-11-26T01:59:59.966Z" }, + { url = "https://files.pythonhosted.org/packages/3a/2e/3b99f8a3d9e0ccae0e961978a0d0089b25fb46ebbcfb5ebae3cca179a5b3/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c52f81aa6f6575058d8e2c782bf79d4f9fdc89887f16825ec3a66607a5dd8e30", size = 40078661, upload-time = "2024-11-26T02:00:04.55Z" }, + { url = "https://files.pythonhosted.org/packages/76/52/f8da04195000099d394012b8d42c503d7041b79f778d854f410e5f05049a/pyarrow-18.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:0ad4892617e1a6c7a551cfc827e072a633eaff758fa09f21c4ee548c30bcaf99", size = 25092330, upload-time = "2024-11-26T02:00:09.576Z" }, ] [[package]] name = "pyasn1" version = "0.6.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ba/e9/01f1a64245b89f039897cb0130016d79f77d52669aae6ee7b159a6c4c018/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034", size = 145322 } +sdist = { url = "https://files.pythonhosted.org/packages/ba/e9/01f1a64245b89f039897cb0130016d79f77d52669aae6ee7b159a6c4c018/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034", size = 145322, upload-time = "2024-09-10T22:41:42.55Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/f1/d6a797abb14f6283c0ddff96bbdd46937f64122b8c925cab503dd37f8214/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", size = 83135 }, + { url = "https://files.pythonhosted.org/packages/c8/f1/d6a797abb14f6283c0ddff96bbdd46937f64122b8c925cab503dd37f8214/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", size = 83135, upload-time = "2024-09-11T16:00:36.122Z" }, ] [[package]] @@ -2455,9 +2470,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pyasn1" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892 } +sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259 }, + { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, ] [[package]] @@ -2480,9 +2495,9 @@ wheels = [ name = "pycparser" version = "2.23" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fe/cf/d2d3b9f5699fb1e4615c8e32ff220203e43b248e1dfcc6736ad9057731ca/pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2", size = 173734 } +sdist = { url = "https://files.pythonhosted.org/packages/fe/cf/d2d3b9f5699fb1e4615c8e32ff220203e43b248e1dfcc6736ad9057731ca/pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2", size = 173734, upload-time = "2025-09-09T13:23:47.91Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140 }, + { url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" }, ] [[package]] @@ -2495,9 +2510,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ff/5d/09a551ba512d7ca404d785072700d3f6727a02f6f3c24ecfd081c7cf0aa8/pydantic-2.11.9.tar.gz", hash = "sha256:6b8ffda597a14812a7975c90b82a8a2e777d9257aba3453f973acd3c032a18e2", size = 788495 } +sdist = { url = "https://files.pythonhosted.org/packages/ff/5d/09a551ba512d7ca404d785072700d3f6727a02f6f3c24ecfd081c7cf0aa8/pydantic-2.11.9.tar.gz", hash = "sha256:6b8ffda597a14812a7975c90b82a8a2e777d9257aba3453f973acd3c032a18e2", size = 788495, upload-time = "2025-09-13T11:26:39.325Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3e/d3/108f2006987c58e76691d5ae5d200dd3e0f532cb4e5fa3560751c3a1feba/pydantic-2.11.9-py3-none-any.whl", hash = "sha256:c42dd626f5cfc1c6950ce6205ea58c93efa406da65f479dcb4029d5934857da2", size = 444855 }, + { url = "https://files.pythonhosted.org/packages/3e/d3/108f2006987c58e76691d5ae5d200dd3e0f532cb4e5fa3560751c3a1feba/pydantic-2.11.9-py3-none-any.whl", hash = "sha256:c42dd626f5cfc1c6950ce6205ea58c93efa406da65f479dcb4029d5934857da2", size = 444855, upload-time = "2025-09-13T11:26:36.909Z" }, ] [[package]] @@ -2507,22 +2522,22 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195 } +sdist = { url = "https://files.pythonhosted.org/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195, upload-time = "2025-04-23T18:33:52.104Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000 }, - { url = "https://files.pythonhosted.org/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996 }, - { url = "https://files.pythonhosted.org/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957 }, - { url = "https://files.pythonhosted.org/packages/ec/6b/1ec2c03837ac00886ba8160ce041ce4e325b41d06a034adbef11339ae422/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", size = 1964199 }, - { url = "https://files.pythonhosted.org/packages/2d/1d/6bf34d6adb9debd9136bd197ca72642203ce9aaaa85cfcbfcf20f9696e83/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", size = 2120296 }, - { url = "https://files.pythonhosted.org/packages/e0/94/2bd0aaf5a591e974b32a9f7123f16637776c304471a0ab33cf263cf5591a/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", size = 2676109 }, - { url = "https://files.pythonhosted.org/packages/f9/41/4b043778cf9c4285d59742281a769eac371b9e47e35f98ad321349cc5d61/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", size = 2002028 }, - { url = "https://files.pythonhosted.org/packages/cb/d5/7bb781bf2748ce3d03af04d5c969fa1308880e1dca35a9bd94e1a96a922e/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", size = 2100044 }, - { url = "https://files.pythonhosted.org/packages/fe/36/def5e53e1eb0ad896785702a5bbfd25eed546cdcf4087ad285021a90ed53/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", size = 2058881 }, - { url = "https://files.pythonhosted.org/packages/01/6c/57f8d70b2ee57fc3dc8b9610315949837fa8c11d86927b9bb044f8705419/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", size = 2227034 }, - { url = "https://files.pythonhosted.org/packages/27/b9/9c17f0396a82b3d5cbea4c24d742083422639e7bb1d5bf600e12cb176a13/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", size = 2234187 }, - { url = "https://files.pythonhosted.org/packages/b0/6a/adf5734ffd52bf86d865093ad70b2ce543415e0e356f6cacabbc0d9ad910/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", size = 1892628 }, - { url = "https://files.pythonhosted.org/packages/43/e4/5479fecb3606c1368d496a825d8411e126133c41224c1e7238be58b87d7e/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", size = 1955866 }, - { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894 }, + { url = "https://files.pythonhosted.org/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000, upload-time = "2025-04-23T18:31:25.863Z" }, + { url = "https://files.pythonhosted.org/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996, upload-time = "2025-04-23T18:31:27.341Z" }, + { url = "https://files.pythonhosted.org/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957, upload-time = "2025-04-23T18:31:28.956Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6b/1ec2c03837ac00886ba8160ce041ce4e325b41d06a034adbef11339ae422/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", size = 1964199, upload-time = "2025-04-23T18:31:31.025Z" }, + { url = "https://files.pythonhosted.org/packages/2d/1d/6bf34d6adb9debd9136bd197ca72642203ce9aaaa85cfcbfcf20f9696e83/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", size = 2120296, upload-time = "2025-04-23T18:31:32.514Z" }, + { url = "https://files.pythonhosted.org/packages/e0/94/2bd0aaf5a591e974b32a9f7123f16637776c304471a0ab33cf263cf5591a/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", size = 2676109, upload-time = "2025-04-23T18:31:33.958Z" }, + { url = "https://files.pythonhosted.org/packages/f9/41/4b043778cf9c4285d59742281a769eac371b9e47e35f98ad321349cc5d61/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", size = 2002028, upload-time = "2025-04-23T18:31:39.095Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d5/7bb781bf2748ce3d03af04d5c969fa1308880e1dca35a9bd94e1a96a922e/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", size = 2100044, upload-time = "2025-04-23T18:31:41.034Z" }, + { url = "https://files.pythonhosted.org/packages/fe/36/def5e53e1eb0ad896785702a5bbfd25eed546cdcf4087ad285021a90ed53/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", size = 2058881, upload-time = "2025-04-23T18:31:42.757Z" }, + { url = "https://files.pythonhosted.org/packages/01/6c/57f8d70b2ee57fc3dc8b9610315949837fa8c11d86927b9bb044f8705419/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", size = 2227034, upload-time = "2025-04-23T18:31:44.304Z" }, + { url = "https://files.pythonhosted.org/packages/27/b9/9c17f0396a82b3d5cbea4c24d742083422639e7bb1d5bf600e12cb176a13/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", size = 2234187, upload-time = "2025-04-23T18:31:45.891Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6a/adf5734ffd52bf86d865093ad70b2ce543415e0e356f6cacabbc0d9ad910/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", size = 1892628, upload-time = "2025-04-23T18:31:47.819Z" }, + { url = "https://files.pythonhosted.org/packages/43/e4/5479fecb3606c1368d496a825d8411e126133c41224c1e7238be58b87d7e/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", size = 1955866, upload-time = "2025-04-23T18:31:49.635Z" }, + { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894, upload-time = "2025-04-23T18:31:51.609Z" }, ] [[package]] @@ -2535,9 +2550,9 @@ dependencies = [ { name = "pyopenssl" }, { name = "pyyaml" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/45/74/b591079fa588351cec61861b85ba26f7deb96f3b445556c100e17db5572b/PyDrive2-1.15.4.tar.gz", hash = "sha256:0c011b74ebc24f3c6ca72820626b77f1dfe0ae88f5740c5a5cf96e83dd79ba99", size = 60514 } +sdist = { url = "https://files.pythonhosted.org/packages/45/74/b591079fa588351cec61861b85ba26f7deb96f3b445556c100e17db5572b/PyDrive2-1.15.4.tar.gz", hash = "sha256:0c011b74ebc24f3c6ca72820626b77f1dfe0ae88f5740c5a5cf96e83dd79ba99", size = 60514, upload-time = "2023-05-21T02:25:57.217Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/18/f4/d0b40ee1c703304e8cc737e53516f834c0fbad4fe9b27aed7680d9fdf344/PyDrive2-1.15.4-py3-none-any.whl", hash = "sha256:91fe28e5f094a6dfff834495c4aee0041cbef979467ad27cd0d4b1f91afa8869", size = 45011 }, + { url = "https://files.pythonhosted.org/packages/18/f4/d0b40ee1c703304e8cc737e53516f834c0fbad4fe9b27aed7680d9fdf344/PyDrive2-1.15.4-py3-none-any.whl", hash = "sha256:91fe28e5f094a6dfff834495c4aee0041cbef979467ad27cd0d4b1f91afa8869", size = 45011, upload-time = "2023-05-21T02:25:55.265Z" }, ] [package.optional-dependencies] @@ -2552,18 +2567,18 @@ fsspec = [ name = "pygments" version = "2.19.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 }, + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] [[package]] name = "pyjwt" version = "2.10.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, ] [package.optional-dependencies] @@ -2584,37 +2599,37 @@ dependencies = [ { name = "setuptools" }, { name = "ujson" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/24/e2/5613bc7b2af0ccd760177ca4255243c284cfc0f2cba3f10ff63325c4ca34/pymilvus-2.5.16.tar.gz", hash = "sha256:65f56b81806bc217cca3cf29b70a27d053dea4b1ffada910cf63a38f96381618", size = 1280614 } +sdist = { url = "https://files.pythonhosted.org/packages/24/e2/5613bc7b2af0ccd760177ca4255243c284cfc0f2cba3f10ff63325c4ca34/pymilvus-2.5.16.tar.gz", hash = "sha256:65f56b81806bc217cca3cf29b70a27d053dea4b1ffada910cf63a38f96381618", size = 1280614, upload-time = "2025-09-19T07:02:14.747Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/09/b67a55abee0a53ea50ba0de0cba6e1c0f7ca7ce2c15ffd6f40c059c25e88/pymilvus-2.5.16-py3-none-any.whl", hash = "sha256:76258a324f19c60fee247467e11cd7d6f35a64d2a9c753f5d7b1a5fa15dd6c8a", size = 243272 }, + { url = "https://files.pythonhosted.org/packages/c6/09/b67a55abee0a53ea50ba0de0cba6e1c0f7ca7ce2c15ffd6f40c059c25e88/pymilvus-2.5.16-py3-none-any.whl", hash = "sha256:76258a324f19c60fee247467e11cd7d6f35a64d2a9c753f5d7b1a5fa15dd6c8a", size = 243272, upload-time = "2025-09-19T07:02:12.443Z" }, ] [[package]] name = "pymssql" version = "2.3.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/83/35/5a0b79369e42fffd5c04e4e74fa90ef034cc5c3f314e14f6d58cac646ccf/pymssql-2.3.4.tar.gz", hash = "sha256:117c82d7aa9021171aa9be98368475519f33d9c32073cdcf9b0d76231abc6436", size = 184604 } +sdist = { url = "https://files.pythonhosted.org/packages/83/35/5a0b79369e42fffd5c04e4e74fa90ef034cc5c3f314e14f6d58cac646ccf/pymssql-2.3.4.tar.gz", hash = "sha256:117c82d7aa9021171aa9be98368475519f33d9c32073cdcf9b0d76231abc6436", size = 184604, upload-time = "2025-04-02T02:08:43.503Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/89/5a7a4b27ee44b2dc4708de7e897311cb17f15e7c983c299e8bf97ebf98d1/pymssql-2.3.4-cp312-cp312-macosx_13_0_universal2.whl", hash = "sha256:809b75aaeb9bcd061230bace41e275f80f464f70fcbf5dde2ba7ba8f0eea5298", size = 3075736 }, - { url = "https://files.pythonhosted.org/packages/43/f9/19bbb0026a47043fb239e821e10a75304b12ba986ce4af71cf8986af411c/pymssql-2.3.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48ab1ee04754fb8ce703b6c154e54fde4f6c7f440766d397b101b748123a12df", size = 4019433 }, - { url = "https://files.pythonhosted.org/packages/a6/ac/3aca13f1f527299db4adef594fb9f14d47d68de91b93a220a67391b8ec87/pymssql-2.3.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e22bb4d5aed85b084e3b9fb5ae3463301dd69c17703cfef72e0aed746452cc9", size = 3993550 }, - { url = "https://files.pythonhosted.org/packages/b9/93/879d92f61afb974f69b9186b16ee6a97adff2abc82777e3b66c9c9efb179/pymssql-2.3.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2c1c8d3632630d52387e5b5b4483027494b5cb8f65401573715b74e7a3f16e5", size = 4381934 }, - { url = "https://files.pythonhosted.org/packages/6c/a6/923769b6dbb4e3a4c07a867e0c7fa8e4b230f675095cd7109d4e3eb9ddf0/pymssql-2.3.4-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f7f245acbdf89b96a41513ef0214b55a3ba2824f1f3119dd1945443b6cac78d3", size = 4849674 }, - { url = "https://files.pythonhosted.org/packages/7a/2d/c787f061dcd0603905bf8085dda9cddb8c3c03b18d9239d5d18c953eebba/pymssql-2.3.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9484485fb847eb67f828459b0f4857c9725b20c517c2b7f88a9788fd72b76a6a", size = 4076649 }, - { url = "https://files.pythonhosted.org/packages/c1/a2/e55d823e3ab21cf9fc88e4e2424936899392d9d2e6569d5bcce063f84dac/pymssql-2.3.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4a0716482cd5ecce07230925593cefd9137959c18aca4c92fc24c243d3c20e38", size = 4139477 }, - { url = "https://files.pythonhosted.org/packages/c7/7c/0fec6587b38081d0d0fca4f9ad31e85ec6c5791879e57f0e559ec6be4d3d/pymssql-2.3.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ba4f988674b361709821c8173a6471aa6e47ee6e45b5a8e30d4dcbde1f62fb0f", size = 4653837 }, - { url = "https://files.pythonhosted.org/packages/5f/7c/77d0251f4b5ad5690226a93547fc8279c1c48bd14e3ccc820f5c580a3b73/pymssql-2.3.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:51b8ebfbd7d14d5e7c65e76ffaf31584ffabe9fb1bfd2a85f529bd707512e39d", size = 4910914 }, - { url = "https://files.pythonhosted.org/packages/4f/22/1b2ef85804872a5940010d3c012722356af1fa24f8ba6f419c0260881032/pymssql-2.3.4-cp312-cp312-win32.whl", hash = "sha256:c8f5718f5e7d2623eaf35e025d5fa288c5789916809a89f00b42346b888673da", size = 1337991 }, - { url = "https://files.pythonhosted.org/packages/0f/43/c98f34e7b3cd45653fb233a4bee83bffca0cf5e78c290c291cec34faac21/pymssql-2.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:d72b38b5ba66a4072c680447099bb63ac35d0425e9a29ff91b048e563b999be5", size = 2021760 }, + { url = "https://files.pythonhosted.org/packages/bc/89/5a7a4b27ee44b2dc4708de7e897311cb17f15e7c983c299e8bf97ebf98d1/pymssql-2.3.4-cp312-cp312-macosx_13_0_universal2.whl", hash = "sha256:809b75aaeb9bcd061230bace41e275f80f464f70fcbf5dde2ba7ba8f0eea5298", size = 3075736, upload-time = "2025-04-02T02:11:44.347Z" }, + { url = "https://files.pythonhosted.org/packages/43/f9/19bbb0026a47043fb239e821e10a75304b12ba986ce4af71cf8986af411c/pymssql-2.3.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48ab1ee04754fb8ce703b6c154e54fde4f6c7f440766d397b101b748123a12df", size = 4019433, upload-time = "2025-04-02T03:07:58.222Z" }, + { url = "https://files.pythonhosted.org/packages/a6/ac/3aca13f1f527299db4adef594fb9f14d47d68de91b93a220a67391b8ec87/pymssql-2.3.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e22bb4d5aed85b084e3b9fb5ae3463301dd69c17703cfef72e0aed746452cc9", size = 3993550, upload-time = "2025-04-02T02:13:16.433Z" }, + { url = "https://files.pythonhosted.org/packages/b9/93/879d92f61afb974f69b9186b16ee6a97adff2abc82777e3b66c9c9efb179/pymssql-2.3.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2c1c8d3632630d52387e5b5b4483027494b5cb8f65401573715b74e7a3f16e5", size = 4381934, upload-time = "2025-04-02T02:12:45.424Z" }, + { url = "https://files.pythonhosted.org/packages/6c/a6/923769b6dbb4e3a4c07a867e0c7fa8e4b230f675095cd7109d4e3eb9ddf0/pymssql-2.3.4-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f7f245acbdf89b96a41513ef0214b55a3ba2824f1f3119dd1945443b6cac78d3", size = 4849674, upload-time = "2025-04-02T02:13:05.245Z" }, + { url = "https://files.pythonhosted.org/packages/7a/2d/c787f061dcd0603905bf8085dda9cddb8c3c03b18d9239d5d18c953eebba/pymssql-2.3.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9484485fb847eb67f828459b0f4857c9725b20c517c2b7f88a9788fd72b76a6a", size = 4076649, upload-time = "2025-04-02T02:15:13.053Z" }, + { url = "https://files.pythonhosted.org/packages/c1/a2/e55d823e3ab21cf9fc88e4e2424936899392d9d2e6569d5bcce063f84dac/pymssql-2.3.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4a0716482cd5ecce07230925593cefd9137959c18aca4c92fc24c243d3c20e38", size = 4139477, upload-time = "2025-04-02T02:13:42.91Z" }, + { url = "https://files.pythonhosted.org/packages/c7/7c/0fec6587b38081d0d0fca4f9ad31e85ec6c5791879e57f0e559ec6be4d3d/pymssql-2.3.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ba4f988674b361709821c8173a6471aa6e47ee6e45b5a8e30d4dcbde1f62fb0f", size = 4653837, upload-time = "2025-04-02T02:15:05.102Z" }, + { url = "https://files.pythonhosted.org/packages/5f/7c/77d0251f4b5ad5690226a93547fc8279c1c48bd14e3ccc820f5c580a3b73/pymssql-2.3.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:51b8ebfbd7d14d5e7c65e76ffaf31584ffabe9fb1bfd2a85f529bd707512e39d", size = 4910914, upload-time = "2025-04-02T02:13:55.446Z" }, + { url = "https://files.pythonhosted.org/packages/4f/22/1b2ef85804872a5940010d3c012722356af1fa24f8ba6f419c0260881032/pymssql-2.3.4-cp312-cp312-win32.whl", hash = "sha256:c8f5718f5e7d2623eaf35e025d5fa288c5789916809a89f00b42346b888673da", size = 1337991, upload-time = "2025-04-02T02:29:43.394Z" }, + { url = "https://files.pythonhosted.org/packages/0f/43/c98f34e7b3cd45653fb233a4bee83bffca0cf5e78c290c291cec34faac21/pymssql-2.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:d72b38b5ba66a4072c680447099bb63ac35d0425e9a29ff91b048e563b999be5", size = 2021760, upload-time = "2025-04-02T02:28:06.757Z" }, ] [[package]] name = "pymysql" version = "1.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/41/9d/ee68dee1c8821c839bb31e6e5f40e61035a5278f7c1307dde758f0c90452/PyMySQL-1.1.0.tar.gz", hash = "sha256:4f13a7df8bf36a51e81dd9f3605fede45a4878fe02f9236349fd82a3f0612f96", size = 47240 } +sdist = { url = "https://files.pythonhosted.org/packages/41/9d/ee68dee1c8821c839bb31e6e5f40e61035a5278f7c1307dde758f0c90452/PyMySQL-1.1.0.tar.gz", hash = "sha256:4f13a7df8bf36a51e81dd9f3605fede45a4878fe02f9236349fd82a3f0612f96", size = 47240, upload-time = "2023-06-26T05:34:02.058Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/30/20467e39523d0cfc2b6227902d3687a16364307260c75e6a1cb4422b0c62/PyMySQL-1.1.0-py3-none-any.whl", hash = "sha256:8969ec6d763c856f7073c4c64662882675702efcb114b4bcbb955aea3a069fa7", size = 44768 }, + { url = "https://files.pythonhosted.org/packages/e5/30/20467e39523d0cfc2b6227902d3687a16364307260c75e6a1cb4422b0c62/PyMySQL-1.1.0-py3-none-any.whl", hash = "sha256:8969ec6d763c856f7073c4c64662882675702efcb114b4bcbb955aea3a069fa7", size = 44768, upload-time = "2023-06-26T05:33:59.951Z" }, ] [[package]] @@ -2624,22 +2639,22 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/06/c6/a3124dee667a423f2c637cfd262a54d67d8ccf3e160f3c50f622a85b7723/pynacl-1.6.0.tar.gz", hash = "sha256:cb36deafe6e2bce3b286e5d1f3e1c246e0ccdb8808ddb4550bb2792f2df298f2", size = 3505641 } +sdist = { url = "https://files.pythonhosted.org/packages/06/c6/a3124dee667a423f2c637cfd262a54d67d8ccf3e160f3c50f622a85b7723/pynacl-1.6.0.tar.gz", hash = "sha256:cb36deafe6e2bce3b286e5d1f3e1c246e0ccdb8808ddb4550bb2792f2df298f2", size = 3505641, upload-time = "2025-09-10T23:39:22.308Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/63/37/87c72df19857c5b3b47ace6f211a26eb862ada495cc96daa372d96048fca/pynacl-1.6.0-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:f4b3824920e206b4f52abd7de621ea7a44fd3cb5c8daceb7c3612345dfc54f2e", size = 382610 }, - { url = "https://files.pythonhosted.org/packages/0c/64/3ce958a5817fd3cc6df4ec14441c43fd9854405668d73babccf77f9597a3/pynacl-1.6.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:16dd347cdc8ae0b0f6187a2608c0af1c8b7ecbbe6b4a06bff8253c192f696990", size = 798744 }, - { url = "https://files.pythonhosted.org/packages/e4/8a/3f0dd297a0a33fa3739c255feebd0206bb1df0b44c52fbe2caf8e8bc4425/pynacl-1.6.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:16c60daceee88d04f8d41d0a4004a7ed8d9a5126b997efd2933e08e93a3bd850", size = 1397879 }, - { url = "https://files.pythonhosted.org/packages/41/94/028ff0434a69448f61348d50d2c147dda51aabdd4fbc93ec61343332174d/pynacl-1.6.0-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25720bad35dfac34a2bcdd61d9e08d6bfc6041bebc7751d9c9f2446cf1e77d64", size = 833907 }, - { url = "https://files.pythonhosted.org/packages/52/bc/a5cff7f8c30d5f4c26a07dfb0bcda1176ab8b2de86dda3106c00a02ad787/pynacl-1.6.0-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8bfaa0a28a1ab718bad6239979a5a57a8d1506d0caf2fba17e524dbb409441cf", size = 1436649 }, - { url = "https://files.pythonhosted.org/packages/7a/20/c397be374fd5d84295046e398de4ba5f0722dc14450f65db76a43c121471/pynacl-1.6.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:ef214b90556bb46a485b7da8258e59204c244b1b5b576fb71848819b468c44a7", size = 817142 }, - { url = "https://files.pythonhosted.org/packages/12/30/5efcef3406940cda75296c6d884090b8a9aad2dcc0c304daebb5ae99fb4a/pynacl-1.6.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:49c336dd80ea54780bcff6a03ee1a476be1612423010472e60af83452aa0f442", size = 1401794 }, - { url = "https://files.pythonhosted.org/packages/be/e1/a8fe1248cc17ccb03b676d80fa90763760a6d1247da434844ea388d0816c/pynacl-1.6.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:f3482abf0f9815e7246d461fab597aa179b7524628a4bc36f86a7dc418d2608d", size = 772161 }, - { url = "https://files.pythonhosted.org/packages/a3/76/8a62702fb657d6d9104ce13449db221a345665d05e6a3fdefb5a7cafd2ad/pynacl-1.6.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:140373378e34a1f6977e573033d1dd1de88d2a5d90ec6958c9485b2fd9f3eb90", size = 1370720 }, - { url = "https://files.pythonhosted.org/packages/6d/38/9e9e9b777a1c4c8204053733e1a0269672c0bd40852908c9ad6b6eaba82c/pynacl-1.6.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6b393bc5e5a0eb86bb85b533deb2d2c815666665f840a09e0aa3362bb6088736", size = 791252 }, - { url = "https://files.pythonhosted.org/packages/63/ef/d972ce3d92ae05c9091363cf185e8646933f91c376e97b8be79ea6e96c22/pynacl-1.6.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4a25cfede801f01e54179b8ff9514bd7b5944da560b7040939732d1804d25419", size = 1362910 }, - { url = "https://files.pythonhosted.org/packages/35/2c/ee0b373a1861f66a7ca8bdb999331525615061320dd628527a50ba8e8a60/pynacl-1.6.0-cp38-abi3-win32.whl", hash = "sha256:dcdeb41c22ff3c66eef5e63049abf7639e0db4edee57ba70531fc1b6b133185d", size = 226461 }, - { url = "https://files.pythonhosted.org/packages/75/f7/41b6c0b9dd9970173b6acc026bab7b4c187e4e5beef2756d419ad65482da/pynacl-1.6.0-cp38-abi3-win_amd64.whl", hash = "sha256:cf831615cc16ba324240de79d925eacae8265b7691412ac6b24221db157f6bd1", size = 238802 }, - { url = "https://files.pythonhosted.org/packages/8e/0f/462326910c6172fa2c6ed07922b22ffc8e77432b3affffd9e18f444dbfbb/pynacl-1.6.0-cp38-abi3-win_arm64.whl", hash = "sha256:84709cea8f888e618c21ed9a0efdb1a59cc63141c403db8bf56c469b71ad56f2", size = 183846 }, + { url = "https://files.pythonhosted.org/packages/63/37/87c72df19857c5b3b47ace6f211a26eb862ada495cc96daa372d96048fca/pynacl-1.6.0-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:f4b3824920e206b4f52abd7de621ea7a44fd3cb5c8daceb7c3612345dfc54f2e", size = 382610, upload-time = "2025-09-10T23:38:49.459Z" }, + { url = "https://files.pythonhosted.org/packages/0c/64/3ce958a5817fd3cc6df4ec14441c43fd9854405668d73babccf77f9597a3/pynacl-1.6.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:16dd347cdc8ae0b0f6187a2608c0af1c8b7ecbbe6b4a06bff8253c192f696990", size = 798744, upload-time = "2025-09-10T23:38:58.531Z" }, + { url = "https://files.pythonhosted.org/packages/e4/8a/3f0dd297a0a33fa3739c255feebd0206bb1df0b44c52fbe2caf8e8bc4425/pynacl-1.6.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:16c60daceee88d04f8d41d0a4004a7ed8d9a5126b997efd2933e08e93a3bd850", size = 1397879, upload-time = "2025-09-10T23:39:00.44Z" }, + { url = "https://files.pythonhosted.org/packages/41/94/028ff0434a69448f61348d50d2c147dda51aabdd4fbc93ec61343332174d/pynacl-1.6.0-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25720bad35dfac34a2bcdd61d9e08d6bfc6041bebc7751d9c9f2446cf1e77d64", size = 833907, upload-time = "2025-09-10T23:38:50.936Z" }, + { url = "https://files.pythonhosted.org/packages/52/bc/a5cff7f8c30d5f4c26a07dfb0bcda1176ab8b2de86dda3106c00a02ad787/pynacl-1.6.0-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8bfaa0a28a1ab718bad6239979a5a57a8d1506d0caf2fba17e524dbb409441cf", size = 1436649, upload-time = "2025-09-10T23:38:52.783Z" }, + { url = "https://files.pythonhosted.org/packages/7a/20/c397be374fd5d84295046e398de4ba5f0722dc14450f65db76a43c121471/pynacl-1.6.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:ef214b90556bb46a485b7da8258e59204c244b1b5b576fb71848819b468c44a7", size = 817142, upload-time = "2025-09-10T23:38:54.4Z" }, + { url = "https://files.pythonhosted.org/packages/12/30/5efcef3406940cda75296c6d884090b8a9aad2dcc0c304daebb5ae99fb4a/pynacl-1.6.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:49c336dd80ea54780bcff6a03ee1a476be1612423010472e60af83452aa0f442", size = 1401794, upload-time = "2025-09-10T23:38:56.614Z" }, + { url = "https://files.pythonhosted.org/packages/be/e1/a8fe1248cc17ccb03b676d80fa90763760a6d1247da434844ea388d0816c/pynacl-1.6.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:f3482abf0f9815e7246d461fab597aa179b7524628a4bc36f86a7dc418d2608d", size = 772161, upload-time = "2025-09-10T23:39:01.93Z" }, + { url = "https://files.pythonhosted.org/packages/a3/76/8a62702fb657d6d9104ce13449db221a345665d05e6a3fdefb5a7cafd2ad/pynacl-1.6.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:140373378e34a1f6977e573033d1dd1de88d2a5d90ec6958c9485b2fd9f3eb90", size = 1370720, upload-time = "2025-09-10T23:39:03.531Z" }, + { url = "https://files.pythonhosted.org/packages/6d/38/9e9e9b777a1c4c8204053733e1a0269672c0bd40852908c9ad6b6eaba82c/pynacl-1.6.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6b393bc5e5a0eb86bb85b533deb2d2c815666665f840a09e0aa3362bb6088736", size = 791252, upload-time = "2025-09-10T23:39:05.058Z" }, + { url = "https://files.pythonhosted.org/packages/63/ef/d972ce3d92ae05c9091363cf185e8646933f91c376e97b8be79ea6e96c22/pynacl-1.6.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4a25cfede801f01e54179b8ff9514bd7b5944da560b7040939732d1804d25419", size = 1362910, upload-time = "2025-09-10T23:39:06.924Z" }, + { url = "https://files.pythonhosted.org/packages/35/2c/ee0b373a1861f66a7ca8bdb999331525615061320dd628527a50ba8e8a60/pynacl-1.6.0-cp38-abi3-win32.whl", hash = "sha256:dcdeb41c22ff3c66eef5e63049abf7639e0db4edee57ba70531fc1b6b133185d", size = 226461, upload-time = "2025-09-10T23:39:11.894Z" }, + { url = "https://files.pythonhosted.org/packages/75/f7/41b6c0b9dd9970173b6acc026bab7b4c187e4e5beef2756d419ad65482da/pynacl-1.6.0-cp38-abi3-win_amd64.whl", hash = "sha256:cf831615cc16ba324240de79d925eacae8265b7691412ac6b24221db157f6bd1", size = 238802, upload-time = "2025-09-10T23:39:08.966Z" }, + { url = "https://files.pythonhosted.org/packages/8e/0f/462326910c6172fa2c6ed07922b22ffc8e77432b3affffd9e18f444dbfbb/pynacl-1.6.0-cp38-abi3-win_arm64.whl", hash = "sha256:84709cea8f888e618c21ed9a0efdb1a59cc63141c403db8bf56c469b71ad56f2", size = 183846, upload-time = "2025-09-10T23:39:10.552Z" }, ] [[package]] @@ -2650,47 +2665,47 @@ dependencies = [ { name = "cryptography" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/80/be/97b83a464498a79103036bc74d1038df4a7ef0e402cfaf4d5e113fb14759/pyopenssl-25.3.0.tar.gz", hash = "sha256:c981cb0a3fd84e8602d7afc209522773b94c1c2446a3c710a75b06fe1beae329", size = 184073 } +sdist = { url = "https://files.pythonhosted.org/packages/80/be/97b83a464498a79103036bc74d1038df4a7ef0e402cfaf4d5e113fb14759/pyopenssl-25.3.0.tar.gz", hash = "sha256:c981cb0a3fd84e8602d7afc209522773b94c1c2446a3c710a75b06fe1beae329", size = 184073, upload-time = "2025-09-17T00:32:21.037Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268 }, + { url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268, upload-time = "2025-09-17T00:32:19.474Z" }, ] [[package]] name = "pyparsing" version = "3.2.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f2/a5/181488fc2b9d093e3972d2a472855aae8a03f000592dbfce716a512b3359/pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6", size = 1099274 } +sdist = { url = "https://files.pythonhosted.org/packages/f2/a5/181488fc2b9d093e3972d2a472855aae8a03f000592dbfce716a512b3359/pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6", size = 1099274, upload-time = "2025-09-21T04:11:06.277Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/5e/1aa9a93198c6b64513c9d7752de7422c06402de6600a8767da1524f9570b/pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e", size = 113890 }, + { url = "https://files.pythonhosted.org/packages/10/5e/1aa9a93198c6b64513c9d7752de7422c06402de6600a8767da1524f9570b/pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e", size = 113890, upload-time = "2025-09-21T04:11:04.117Z" }, ] [[package]] name = "pypdf" version = "6.1.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a6/85/4c0f12616db83c2e3ef580c3cfa98bd082e88fc8d02e136bad3bede1e3fa/pypdf-6.1.1.tar.gz", hash = "sha256:10f44d49bf2a82e54c3c5ba3cdcbb118f2a44fc57df8ce51d6fb9b1ed9bfbe8b", size = 5074507 } +sdist = { url = "https://files.pythonhosted.org/packages/a6/85/4c0f12616db83c2e3ef580c3cfa98bd082e88fc8d02e136bad3bede1e3fa/pypdf-6.1.1.tar.gz", hash = "sha256:10f44d49bf2a82e54c3c5ba3cdcbb118f2a44fc57df8ce51d6fb9b1ed9bfbe8b", size = 5074507, upload-time = "2025-09-28T13:29:16.165Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/07/ed/adae13756d9dabdddee483fc7712905bb5585fbf6e922b1a19aca3a29cd1/pypdf-6.1.1-py3-none-any.whl", hash = "sha256:7781f99493208a37a7d4275601d883e19af24e62a525c25844d22157c2e4cde7", size = 323455 }, + { url = "https://files.pythonhosted.org/packages/07/ed/adae13756d9dabdddee483fc7712905bb5585fbf6e922b1a19aca3a29cd1/pypdf-6.1.1-py3-none-any.whl", hash = "sha256:7781f99493208a37a7d4275601d883e19af24e62a525c25844d22157c2e4cde7", size = 323455, upload-time = "2025-09-28T13:29:14.392Z" }, ] [[package]] name = "pypdfium2" version = "4.30.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/14/838b3ba247a0ba92e4df5d23f2bea9478edcfd72b78a39d6ca36ccd84ad2/pypdfium2-4.30.0.tar.gz", hash = "sha256:48b5b7e5566665bc1015b9d69c1ebabe21f6aee468b509531c3c8318eeee2e16", size = 140239 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/14/838b3ba247a0ba92e4df5d23f2bea9478edcfd72b78a39d6ca36ccd84ad2/pypdfium2-4.30.0.tar.gz", hash = "sha256:48b5b7e5566665bc1015b9d69c1ebabe21f6aee468b509531c3c8318eeee2e16", size = 140239, upload-time = "2024-05-09T18:33:17.552Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/9a/c8ff5cc352c1b60b0b97642ae734f51edbab6e28b45b4fcdfe5306ee3c83/pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab", size = 2837254 }, - { url = "https://files.pythonhosted.org/packages/21/8b/27d4d5409f3c76b985f4ee4afe147b606594411e15ac4dc1c3363c9a9810/pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de", size = 2707624 }, - { url = "https://files.pythonhosted.org/packages/11/63/28a73ca17c24b41a205d658e177d68e198d7dde65a8c99c821d231b6ee3d/pypdfium2-4.30.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e6e50f5ce7f65a40a33d7c9edc39f23140c57e37144c2d6d9e9262a2a854854", size = 2793126 }, - { url = "https://files.pythonhosted.org/packages/d1/96/53b3ebf0955edbd02ac6da16a818ecc65c939e98fdeb4e0958362bd385c8/pypdfium2-4.30.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3d0dd3ecaffd0b6dbda3da663220e705cb563918249bda26058c6036752ba3a2", size = 2591077 }, - { url = "https://files.pythonhosted.org/packages/ec/ee/0394e56e7cab8b5b21f744d988400948ef71a9a892cbeb0b200d324ab2c7/pypdfium2-4.30.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc3bf29b0db8c76cdfaac1ec1cde8edf211a7de7390fbf8934ad2aa9b4d6dfad", size = 2864431 }, - { url = "https://files.pythonhosted.org/packages/65/cd/3f1edf20a0ef4a212a5e20a5900e64942c5a374473671ac0780eaa08ea80/pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1f78d2189e0ddf9ac2b7a9b9bd4f0c66f54d1389ff6c17e9fd9dc034d06eb3f", size = 2812008 }, - { url = "https://files.pythonhosted.org/packages/c8/91/2d517db61845698f41a2a974de90762e50faeb529201c6b3574935969045/pypdfium2-4.30.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:5eda3641a2da7a7a0b2f4dbd71d706401a656fea521b6b6faa0675b15d31a163", size = 6181543 }, - { url = "https://files.pythonhosted.org/packages/ba/c4/ed1315143a7a84b2c7616569dfb472473968d628f17c231c39e29ae9d780/pypdfium2-4.30.0-py3-none-musllinux_1_1_i686.whl", hash = "sha256:0dfa61421b5eb68e1188b0b2231e7ba35735aef2d867d86e48ee6cab6975195e", size = 6175911 }, - { url = "https://files.pythonhosted.org/packages/7a/c4/9e62d03f414e0e3051c56d5943c3bf42aa9608ede4e19dc96438364e9e03/pypdfium2-4.30.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:f33bd79e7a09d5f7acca3b0b69ff6c8a488869a7fab48fdf400fec6e20b9c8be", size = 6267430 }, - { url = "https://files.pythonhosted.org/packages/90/47/eda4904f715fb98561e34012826e883816945934a851745570521ec89520/pypdfium2-4.30.0-py3-none-win32.whl", hash = "sha256:ee2410f15d576d976c2ab2558c93d392a25fb9f6635e8dd0a8a3a5241b275e0e", size = 2775951 }, - { url = "https://files.pythonhosted.org/packages/25/bd/56d9ec6b9f0fc4e0d95288759f3179f0fcd34b1a1526b75673d2f6d5196f/pypdfium2-4.30.0-py3-none-win_amd64.whl", hash = "sha256:90dbb2ac07be53219f56be09961eb95cf2473f834d01a42d901d13ccfad64b4c", size = 2892098 }, - { url = "https://files.pythonhosted.org/packages/be/7a/097801205b991bc3115e8af1edb850d30aeaf0118520b016354cf5ccd3f6/pypdfium2-4.30.0-py3-none-win_arm64.whl", hash = "sha256:119b2969a6d6b1e8d55e99caaf05290294f2d0fe49c12a3f17102d01c441bd29", size = 2752118 }, + { url = "https://files.pythonhosted.org/packages/c7/9a/c8ff5cc352c1b60b0b97642ae734f51edbab6e28b45b4fcdfe5306ee3c83/pypdfium2-4.30.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b33ceded0b6ff5b2b93bc1fe0ad4b71aa6b7e7bd5875f1ca0cdfb6ba6ac01aab", size = 2837254, upload-time = "2024-05-09T18:32:48.653Z" }, + { url = "https://files.pythonhosted.org/packages/21/8b/27d4d5409f3c76b985f4ee4afe147b606594411e15ac4dc1c3363c9a9810/pypdfium2-4.30.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e55689f4b06e2d2406203e771f78789bd4f190731b5d57383d05cf611d829de", size = 2707624, upload-time = "2024-05-09T18:32:51.458Z" }, + { url = "https://files.pythonhosted.org/packages/11/63/28a73ca17c24b41a205d658e177d68e198d7dde65a8c99c821d231b6ee3d/pypdfium2-4.30.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e6e50f5ce7f65a40a33d7c9edc39f23140c57e37144c2d6d9e9262a2a854854", size = 2793126, upload-time = "2024-05-09T18:32:53.581Z" }, + { url = "https://files.pythonhosted.org/packages/d1/96/53b3ebf0955edbd02ac6da16a818ecc65c939e98fdeb4e0958362bd385c8/pypdfium2-4.30.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3d0dd3ecaffd0b6dbda3da663220e705cb563918249bda26058c6036752ba3a2", size = 2591077, upload-time = "2024-05-09T18:32:55.99Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ee/0394e56e7cab8b5b21f744d988400948ef71a9a892cbeb0b200d324ab2c7/pypdfium2-4.30.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc3bf29b0db8c76cdfaac1ec1cde8edf211a7de7390fbf8934ad2aa9b4d6dfad", size = 2864431, upload-time = "2024-05-09T18:32:57.911Z" }, + { url = "https://files.pythonhosted.org/packages/65/cd/3f1edf20a0ef4a212a5e20a5900e64942c5a374473671ac0780eaa08ea80/pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1f78d2189e0ddf9ac2b7a9b9bd4f0c66f54d1389ff6c17e9fd9dc034d06eb3f", size = 2812008, upload-time = "2024-05-09T18:32:59.886Z" }, + { url = "https://files.pythonhosted.org/packages/c8/91/2d517db61845698f41a2a974de90762e50faeb529201c6b3574935969045/pypdfium2-4.30.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:5eda3641a2da7a7a0b2f4dbd71d706401a656fea521b6b6faa0675b15d31a163", size = 6181543, upload-time = "2024-05-09T18:33:02.597Z" }, + { url = "https://files.pythonhosted.org/packages/ba/c4/ed1315143a7a84b2c7616569dfb472473968d628f17c231c39e29ae9d780/pypdfium2-4.30.0-py3-none-musllinux_1_1_i686.whl", hash = "sha256:0dfa61421b5eb68e1188b0b2231e7ba35735aef2d867d86e48ee6cab6975195e", size = 6175911, upload-time = "2024-05-09T18:33:05.376Z" }, + { url = "https://files.pythonhosted.org/packages/7a/c4/9e62d03f414e0e3051c56d5943c3bf42aa9608ede4e19dc96438364e9e03/pypdfium2-4.30.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:f33bd79e7a09d5f7acca3b0b69ff6c8a488869a7fab48fdf400fec6e20b9c8be", size = 6267430, upload-time = "2024-05-09T18:33:08.067Z" }, + { url = "https://files.pythonhosted.org/packages/90/47/eda4904f715fb98561e34012826e883816945934a851745570521ec89520/pypdfium2-4.30.0-py3-none-win32.whl", hash = "sha256:ee2410f15d576d976c2ab2558c93d392a25fb9f6635e8dd0a8a3a5241b275e0e", size = 2775951, upload-time = "2024-05-09T18:33:10.567Z" }, + { url = "https://files.pythonhosted.org/packages/25/bd/56d9ec6b9f0fc4e0d95288759f3179f0fcd34b1a1526b75673d2f6d5196f/pypdfium2-4.30.0-py3-none-win_amd64.whl", hash = "sha256:90dbb2ac07be53219f56be09961eb95cf2473f834d01a42d901d13ccfad64b4c", size = 2892098, upload-time = "2024-05-09T18:33:13.107Z" }, + { url = "https://files.pythonhosted.org/packages/be/7a/097801205b991bc3115e8af1edb850d30aeaf0118520b016354cf5ccd3f6/pypdfium2-4.30.0-py3-none-win_arm64.whl", hash = "sha256:119b2969a6d6b1e8d55e99caaf05290294f2d0fe49c12a3f17102d01c441bd29", size = 2752118, upload-time = "2024-05-09T18:33:15.489Z" }, ] [[package]] @@ -2704,18 +2719,18 @@ dependencies = [ { name = "pluggy" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618 } +sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750 }, + { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, ] [[package]] name = "python-crontab" version = "3.3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/99/7f/c54fb7e70b59844526aa4ae321e927a167678660ab51dda979955eafb89a/python_crontab-3.3.0.tar.gz", hash = "sha256:007c8aee68dddf3e04ec4dce0fac124b93bd68be7470fc95d2a9617a15de291b", size = 57626 } +sdist = { url = "https://files.pythonhosted.org/packages/99/7f/c54fb7e70b59844526aa4ae321e927a167678660ab51dda979955eafb89a/python_crontab-3.3.0.tar.gz", hash = "sha256:007c8aee68dddf3e04ec4dce0fac124b93bd68be7470fc95d2a9617a15de291b", size = 57626, upload-time = "2025-07-13T20:05:35.535Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/47/42/bb4afa5b088f64092036221843fc989b7db9d9d302494c1f8b024ee78a46/python_crontab-3.3.0-py3-none-any.whl", hash = "sha256:739a778b1a771379b75654e53fd4df58e5c63a9279a63b5dfe44c0fcc3ee7884", size = 27533 }, + { url = "https://files.pythonhosted.org/packages/47/42/bb4afa5b088f64092036221843fc989b7db9d9d302494c1f8b024ee78a46/python_crontab-3.3.0-py3-none-any.whl", hash = "sha256:739a778b1a771379b75654e53fd4df58e5c63a9279a63b5dfe44c0fcc3ee7884", size = 27533, upload-time = "2025-07-13T20:05:34.266Z" }, ] [[package]] @@ -2725,18 +2740,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "six" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] [[package]] name = "python-dotenv" version = "1.0.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bc/57/e84d88dfe0aec03b7a2d4327012c1627ab5f03652216c63d49846d7a6c58/python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", size = 39115 } +sdist = { url = "https://files.pythonhosted.org/packages/bc/57/e84d88dfe0aec03b7a2d4327012c1627ab5f03652216c63d49846d7a6c58/python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", size = 39115, upload-time = "2024-01-23T06:33:00.505Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863 }, + { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863, upload-time = "2024-01-23T06:32:58.246Z" }, ] [[package]] @@ -2746,18 +2761,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "simple-websocket" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c9/d8/63e5535ab21dc4998ba1cfe13690ccf122883a38f025dca24d6e56c05eba/python_engineio-4.12.3.tar.gz", hash = "sha256:35633e55ec30915e7fc8f7e34ca8d73ee0c080cec8a8cd04faf2d7396f0a7a7a", size = 91910 } +sdist = { url = "https://files.pythonhosted.org/packages/c9/d8/63e5535ab21dc4998ba1cfe13690ccf122883a38f025dca24d6e56c05eba/python_engineio-4.12.3.tar.gz", hash = "sha256:35633e55ec30915e7fc8f7e34ca8d73ee0c080cec8a8cd04faf2d7396f0a7a7a", size = 91910, upload-time = "2025-09-28T06:31:36.765Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d8/f0/c5aa0a69fd9326f013110653543f36ece4913c17921f3e1dbd78e1b423ee/python_engineio-4.12.3-py3-none-any.whl", hash = "sha256:7c099abb2a27ea7ab429c04da86ab2d82698cdd6c52406cb73766fe454feb7e1", size = 59637 }, + { url = "https://files.pythonhosted.org/packages/d8/f0/c5aa0a69fd9326f013110653543f36ece4913c17921f3e1dbd78e1b423ee/python_engineio-4.12.3-py3-none-any.whl", hash = "sha256:7c099abb2a27ea7ab429c04da86ab2d82698cdd6c52406cb73766fe454feb7e1", size = 59637, upload-time = "2025-09-28T06:31:35.354Z" }, ] [[package]] name = "python-magic" version = "0.4.27" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/da/db/0b3e28ac047452d079d375ec6798bf76a036a08182dbb39ed38116a49130/python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b", size = 14677 } +sdist = { url = "https://files.pythonhosted.org/packages/da/db/0b3e28ac047452d079d375ec6798bf76a036a08182dbb39ed38116a49130/python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b", size = 14677, upload-time = "2022-06-07T20:16:59.508Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6c/73/9f872cb81fc5c3bb48f7227872c28975f998f3e7c2b1c16e95e6432bbb90/python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3", size = 13840 }, + { url = "https://files.pythonhosted.org/packages/6c/73/9f872cb81fc5c3bb48f7227872c28975f998f3e7c2b1c16e95e6432bbb90/python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3", size = 13840, upload-time = "2022-06-07T20:16:57.763Z" }, ] [[package]] @@ -2768,9 +2783,9 @@ dependencies = [ { name = "bidict" }, { name = "python-engineio" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/21/1a/396d50ccf06ee539fa758ce5623b59a9cb27637fc4b2dc07ed08bf495e77/python_socketio-5.13.0.tar.gz", hash = "sha256:ac4e19a0302ae812e23b712ec8b6427ca0521f7c582d6abb096e36e24a263029", size = 121125 } +sdist = { url = "https://files.pythonhosted.org/packages/21/1a/396d50ccf06ee539fa758ce5623b59a9cb27637fc4b2dc07ed08bf495e77/python_socketio-5.13.0.tar.gz", hash = "sha256:ac4e19a0302ae812e23b712ec8b6427ca0521f7c582d6abb096e36e24a263029", size = 121125, upload-time = "2025-04-12T15:46:59.933Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/32/b4fb8585d1be0f68bde7e110dffbcf354915f77ad8c778563f0ad9655c02/python_socketio-5.13.0-py3-none-any.whl", hash = "sha256:51f68d6499f2df8524668c24bcec13ba1414117cfb3a90115c559b601ab10caf", size = 77800 }, + { url = "https://files.pythonhosted.org/packages/3c/32/b4fb8585d1be0f68bde7e110dffbcf354915f77ad8c778563f0ad9655c02/python_socketio-5.13.0-py3-none-any.whl", hash = "sha256:51f68d6499f2df8524668c24bcec13ba1414117cfb3a90115c559b601ab10caf", size = 77800, upload-time = "2025-04-12T15:46:58.412Z" }, ] [[package]] @@ -2780,18 +2795,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "defusedxml" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5f/4a/29feb8da6c44f77007dcd29518fea73a3d5653ee02a587ae1f17f1f5ddb5/python3-openid-3.2.0.tar.gz", hash = "sha256:33fbf6928f401e0b790151ed2b5290b02545e8775f982485205a066f874aaeaf", size = 305600 } +sdist = { url = "https://files.pythonhosted.org/packages/5f/4a/29feb8da6c44f77007dcd29518fea73a3d5653ee02a587ae1f17f1f5ddb5/python3-openid-3.2.0.tar.gz", hash = "sha256:33fbf6928f401e0b790151ed2b5290b02545e8775f982485205a066f874aaeaf", size = 305600, upload-time = "2020-06-29T12:15:49.026Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/a5/c6ba13860bdf5525f1ab01e01cc667578d6f1efc8a1dba355700fb04c29b/python3_openid-3.2.0-py3-none-any.whl", hash = "sha256:6626f771e0417486701e0b4daff762e7212e820ca5b29fcc0d05f6f8736dfa6b", size = 133681 }, + { url = "https://files.pythonhosted.org/packages/e0/a5/c6ba13860bdf5525f1ab01e01cc667578d6f1efc8a1dba355700fb04c29b/python3_openid-3.2.0-py3-none-any.whl", hash = "sha256:6626f771e0417486701e0b4daff762e7212e820ca5b29fcc0d05f6f8736dfa6b", size = 133681, upload-time = "2020-06-29T12:15:47.502Z" }, ] [[package]] name = "pytz" version = "2025.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884 } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 }, + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, ] [[package]] @@ -2799,27 +2814,27 @@ name = "pywin32" version = "311" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543 }, - { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040 }, - { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102 }, + { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, + { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, ] [[package]] name = "pyyaml" version = "6.0.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960 } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063 }, - { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973 }, - { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116 }, - { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011 }, - { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870 }, - { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089 }, - { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181 }, - { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658 }, - { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003 }, - { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344 }, + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, ] [[package]] @@ -2835,18 +2850,18 @@ dependencies = [ { name = "pydantic" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/79/8b/76c7d325e11d97cb8eb5e261c3759e9ed6664735afbf32fdded5b580690c/qdrant_client-1.15.1.tar.gz", hash = "sha256:631f1f3caebfad0fd0c1fba98f41be81d9962b7bf3ca653bed3b727c0e0cbe0e", size = 295297 } +sdist = { url = "https://files.pythonhosted.org/packages/79/8b/76c7d325e11d97cb8eb5e261c3759e9ed6664735afbf32fdded5b580690c/qdrant_client-1.15.1.tar.gz", hash = "sha256:631f1f3caebfad0fd0c1fba98f41be81d9962b7bf3ca653bed3b727c0e0cbe0e", size = 295297, upload-time = "2025-07-31T19:35:19.627Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/33/d8df6a2b214ffbe4138db9a1efe3248f67dc3c671f82308bea1582ecbbb7/qdrant_client-1.15.1-py3-none-any.whl", hash = "sha256:2b975099b378382f6ca1cfb43f0d59e541be6e16a5892f282a4b8de7eff5cb63", size = 337331 }, + { url = "https://files.pythonhosted.org/packages/ef/33/d8df6a2b214ffbe4138db9a1efe3248f67dc3c671f82308bea1582ecbbb7/qdrant_client-1.15.1-py3-none-any.whl", hash = "sha256:2b975099b378382f6ca1cfb43f0d59e541be6e16a5892f282a4b8de7eff5cb63", size = 337331, upload-time = "2025-07-31T19:35:17.539Z" }, ] [[package]] name = "redis" version = "5.2.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/47/da/d283a37303a995cd36f8b92db85135153dc4f7a8e4441aa827721b442cfb/redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f", size = 4608355 } +sdist = { url = "https://files.pythonhosted.org/packages/47/da/d283a37303a995cd36f8b92db85135153dc4f7a8e4441aa827721b442cfb/redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f", size = 4608355, upload-time = "2024-12-06T09:50:41.956Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/5f/fa26b9b2672cbe30e07d9a5bdf39cf16e3b80b42916757c5f92bca88e4ba/redis-5.2.1-py3-none-any.whl", hash = "sha256:ee7e1056b9aea0f04c6c2ed59452947f34c4940ee025f5dd83e6a6418b6989e4", size = 261502 }, + { url = "https://files.pythonhosted.org/packages/3c/5f/fa26b9b2672cbe30e07d9a5bdf39cf16e3b80b42916757c5f92bca88e4ba/redis-5.2.1-py3-none-any.whl", hash = "sha256:ee7e1056b9aea0f04c6c2ed59452947f34c4940ee025f5dd83e6a6418b6989e4", size = 261502, upload-time = "2024-12-06T09:50:39.656Z" }, ] [[package]] @@ -2858,31 +2873,31 @@ dependencies = [ { name = "rpds-py" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744 } +sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744, upload-time = "2025-01-25T08:48:16.138Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775 }, + { url = "https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775, upload-time = "2025-01-25T08:48:14.241Z" }, ] [[package]] name = "regex" version = "2025.9.18" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/49/d3/eaa0d28aba6ad1827ad1e716d9a93e1ba963ada61887498297d3da715133/regex-2025.9.18.tar.gz", hash = "sha256:c5ba23274c61c6fef447ba6a39333297d0c247f53059dba0bca415cac511edc4", size = 400917 } +sdist = { url = "https://files.pythonhosted.org/packages/49/d3/eaa0d28aba6ad1827ad1e716d9a93e1ba963ada61887498297d3da715133/regex-2025.9.18.tar.gz", hash = "sha256:c5ba23274c61c6fef447ba6a39333297d0c247f53059dba0bca415cac511edc4", size = 400917, upload-time = "2025-09-19T00:38:35.79Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b0/99/05859d87a66ae7098222d65748f11ef7f2dff51bfd7482a4e2256c90d72b/regex-2025.9.18-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:436e1b31d7efd4dcd52091d076482031c611dde58bf9c46ca6d0a26e33053a7e", size = 486335 }, - { url = "https://files.pythonhosted.org/packages/97/7e/d43d4e8b978890932cf7b0957fce58c5b08c66f32698f695b0c2c24a48bf/regex-2025.9.18-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c190af81e5576b9c5fdc708f781a52ff20f8b96386c6e2e0557a78402b029f4a", size = 289720 }, - { url = "https://files.pythonhosted.org/packages/bb/3b/ff80886089eb5dcf7e0d2040d9aaed539e25a94300403814bb24cc775058/regex-2025.9.18-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e4121f1ce2b2b5eec4b397cc1b277686e577e658d8f5870b7eb2d726bd2300ab", size = 287257 }, - { url = "https://files.pythonhosted.org/packages/ee/66/243edf49dd8720cba8d5245dd4d6adcb03a1defab7238598c0c97cf549b8/regex-2025.9.18-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:300e25dbbf8299d87205e821a201057f2ef9aa3deb29caa01cd2cac669e508d5", size = 797463 }, - { url = "https://files.pythonhosted.org/packages/df/71/c9d25a1142c70432e68bb03211d4a82299cd1c1fbc41db9409a394374ef5/regex-2025.9.18-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7b47fcf9f5316c0bdaf449e879407e1b9937a23c3b369135ca94ebc8d74b1742", size = 862670 }, - { url = "https://files.pythonhosted.org/packages/f8/8f/329b1efc3a64375a294e3a92d43372bf1a351aa418e83c21f2f01cf6ec41/regex-2025.9.18-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:57a161bd3acaa4b513220b49949b07e252165e6b6dc910ee7617a37ff4f5b425", size = 910881 }, - { url = "https://files.pythonhosted.org/packages/35/9e/a91b50332a9750519320ed30ec378b74c996f6befe282cfa6bb6cea7e9fd/regex-2025.9.18-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f130c3a7845ba42de42f380fff3c8aebe89a810747d91bcf56d40a069f15352", size = 802011 }, - { url = "https://files.pythonhosted.org/packages/a4/1d/6be3b8d7856b6e0d7ee7f942f437d0a76e0d5622983abbb6d21e21ab9a17/regex-2025.9.18-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5f96fa342b6f54dcba928dd452e8d8cb9f0d63e711d1721cd765bb9f73bb048d", size = 786668 }, - { url = "https://files.pythonhosted.org/packages/cb/ce/4a60e53df58bd157c5156a1736d3636f9910bdcc271d067b32b7fcd0c3a8/regex-2025.9.18-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0f0d676522d68c207828dcd01fb6f214f63f238c283d9f01d85fc664c7c85b56", size = 856578 }, - { url = "https://files.pythonhosted.org/packages/86/e8/162c91bfe7217253afccde112868afb239f94703de6580fb235058d506a6/regex-2025.9.18-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:40532bff8a1a0621e7903ae57fce88feb2e8a9a9116d341701302c9302aef06e", size = 849017 }, - { url = "https://files.pythonhosted.org/packages/35/34/42b165bc45289646ea0959a1bc7531733e90b47c56a72067adfe6b3251f6/regex-2025.9.18-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:039f11b618ce8d71a1c364fdee37da1012f5a3e79b1b2819a9f389cd82fd6282", size = 788150 }, - { url = "https://files.pythonhosted.org/packages/79/5d/cdd13b1f3c53afa7191593a7ad2ee24092a5a46417725ffff7f64be8342d/regex-2025.9.18-cp312-cp312-win32.whl", hash = "sha256:e1dd06f981eb226edf87c55d523131ade7285137fbde837c34dc9d1bf309f459", size = 264536 }, - { url = "https://files.pythonhosted.org/packages/e0/f5/4a7770c9a522e7d2dc1fa3ffc83ab2ab33b0b22b447e62cffef186805302/regex-2025.9.18-cp312-cp312-win_amd64.whl", hash = "sha256:3d86b5247bf25fa3715e385aa9ff272c307e0636ce0c9595f64568b41f0a9c77", size = 275501 }, - { url = "https://files.pythonhosted.org/packages/df/05/9ce3e110e70d225ecbed455b966003a3afda5e58e8aec2964042363a18f4/regex-2025.9.18-cp312-cp312-win_arm64.whl", hash = "sha256:032720248cbeeae6444c269b78cb15664458b7bb9ed02401d3da59fe4d68c3a5", size = 268601 }, + { url = "https://files.pythonhosted.org/packages/b0/99/05859d87a66ae7098222d65748f11ef7f2dff51bfd7482a4e2256c90d72b/regex-2025.9.18-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:436e1b31d7efd4dcd52091d076482031c611dde58bf9c46ca6d0a26e33053a7e", size = 486335, upload-time = "2025-09-19T00:36:03.661Z" }, + { url = "https://files.pythonhosted.org/packages/97/7e/d43d4e8b978890932cf7b0957fce58c5b08c66f32698f695b0c2c24a48bf/regex-2025.9.18-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c190af81e5576b9c5fdc708f781a52ff20f8b96386c6e2e0557a78402b029f4a", size = 289720, upload-time = "2025-09-19T00:36:05.471Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3b/ff80886089eb5dcf7e0d2040d9aaed539e25a94300403814bb24cc775058/regex-2025.9.18-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e4121f1ce2b2b5eec4b397cc1b277686e577e658d8f5870b7eb2d726bd2300ab", size = 287257, upload-time = "2025-09-19T00:36:07.072Z" }, + { url = "https://files.pythonhosted.org/packages/ee/66/243edf49dd8720cba8d5245dd4d6adcb03a1defab7238598c0c97cf549b8/regex-2025.9.18-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:300e25dbbf8299d87205e821a201057f2ef9aa3deb29caa01cd2cac669e508d5", size = 797463, upload-time = "2025-09-19T00:36:08.399Z" }, + { url = "https://files.pythonhosted.org/packages/df/71/c9d25a1142c70432e68bb03211d4a82299cd1c1fbc41db9409a394374ef5/regex-2025.9.18-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7b47fcf9f5316c0bdaf449e879407e1b9937a23c3b369135ca94ebc8d74b1742", size = 862670, upload-time = "2025-09-19T00:36:10.101Z" }, + { url = "https://files.pythonhosted.org/packages/f8/8f/329b1efc3a64375a294e3a92d43372bf1a351aa418e83c21f2f01cf6ec41/regex-2025.9.18-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:57a161bd3acaa4b513220b49949b07e252165e6b6dc910ee7617a37ff4f5b425", size = 910881, upload-time = "2025-09-19T00:36:12.223Z" }, + { url = "https://files.pythonhosted.org/packages/35/9e/a91b50332a9750519320ed30ec378b74c996f6befe282cfa6bb6cea7e9fd/regex-2025.9.18-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f130c3a7845ba42de42f380fff3c8aebe89a810747d91bcf56d40a069f15352", size = 802011, upload-time = "2025-09-19T00:36:13.901Z" }, + { url = "https://files.pythonhosted.org/packages/a4/1d/6be3b8d7856b6e0d7ee7f942f437d0a76e0d5622983abbb6d21e21ab9a17/regex-2025.9.18-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5f96fa342b6f54dcba928dd452e8d8cb9f0d63e711d1721cd765bb9f73bb048d", size = 786668, upload-time = "2025-09-19T00:36:15.391Z" }, + { url = "https://files.pythonhosted.org/packages/cb/ce/4a60e53df58bd157c5156a1736d3636f9910bdcc271d067b32b7fcd0c3a8/regex-2025.9.18-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0f0d676522d68c207828dcd01fb6f214f63f238c283d9f01d85fc664c7c85b56", size = 856578, upload-time = "2025-09-19T00:36:16.845Z" }, + { url = "https://files.pythonhosted.org/packages/86/e8/162c91bfe7217253afccde112868afb239f94703de6580fb235058d506a6/regex-2025.9.18-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:40532bff8a1a0621e7903ae57fce88feb2e8a9a9116d341701302c9302aef06e", size = 849017, upload-time = "2025-09-19T00:36:18.597Z" }, + { url = "https://files.pythonhosted.org/packages/35/34/42b165bc45289646ea0959a1bc7531733e90b47c56a72067adfe6b3251f6/regex-2025.9.18-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:039f11b618ce8d71a1c364fdee37da1012f5a3e79b1b2819a9f389cd82fd6282", size = 788150, upload-time = "2025-09-19T00:36:20.464Z" }, + { url = "https://files.pythonhosted.org/packages/79/5d/cdd13b1f3c53afa7191593a7ad2ee24092a5a46417725ffff7f64be8342d/regex-2025.9.18-cp312-cp312-win32.whl", hash = "sha256:e1dd06f981eb226edf87c55d523131ade7285137fbde837c34dc9d1bf309f459", size = 264536, upload-time = "2025-09-19T00:36:21.922Z" }, + { url = "https://files.pythonhosted.org/packages/e0/f5/4a7770c9a522e7d2dc1fa3ffc83ab2ab33b0b22b447e62cffef186805302/regex-2025.9.18-cp312-cp312-win_amd64.whl", hash = "sha256:3d86b5247bf25fa3715e385aa9ff272c307e0636ce0c9595f64568b41f0a9c77", size = 275501, upload-time = "2025-09-19T00:36:23.4Z" }, + { url = "https://files.pythonhosted.org/packages/df/05/9ce3e110e70d225ecbed455b966003a3afda5e58e8aec2964042363a18f4/regex-2025.9.18-cp312-cp312-win_arm64.whl", hash = "sha256:032720248cbeeae6444c269b78cb15664458b7bb9ed02401d3da59fe4d68c3a5", size = 268601, upload-time = "2025-09-19T00:36:25.092Z" }, ] [[package]] @@ -2895,9 +2910,9 @@ dependencies = [ { name = "idna" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9d/be/10918a2eac4ae9f02f6cfe6414b7a155ccd8f7f9d4380d62fd5b955065c3/requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1", size = 110794 } +sdist = { url = "https://files.pythonhosted.org/packages/9d/be/10918a2eac4ae9f02f6cfe6414b7a155ccd8f7f9d4380d62fd5b955065c3/requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1", size = 110794, upload-time = "2023-05-22T15:12:44.175Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", size = 62574 }, + { url = "https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", size = 62574, upload-time = "2023-05-22T15:12:42.313Z" }, ] [[package]] @@ -2908,9 +2923,9 @@ dependencies = [ { name = "oauthlib" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650 } +sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650, upload-time = "2024-03-22T20:32:29.939Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179 }, + { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179, upload-time = "2024-03-22T20:32:28.055Z" }, ] [[package]] @@ -2920,9 +2935,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888 } +sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481 }, + { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, ] [[package]] @@ -2933,32 +2948,32 @@ dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fe/75/af448d8e52bf1d8fa6a9d089ca6c07ff4453d86c65c145d0a300bb073b9b/rich-14.1.0.tar.gz", hash = "sha256:e497a48b844b0320d45007cdebfeaeed8db2a4f4bcf49f15e455cfc4af11eaa8", size = 224441 } +sdist = { url = "https://files.pythonhosted.org/packages/fe/75/af448d8e52bf1d8fa6a9d089ca6c07ff4453d86c65c145d0a300bb073b9b/rich-14.1.0.tar.gz", hash = "sha256:e497a48b844b0320d45007cdebfeaeed8db2a4f4bcf49f15e455cfc4af11eaa8", size = 224441, upload-time = "2025-07-25T07:32:58.125Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/30/3c4d035596d3cf444529e0b2953ad0466f6049528a879d27534700580395/rich-14.1.0-py3-none-any.whl", hash = "sha256:536f5f1785986d6dbdea3c75205c473f970777b4a0d6c6dd1b696aa05a3fa04f", size = 243368 }, + { url = "https://files.pythonhosted.org/packages/e3/30/3c4d035596d3cf444529e0b2953ad0466f6049528a879d27534700580395/rich-14.1.0-py3-none-any.whl", hash = "sha256:536f5f1785986d6dbdea3c75205c473f970777b4a0d6c6dd1b696aa05a3fa04f", size = 243368, upload-time = "2025-07-25T07:32:56.73Z" }, ] [[package]] name = "rpds-py" version = "0.27.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e9/dd/2c0cbe774744272b0ae725f44032c77bdcab6e8bcf544bffa3b6e70c8dba/rpds_py-0.27.1.tar.gz", hash = "sha256:26a1c73171d10b7acccbded82bf6a586ab8203601e565badc74bbbf8bc5a10f8", size = 27479 } +sdist = { url = "https://files.pythonhosted.org/packages/e9/dd/2c0cbe774744272b0ae725f44032c77bdcab6e8bcf544bffa3b6e70c8dba/rpds_py-0.27.1.tar.gz", hash = "sha256:26a1c73171d10b7acccbded82bf6a586ab8203601e565badc74bbbf8bc5a10f8", size = 27479, upload-time = "2025-08-27T12:16:36.024Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/fe/38de28dee5df58b8198c743fe2bea0c785c6d40941b9950bac4cdb71a014/rpds_py-0.27.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ae2775c1973e3c30316892737b91f9283f9908e3cc7625b9331271eaaed7dc90", size = 361887 }, - { url = "https://files.pythonhosted.org/packages/7c/9a/4b6c7eedc7dd90986bf0fab6ea2a091ec11c01b15f8ba0a14d3f80450468/rpds_py-0.27.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2643400120f55c8a96f7c9d858f7be0c88d383cd4653ae2cf0d0c88f668073e5", size = 345795 }, - { url = "https://files.pythonhosted.org/packages/6f/0e/e650e1b81922847a09cca820237b0edee69416a01268b7754d506ade11ad/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16323f674c089b0360674a4abd28d5042947d54ba620f72514d69be4ff64845e", size = 385121 }, - { url = "https://files.pythonhosted.org/packages/1b/ea/b306067a712988e2bff00dcc7c8f31d26c29b6d5931b461aa4b60a013e33/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a1f4814b65eacac94a00fc9a526e3fdafd78e439469644032032d0d63de4881", size = 398976 }, - { url = "https://files.pythonhosted.org/packages/2c/0a/26dc43c8840cb8fe239fe12dbc8d8de40f2365e838f3d395835dde72f0e5/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ba32c16b064267b22f1850a34051121d423b6f7338a12b9459550eb2096e7ec", size = 525953 }, - { url = "https://files.pythonhosted.org/packages/22/14/c85e8127b573aaf3a0cbd7fbb8c9c99e735a4a02180c84da2a463b766e9e/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5c20f33fd10485b80f65e800bbe5f6785af510b9f4056c5a3c612ebc83ba6cb", size = 407915 }, - { url = "https://files.pythonhosted.org/packages/ed/7b/8f4fee9ba1fb5ec856eb22d725a4efa3deb47f769597c809e03578b0f9d9/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:466bfe65bd932da36ff279ddd92de56b042f2266d752719beb97b08526268ec5", size = 386883 }, - { url = "https://files.pythonhosted.org/packages/86/47/28fa6d60f8b74fcdceba81b272f8d9836ac0340570f68f5df6b41838547b/rpds_py-0.27.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:41e532bbdcb57c92ba3be62c42e9f096431b4cf478da9bc3bc6ce5c38ab7ba7a", size = 405699 }, - { url = "https://files.pythonhosted.org/packages/d0/fd/c5987b5e054548df56953a21fe2ebed51fc1ec7c8f24fd41c067b68c4a0a/rpds_py-0.27.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f149826d742b406579466283769a8ea448eed82a789af0ed17b0cd5770433444", size = 423713 }, - { url = "https://files.pythonhosted.org/packages/ac/ba/3c4978b54a73ed19a7d74531be37a8bcc542d917c770e14d372b8daea186/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:80c60cfb5310677bd67cb1e85a1e8eb52e12529545441b43e6f14d90b878775a", size = 562324 }, - { url = "https://files.pythonhosted.org/packages/b5/6c/6943a91768fec16db09a42b08644b960cff540c66aab89b74be6d4a144ba/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7ee6521b9baf06085f62ba9c7a3e5becffbc32480d2f1b351559c001c38ce4c1", size = 593646 }, - { url = "https://files.pythonhosted.org/packages/11/73/9d7a8f4be5f4396f011a6bb7a19fe26303a0dac9064462f5651ced2f572f/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a512c8263249a9d68cac08b05dd59d2b3f2061d99b322813cbcc14c3c7421998", size = 558137 }, - { url = "https://files.pythonhosted.org/packages/6e/96/6772cbfa0e2485bcceef8071de7821f81aeac8bb45fbfd5542a3e8108165/rpds_py-0.27.1-cp312-cp312-win32.whl", hash = "sha256:819064fa048ba01b6dadc5116f3ac48610435ac9a0058bbde98e569f9e785c39", size = 221343 }, - { url = "https://files.pythonhosted.org/packages/67/b6/c82f0faa9af1c6a64669f73a17ee0eeef25aff30bb9a1c318509efe45d84/rpds_py-0.27.1-cp312-cp312-win_amd64.whl", hash = "sha256:d9199717881f13c32c4046a15f024971a3b78ad4ea029e8da6b86e5aa9cf4594", size = 232497 }, - { url = "https://files.pythonhosted.org/packages/e1/96/2817b44bd2ed11aebacc9251da03689d56109b9aba5e311297b6902136e2/rpds_py-0.27.1-cp312-cp312-win_arm64.whl", hash = "sha256:33aa65b97826a0e885ef6e278fbd934e98cdcfed80b63946025f01e2f5b29502", size = 222790 }, + { url = "https://files.pythonhosted.org/packages/bd/fe/38de28dee5df58b8198c743fe2bea0c785c6d40941b9950bac4cdb71a014/rpds_py-0.27.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ae2775c1973e3c30316892737b91f9283f9908e3cc7625b9331271eaaed7dc90", size = 361887, upload-time = "2025-08-27T12:13:10.233Z" }, + { url = "https://files.pythonhosted.org/packages/7c/9a/4b6c7eedc7dd90986bf0fab6ea2a091ec11c01b15f8ba0a14d3f80450468/rpds_py-0.27.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2643400120f55c8a96f7c9d858f7be0c88d383cd4653ae2cf0d0c88f668073e5", size = 345795, upload-time = "2025-08-27T12:13:11.65Z" }, + { url = "https://files.pythonhosted.org/packages/6f/0e/e650e1b81922847a09cca820237b0edee69416a01268b7754d506ade11ad/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16323f674c089b0360674a4abd28d5042947d54ba620f72514d69be4ff64845e", size = 385121, upload-time = "2025-08-27T12:13:13.008Z" }, + { url = "https://files.pythonhosted.org/packages/1b/ea/b306067a712988e2bff00dcc7c8f31d26c29b6d5931b461aa4b60a013e33/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a1f4814b65eacac94a00fc9a526e3fdafd78e439469644032032d0d63de4881", size = 398976, upload-time = "2025-08-27T12:13:14.368Z" }, + { url = "https://files.pythonhosted.org/packages/2c/0a/26dc43c8840cb8fe239fe12dbc8d8de40f2365e838f3d395835dde72f0e5/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ba32c16b064267b22f1850a34051121d423b6f7338a12b9459550eb2096e7ec", size = 525953, upload-time = "2025-08-27T12:13:15.774Z" }, + { url = "https://files.pythonhosted.org/packages/22/14/c85e8127b573aaf3a0cbd7fbb8c9c99e735a4a02180c84da2a463b766e9e/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5c20f33fd10485b80f65e800bbe5f6785af510b9f4056c5a3c612ebc83ba6cb", size = 407915, upload-time = "2025-08-27T12:13:17.379Z" }, + { url = "https://files.pythonhosted.org/packages/ed/7b/8f4fee9ba1fb5ec856eb22d725a4efa3deb47f769597c809e03578b0f9d9/rpds_py-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:466bfe65bd932da36ff279ddd92de56b042f2266d752719beb97b08526268ec5", size = 386883, upload-time = "2025-08-27T12:13:18.704Z" }, + { url = "https://files.pythonhosted.org/packages/86/47/28fa6d60f8b74fcdceba81b272f8d9836ac0340570f68f5df6b41838547b/rpds_py-0.27.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:41e532bbdcb57c92ba3be62c42e9f096431b4cf478da9bc3bc6ce5c38ab7ba7a", size = 405699, upload-time = "2025-08-27T12:13:20.089Z" }, + { url = "https://files.pythonhosted.org/packages/d0/fd/c5987b5e054548df56953a21fe2ebed51fc1ec7c8f24fd41c067b68c4a0a/rpds_py-0.27.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f149826d742b406579466283769a8ea448eed82a789af0ed17b0cd5770433444", size = 423713, upload-time = "2025-08-27T12:13:21.436Z" }, + { url = "https://files.pythonhosted.org/packages/ac/ba/3c4978b54a73ed19a7d74531be37a8bcc542d917c770e14d372b8daea186/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:80c60cfb5310677bd67cb1e85a1e8eb52e12529545441b43e6f14d90b878775a", size = 562324, upload-time = "2025-08-27T12:13:22.789Z" }, + { url = "https://files.pythonhosted.org/packages/b5/6c/6943a91768fec16db09a42b08644b960cff540c66aab89b74be6d4a144ba/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7ee6521b9baf06085f62ba9c7a3e5becffbc32480d2f1b351559c001c38ce4c1", size = 593646, upload-time = "2025-08-27T12:13:24.122Z" }, + { url = "https://files.pythonhosted.org/packages/11/73/9d7a8f4be5f4396f011a6bb7a19fe26303a0dac9064462f5651ced2f572f/rpds_py-0.27.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a512c8263249a9d68cac08b05dd59d2b3f2061d99b322813cbcc14c3c7421998", size = 558137, upload-time = "2025-08-27T12:13:25.557Z" }, + { url = "https://files.pythonhosted.org/packages/6e/96/6772cbfa0e2485bcceef8071de7821f81aeac8bb45fbfd5542a3e8108165/rpds_py-0.27.1-cp312-cp312-win32.whl", hash = "sha256:819064fa048ba01b6dadc5116f3ac48610435ac9a0058bbde98e569f9e785c39", size = 221343, upload-time = "2025-08-27T12:13:26.967Z" }, + { url = "https://files.pythonhosted.org/packages/67/b6/c82f0faa9af1c6a64669f73a17ee0eeef25aff30bb9a1c318509efe45d84/rpds_py-0.27.1-cp312-cp312-win_amd64.whl", hash = "sha256:d9199717881f13c32c4046a15f024971a3b78ad4ea029e8da6b86e5aa9cf4594", size = 232497, upload-time = "2025-08-27T12:13:28.326Z" }, + { url = "https://files.pythonhosted.org/packages/e1/96/2817b44bd2ed11aebacc9251da03689d56109b9aba5e311297b6902136e2/rpds_py-0.27.1-cp312-cp312-win_arm64.whl", hash = "sha256:33aa65b97826a0e885ef6e278fbd934e98cdcfed80b63946025f01e2f5b29502", size = 222790, upload-time = "2025-08-27T12:13:29.71Z" }, ] [[package]] @@ -2968,35 +2983,35 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pyasn1" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034 } +sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload-time = "2025-04-16T09:51:18.218Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696 }, + { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, ] [[package]] name = "ruff" version = "0.13.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/02/df/8d7d8c515d33adfc540e2edf6c6021ea1c5a58a678d8cfce9fae59aabcab/ruff-0.13.2.tar.gz", hash = "sha256:cb12fffd32fb16d32cef4ed16d8c7cdc27ed7c944eaa98d99d01ab7ab0b710ff", size = 5416417 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/84/5716a7fa4758e41bf70e603e13637c42cfb9dbf7ceb07180211b9bbf75ef/ruff-0.13.2-py3-none-linux_armv6l.whl", hash = "sha256:3796345842b55f033a78285e4f1641078f902020d8450cade03aad01bffd81c3", size = 12343254 }, - { url = "https://files.pythonhosted.org/packages/9b/77/c7042582401bb9ac8eff25360e9335e901d7a1c0749a2b28ba4ecb239991/ruff-0.13.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ff7e4dda12e683e9709ac89e2dd436abf31a4d8a8fc3d89656231ed808e231d2", size = 13040891 }, - { url = "https://files.pythonhosted.org/packages/c6/15/125a7f76eb295cb34d19c6778e3a82ace33730ad4e6f28d3427e134a02e0/ruff-0.13.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c75e9d2a2fafd1fdd895d0e7e24b44355984affdde1c412a6f6d3f6e16b22d46", size = 12243588 }, - { url = "https://files.pythonhosted.org/packages/9e/eb/0093ae04a70f81f8be7fd7ed6456e926b65d238fc122311293d033fdf91e/ruff-0.13.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cceac74e7bbc53ed7d15d1042ffe7b6577bf294611ad90393bf9b2a0f0ec7cb6", size = 12491359 }, - { url = "https://files.pythonhosted.org/packages/43/fe/72b525948a6956f07dad4a6f122336b6a05f2e3fd27471cea612349fedb9/ruff-0.13.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6ae3f469b5465ba6d9721383ae9d49310c19b452a161b57507764d7ef15f4b07", size = 12162486 }, - { url = "https://files.pythonhosted.org/packages/6a/e3/0fac422bbbfb2ea838023e0d9fcf1f30183d83ab2482800e2cb892d02dfe/ruff-0.13.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f8f9e3cd6714358238cd6626b9d43026ed19c0c018376ac1ef3c3a04ffb42d8", size = 13871203 }, - { url = "https://files.pythonhosted.org/packages/6b/82/b721c8e3ec5df6d83ba0e45dcf00892c4f98b325256c42c38ef136496cbf/ruff-0.13.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c6ed79584a8f6cbe2e5d7dbacf7cc1ee29cbdb5df1172e77fbdadc8bb85a1f89", size = 14929635 }, - { url = "https://files.pythonhosted.org/packages/c4/a0/ad56faf6daa507b83079a1ad7a11694b87d61e6bf01c66bd82b466f21821/ruff-0.13.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aed130b2fde049cea2019f55deb939103123cdd191105f97a0599a3e753d61b0", size = 14338783 }, - { url = "https://files.pythonhosted.org/packages/47/77/ad1d9156db8f99cd01ee7e29d74b34050e8075a8438e589121fcd25c4b08/ruff-0.13.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1887c230c2c9d65ed1b4e4cfe4d255577ea28b718ae226c348ae68df958191aa", size = 13355322 }, - { url = "https://files.pythonhosted.org/packages/64/8b/e87cfca2be6f8b9f41f0bb12dc48c6455e2d66df46fe61bb441a226f1089/ruff-0.13.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5bcb10276b69b3cfea3a102ca119ffe5c6ba3901e20e60cf9efb53fa417633c3", size = 13354427 }, - { url = "https://files.pythonhosted.org/packages/7f/df/bf382f3fbead082a575edb860897287f42b1b3c694bafa16bc9904c11ed3/ruff-0.13.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:afa721017aa55a555b2ff7944816587f1cb813c2c0a882d158f59b832da1660d", size = 13537637 }, - { url = "https://files.pythonhosted.org/packages/51/70/1fb7a7c8a6fc8bd15636288a46e209e81913b87988f26e1913d0851e54f4/ruff-0.13.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:1dbc875cf3720c64b3990fef8939334e74cb0ca65b8dbc61d1f439201a38101b", size = 12340025 }, - { url = "https://files.pythonhosted.org/packages/4c/27/1e5b3f1c23ca5dd4106d9d580e5c13d9acb70288bff614b3d7b638378cc9/ruff-0.13.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:5b939a1b2a960e9742e9a347e5bbc9b3c3d2c716f86c6ae273d9cbd64f193f22", size = 12133449 }, - { url = "https://files.pythonhosted.org/packages/2d/09/b92a5ccee289f11ab128df57d5911224197d8d55ef3bd2043534ff72ca54/ruff-0.13.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:50e2d52acb8de3804fc5f6e2fa3ae9bdc6812410a9e46837e673ad1f90a18736", size = 13051369 }, - { url = "https://files.pythonhosted.org/packages/89/99/26c9d1c7d8150f45e346dc045cc49f23e961efceb4a70c47dea0960dea9a/ruff-0.13.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3196bc13ab2110c176b9a4ae5ff7ab676faaa1964b330a1383ba20e1e19645f2", size = 13523644 }, - { url = "https://files.pythonhosted.org/packages/f7/00/e7f1501e81e8ec290e79527827af1d88f541d8d26151751b46108978dade/ruff-0.13.2-py3-none-win32.whl", hash = "sha256:7c2a0b7c1e87795fec3404a485096bcd790216c7c146a922d121d8b9c8f1aaac", size = 12245990 }, - { url = "https://files.pythonhosted.org/packages/ee/bd/d9f33a73de84fafd0146c6fba4f497c4565fe8fa8b46874b8e438869abc2/ruff-0.13.2-py3-none-win_amd64.whl", hash = "sha256:17d95fb32218357c89355f6f6f9a804133e404fc1f65694372e02a557edf8585", size = 13324004 }, - { url = "https://files.pythonhosted.org/packages/c3/12/28fa2f597a605884deb0f65c1b1ae05111051b2a7030f5d8a4ff7f4599ba/ruff-0.13.2-py3-none-win_arm64.whl", hash = "sha256:da711b14c530412c827219312b7d7fbb4877fb31150083add7e8c5336549cea7", size = 12484437 }, +sdist = { url = "https://files.pythonhosted.org/packages/02/df/8d7d8c515d33adfc540e2edf6c6021ea1c5a58a678d8cfce9fae59aabcab/ruff-0.13.2.tar.gz", hash = "sha256:cb12fffd32fb16d32cef4ed16d8c7cdc27ed7c944eaa98d99d01ab7ab0b710ff", size = 5416417, upload-time = "2025-09-25T14:54:09.936Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/84/5716a7fa4758e41bf70e603e13637c42cfb9dbf7ceb07180211b9bbf75ef/ruff-0.13.2-py3-none-linux_armv6l.whl", hash = "sha256:3796345842b55f033a78285e4f1641078f902020d8450cade03aad01bffd81c3", size = 12343254, upload-time = "2025-09-25T14:53:27.784Z" }, + { url = "https://files.pythonhosted.org/packages/9b/77/c7042582401bb9ac8eff25360e9335e901d7a1c0749a2b28ba4ecb239991/ruff-0.13.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ff7e4dda12e683e9709ac89e2dd436abf31a4d8a8fc3d89656231ed808e231d2", size = 13040891, upload-time = "2025-09-25T14:53:31.38Z" }, + { url = "https://files.pythonhosted.org/packages/c6/15/125a7f76eb295cb34d19c6778e3a82ace33730ad4e6f28d3427e134a02e0/ruff-0.13.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c75e9d2a2fafd1fdd895d0e7e24b44355984affdde1c412a6f6d3f6e16b22d46", size = 12243588, upload-time = "2025-09-25T14:53:33.543Z" }, + { url = "https://files.pythonhosted.org/packages/9e/eb/0093ae04a70f81f8be7fd7ed6456e926b65d238fc122311293d033fdf91e/ruff-0.13.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cceac74e7bbc53ed7d15d1042ffe7b6577bf294611ad90393bf9b2a0f0ec7cb6", size = 12491359, upload-time = "2025-09-25T14:53:35.892Z" }, + { url = "https://files.pythonhosted.org/packages/43/fe/72b525948a6956f07dad4a6f122336b6a05f2e3fd27471cea612349fedb9/ruff-0.13.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6ae3f469b5465ba6d9721383ae9d49310c19b452a161b57507764d7ef15f4b07", size = 12162486, upload-time = "2025-09-25T14:53:38.171Z" }, + { url = "https://files.pythonhosted.org/packages/6a/e3/0fac422bbbfb2ea838023e0d9fcf1f30183d83ab2482800e2cb892d02dfe/ruff-0.13.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f8f9e3cd6714358238cd6626b9d43026ed19c0c018376ac1ef3c3a04ffb42d8", size = 13871203, upload-time = "2025-09-25T14:53:41.943Z" }, + { url = "https://files.pythonhosted.org/packages/6b/82/b721c8e3ec5df6d83ba0e45dcf00892c4f98b325256c42c38ef136496cbf/ruff-0.13.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c6ed79584a8f6cbe2e5d7dbacf7cc1ee29cbdb5df1172e77fbdadc8bb85a1f89", size = 14929635, upload-time = "2025-09-25T14:53:43.953Z" }, + { url = "https://files.pythonhosted.org/packages/c4/a0/ad56faf6daa507b83079a1ad7a11694b87d61e6bf01c66bd82b466f21821/ruff-0.13.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aed130b2fde049cea2019f55deb939103123cdd191105f97a0599a3e753d61b0", size = 14338783, upload-time = "2025-09-25T14:53:46.205Z" }, + { url = "https://files.pythonhosted.org/packages/47/77/ad1d9156db8f99cd01ee7e29d74b34050e8075a8438e589121fcd25c4b08/ruff-0.13.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1887c230c2c9d65ed1b4e4cfe4d255577ea28b718ae226c348ae68df958191aa", size = 13355322, upload-time = "2025-09-25T14:53:48.164Z" }, + { url = "https://files.pythonhosted.org/packages/64/8b/e87cfca2be6f8b9f41f0bb12dc48c6455e2d66df46fe61bb441a226f1089/ruff-0.13.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5bcb10276b69b3cfea3a102ca119ffe5c6ba3901e20e60cf9efb53fa417633c3", size = 13354427, upload-time = "2025-09-25T14:53:50.486Z" }, + { url = "https://files.pythonhosted.org/packages/7f/df/bf382f3fbead082a575edb860897287f42b1b3c694bafa16bc9904c11ed3/ruff-0.13.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:afa721017aa55a555b2ff7944816587f1cb813c2c0a882d158f59b832da1660d", size = 13537637, upload-time = "2025-09-25T14:53:52.887Z" }, + { url = "https://files.pythonhosted.org/packages/51/70/1fb7a7c8a6fc8bd15636288a46e209e81913b87988f26e1913d0851e54f4/ruff-0.13.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:1dbc875cf3720c64b3990fef8939334e74cb0ca65b8dbc61d1f439201a38101b", size = 12340025, upload-time = "2025-09-25T14:53:54.88Z" }, + { url = "https://files.pythonhosted.org/packages/4c/27/1e5b3f1c23ca5dd4106d9d580e5c13d9acb70288bff614b3d7b638378cc9/ruff-0.13.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:5b939a1b2a960e9742e9a347e5bbc9b3c3d2c716f86c6ae273d9cbd64f193f22", size = 12133449, upload-time = "2025-09-25T14:53:57.089Z" }, + { url = "https://files.pythonhosted.org/packages/2d/09/b92a5ccee289f11ab128df57d5911224197d8d55ef3bd2043534ff72ca54/ruff-0.13.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:50e2d52acb8de3804fc5f6e2fa3ae9bdc6812410a9e46837e673ad1f90a18736", size = 13051369, upload-time = "2025-09-25T14:53:59.124Z" }, + { url = "https://files.pythonhosted.org/packages/89/99/26c9d1c7d8150f45e346dc045cc49f23e961efceb4a70c47dea0960dea9a/ruff-0.13.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3196bc13ab2110c176b9a4ae5ff7ab676faaa1964b330a1383ba20e1e19645f2", size = 13523644, upload-time = "2025-09-25T14:54:01.622Z" }, + { url = "https://files.pythonhosted.org/packages/f7/00/e7f1501e81e8ec290e79527827af1d88f541d8d26151751b46108978dade/ruff-0.13.2-py3-none-win32.whl", hash = "sha256:7c2a0b7c1e87795fec3404a485096bcd790216c7c146a922d121d8b9c8f1aaac", size = 12245990, upload-time = "2025-09-25T14:54:03.647Z" }, + { url = "https://files.pythonhosted.org/packages/ee/bd/d9f33a73de84fafd0146c6fba4f497c4565fe8fa8b46874b8e438869abc2/ruff-0.13.2-py3-none-win_amd64.whl", hash = "sha256:17d95fb32218357c89355f6f6f9a804133e404fc1f65694372e02a557edf8585", size = 13324004, upload-time = "2025-09-25T14:54:06.05Z" }, + { url = "https://files.pythonhosted.org/packages/c3/12/28fa2f597a605884deb0f65c1b1ae05111051b2a7030f5d8a4ff7f4599ba/ruff-0.13.2-py3-none-win_arm64.whl", hash = "sha256:da711b14c530412c827219312b7d7fbb4877fb31150083add7e8c5336549cea7", size = 12484437, upload-time = "2025-09-25T14:54:08.022Z" }, ] [[package]] @@ -3008,9 +3023,9 @@ dependencies = [ { name = "aiohttp" }, { name = "fsspec" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/75/65/4b4c868cff76c036d11dc75dd91e5696dbf16ce626514166f35d5f4a930f/s3fs-2024.10.0.tar.gz", hash = "sha256:58b8c3650f8b99dbedf361543da3533aac8707035a104db5d80b094617ad4a3f", size = 75916 } +sdist = { url = "https://files.pythonhosted.org/packages/75/65/4b4c868cff76c036d11dc75dd91e5696dbf16ce626514166f35d5f4a930f/s3fs-2024.10.0.tar.gz", hash = "sha256:58b8c3650f8b99dbedf361543da3533aac8707035a104db5d80b094617ad4a3f", size = 75916, upload-time = "2024-10-21T01:45:49.967Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/99/44/bb9ff095ae7b1b6908480f683b6ca6b71c2105d343a5e5cb25334b01f5fa/s3fs-2024.10.0-py3-none-any.whl", hash = "sha256:7a2025d60d5b1a6025726b3a5e292a8e5aa713abc3b16fd1f81735181f7bb282", size = 29855 }, + { url = "https://files.pythonhosted.org/packages/99/44/bb9ff095ae7b1b6908480f683b6ca6b71c2105d343a5e5cb25334b01f5fa/s3fs-2024.10.0-py3-none-any.whl", hash = "sha256:7a2025d60d5b1a6025726b3a5e292a8e5aa713abc3b16fd1f81735181f7bb282", size = 29855, upload-time = "2024-10-21T01:45:47.905Z" }, ] [package.optional-dependencies] @@ -3025,27 +3040,27 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c0/0a/1cdbabf9edd0ea7747efdf6c9ab4e7061b085aa7f9bfc36bb1601563b069/s3transfer-0.10.4.tar.gz", hash = "sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7", size = 145287 } +sdist = { url = "https://files.pythonhosted.org/packages/c0/0a/1cdbabf9edd0ea7747efdf6c9ab4e7061b085aa7f9bfc36bb1601563b069/s3transfer-0.10.4.tar.gz", hash = "sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7", size = 145287, upload-time = "2024-11-20T21:06:05.981Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/66/05/7957af15543b8c9799209506df4660cba7afc4cf94bfb60513827e96bed6/s3transfer-0.10.4-py3-none-any.whl", hash = "sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e", size = 83175 }, + { url = "https://files.pythonhosted.org/packages/66/05/7957af15543b8c9799209506df4660cba7afc4cf94bfb60513827e96bed6/s3transfer-0.10.4-py3-none-any.whl", hash = "sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e", size = 83175, upload-time = "2024-11-20T21:06:03.961Z" }, ] [[package]] name = "setuptools" version = "80.9.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958 } +sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486 }, + { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" }, ] [[package]] name = "shellingham" version = "1.5.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310 } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755 }, + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] [[package]] @@ -3055,33 +3070,33 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "wsproto" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b0/d4/bfa032f961103eba93de583b161f0e6a5b63cebb8f2c7d0c6e6efe1e3d2e/simple_websocket-1.1.0.tar.gz", hash = "sha256:7939234e7aa067c534abdab3a9ed933ec9ce4691b0713c78acb195560aa52ae4", size = 17300 } +sdist = { url = "https://files.pythonhosted.org/packages/b0/d4/bfa032f961103eba93de583b161f0e6a5b63cebb8f2c7d0c6e6efe1e3d2e/simple_websocket-1.1.0.tar.gz", hash = "sha256:7939234e7aa067c534abdab3a9ed933ec9ce4691b0713c78acb195560aa52ae4", size = 17300, upload-time = "2024-10-10T22:39:31.412Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 }, + { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842, upload-time = "2024-10-10T22:39:29.645Z" }, ] [[package]] name = "singleton-decorator" version = "1.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/33/98/a8b5c919bee1152a9a1afd82014431f8db5882699754de50d1b3aba4d136/singleton-decorator-1.0.0.tar.gz", hash = "sha256:1a90ad8a8a738be591c9c167fdd677c5d4a43d1bc6b1c128227be1c5e03bee07", size = 2791 } +sdist = { url = "https://files.pythonhosted.org/packages/33/98/a8b5c919bee1152a9a1afd82014431f8db5882699754de50d1b3aba4d136/singleton-decorator-1.0.0.tar.gz", hash = "sha256:1a90ad8a8a738be591c9c167fdd677c5d4a43d1bc6b1c128227be1c5e03bee07", size = 2791, upload-time = "2017-08-10T19:52:45.903Z" } [[package]] name = "six" version = "1.17.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] [[package]] name = "sniffio" version = "1.3.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] [[package]] @@ -3108,13 +3123,13 @@ dependencies = [ { name = "tomlkit" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/13/bf/7c765991c79d40bde324961ec75b67ba6c00c2491ec894e89c199de5bd20/snowflake_connector_python-3.14.1.tar.gz", hash = "sha256:5ff7a9f1582d1583f86e1c181d29b3ee56e7e6163d14209fc8bf34ae2e234986", size = 772678 } +sdist = { url = "https://files.pythonhosted.org/packages/13/bf/7c765991c79d40bde324961ec75b67ba6c00c2491ec894e89c199de5bd20/snowflake_connector_python-3.14.1.tar.gz", hash = "sha256:5ff7a9f1582d1583f86e1c181d29b3ee56e7e6163d14209fc8bf34ae2e234986", size = 772678, upload-time = "2025-04-21T22:06:11.825Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/13/126b2c1825f4149d2b9ae39c6a4700224158490725845fbde1d397c59c1c/snowflake_connector_python-3.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7f0505a9ff3b00bb476078b8f642da6fea303022b997331c522eb02b80ad129f", size = 987827 }, - { url = "https://files.pythonhosted.org/packages/0b/22/bababb1c5b7a98b604d2b0899274cf6803427409fe62d609e58b1a9ef741/snowflake_connector_python-3.14.1-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:bf3ca3c1403f4d8cad92f597571c334015fc7be2a917b4e6eb75a66517404577", size = 999123 }, - { url = "https://files.pythonhosted.org/packages/48/83/aae4f574024c81be41e8b3afe65403ab1e4581044d90e8942b2cc02dee19/snowflake_connector_python-3.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cc908f9fd4abf354fad7c5b69557a1da229196f7554126e7aef0441db56c75f", size = 2601753 }, - { url = "https://files.pythonhosted.org/packages/d6/63/e000afe88b217413a84bef6123d49a5931e9073ba2efb215b29dffdd5692/snowflake_connector_python-3.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40dab3a11cee219073e5a5bda2aa2d6b2b4fc1a7267d934466e20ea6cfa4db6b", size = 2625995 }, - { url = "https://files.pythonhosted.org/packages/30/9f/aefda344599d45ee5bafdf9bcc2279dbc7e21c2cfcc0fb574a41840613a9/snowflake_connector_python-3.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:079bd59de5702fdce9a3d0fc67061f7fbb959599d5887ebaf9f0828c172f47de", size = 946534 }, + { url = "https://files.pythonhosted.org/packages/48/13/126b2c1825f4149d2b9ae39c6a4700224158490725845fbde1d397c59c1c/snowflake_connector_python-3.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7f0505a9ff3b00bb476078b8f642da6fea303022b997331c522eb02b80ad129f", size = 987827, upload-time = "2025-04-21T22:06:20.927Z" }, + { url = "https://files.pythonhosted.org/packages/0b/22/bababb1c5b7a98b604d2b0899274cf6803427409fe62d609e58b1a9ef741/snowflake_connector_python-3.14.1-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:bf3ca3c1403f4d8cad92f597571c334015fc7be2a917b4e6eb75a66517404577", size = 999123, upload-time = "2025-04-21T22:06:22.319Z" }, + { url = "https://files.pythonhosted.org/packages/48/83/aae4f574024c81be41e8b3afe65403ab1e4581044d90e8942b2cc02dee19/snowflake_connector_python-3.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cc908f9fd4abf354fad7c5b69557a1da229196f7554126e7aef0441db56c75f", size = 2601753, upload-time = "2025-04-21T22:05:59.668Z" }, + { url = "https://files.pythonhosted.org/packages/d6/63/e000afe88b217413a84bef6123d49a5931e9073ba2efb215b29dffdd5692/snowflake_connector_python-3.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40dab3a11cee219073e5a5bda2aa2d6b2b4fc1a7267d934466e20ea6cfa4db6b", size = 2625995, upload-time = "2025-04-21T22:06:01.624Z" }, + { url = "https://files.pythonhosted.org/packages/30/9f/aefda344599d45ee5bafdf9bcc2279dbc7e21c2cfcc0fb574a41840613a9/snowflake_connector_python-3.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:079bd59de5702fdce9a3d0fc67061f7fbb959599d5887ebaf9f0828c172f47de", size = 946534, upload-time = "2025-04-21T22:06:33.601Z" }, ] [package.optional-dependencies] @@ -3131,9 +3146,9 @@ dependencies = [ { name = "django" }, { name = "social-auth-core" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ac/f3/be7a7551463a6e7ddf9a4674662ae0fdea54aa4f4c82562d151cf1e41ced/social-auth-app-django-5.3.0.tar.gz", hash = "sha256:8719d57d01d80dcc9629a46e6806889aa9714fe4b658d2ebe3c120450591031d", size = 24519 } +sdist = { url = "https://files.pythonhosted.org/packages/ac/f3/be7a7551463a6e7ddf9a4674662ae0fdea54aa4f4c82562d151cf1e41ced/social-auth-app-django-5.3.0.tar.gz", hash = "sha256:8719d57d01d80dcc9629a46e6806889aa9714fe4b658d2ebe3c120450591031d", size = 24519, upload-time = "2023-09-01T11:30:31.772Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/19/65/747ad30653d67c9e65c3028b435a224f0fd9e81cf0bbeca2c889bbdf93ae/social_auth_app_django-5.3.0-py3-none-any.whl", hash = "sha256:2e71234656ddebe0c5b5ad450d42ee49f52a3f2d1708687fccf2a2c92d31a624", size = 26373 }, + { url = "https://files.pythonhosted.org/packages/19/65/747ad30653d67c9e65c3028b435a224f0fd9e81cf0bbeca2c889bbdf93ae/social_auth_app_django-5.3.0-py3-none-any.whl", hash = "sha256:2e71234656ddebe0c5b5ad450d42ee49f52a3f2d1708687fccf2a2c92d31a624", size = 26373, upload-time = "2023-09-01T11:30:30.18Z" }, ] [[package]] @@ -3149,27 +3164,27 @@ dependencies = [ { name = "requests" }, { name = "requests-oauthlib" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/87/c0/466383c22767604c573f15aff3ea2c37aacf3c10281f31199c02ac0017ef/social_auth_core-4.7.0.tar.gz", hash = "sha256:2bba127c7b7166a81085ddb0c248d93751b3bc3cdab8569f62d9f70c6bc4ed40", size = 230894 } +sdist = { url = "https://files.pythonhosted.org/packages/87/c0/466383c22767604c573f15aff3ea2c37aacf3c10281f31199c02ac0017ef/social_auth_core-4.7.0.tar.gz", hash = "sha256:2bba127c7b7166a81085ddb0c248d93751b3bc3cdab8569f62d9f70c6bc4ed40", size = 230894, upload-time = "2025-06-27T06:34:27.15Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/3e/1b1ed868b840ecf5e7b02fc8ab20718ac24e184b90057815fee2ebbc107d/social_auth_core-4.7.0-py3-none-any.whl", hash = "sha256:9eef9b49c332d1a3265b37dcc698a7ace97c3fc59df2d874b51576d11d31f6a6", size = 427867 }, + { url = "https://files.pythonhosted.org/packages/e3/3e/1b1ed868b840ecf5e7b02fc8ab20718ac24e184b90057815fee2ebbc107d/social_auth_core-4.7.0-py3-none-any.whl", hash = "sha256:9eef9b49c332d1a3265b37dcc698a7ace97c3fc59df2d874b51576d11d31f6a6", size = 427867, upload-time = "2025-06-27T06:34:25.715Z" }, ] [[package]] name = "sortedcontainers" version = "2.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594 } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575 }, + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, ] [[package]] name = "soupsieve" version = "2.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472 } +sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472, upload-time = "2025-08-27T15:39:51.78Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679 }, + { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679, upload-time = "2025-08-27T15:39:50.179Z" }, ] [[package]] @@ -3180,17 +3195,17 @@ dependencies = [ { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d7/bc/d59b5d97d27229b0e009bd9098cd81af71c2fa5549c580a0a67b9bed0496/sqlalchemy-2.0.43.tar.gz", hash = "sha256:788bfcef6787a7764169cfe9859fe425bf44559619e1d9f56f5bddf2ebf6f417", size = 9762949 } +sdist = { url = "https://files.pythonhosted.org/packages/d7/bc/d59b5d97d27229b0e009bd9098cd81af71c2fa5549c580a0a67b9bed0496/sqlalchemy-2.0.43.tar.gz", hash = "sha256:788bfcef6787a7764169cfe9859fe425bf44559619e1d9f56f5bddf2ebf6f417", size = 9762949, upload-time = "2025-08-11T14:24:58.438Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/61/db/20c78f1081446095450bdc6ee6cc10045fce67a8e003a5876b6eaafc5cc4/sqlalchemy-2.0.43-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:20d81fc2736509d7a2bd33292e489b056cbae543661bb7de7ce9f1c0cd6e7f24", size = 2134891 }, - { url = "https://files.pythonhosted.org/packages/45/0a/3d89034ae62b200b4396f0f95319f7d86e9945ee64d2343dcad857150fa2/sqlalchemy-2.0.43-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25b9fc27650ff5a2c9d490c13c14906b918b0de1f8fcbb4c992712d8caf40e83", size = 2123061 }, - { url = "https://files.pythonhosted.org/packages/cb/10/2711f7ff1805919221ad5bee205971254845c069ee2e7036847103ca1e4c/sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6772e3ca8a43a65a37c88e2f3e2adfd511b0b1da37ef11ed78dea16aeae85bd9", size = 3320384 }, - { url = "https://files.pythonhosted.org/packages/6e/0e/3d155e264d2ed2778484006ef04647bc63f55b3e2d12e6a4f787747b5900/sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a113da919c25f7f641ffbd07fbc9077abd4b3b75097c888ab818f962707eb48", size = 3329648 }, - { url = "https://files.pythonhosted.org/packages/5b/81/635100fb19725c931622c673900da5efb1595c96ff5b441e07e3dd61f2be/sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4286a1139f14b7d70141c67a8ae1582fc2b69105f1b09d9573494eb4bb4b2687", size = 3258030 }, - { url = "https://files.pythonhosted.org/packages/0c/ed/a99302716d62b4965fded12520c1cbb189f99b17a6d8cf77611d21442e47/sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:529064085be2f4d8a6e5fab12d36ad44f1909a18848fcfbdb59cc6d4bbe48efe", size = 3294469 }, - { url = "https://files.pythonhosted.org/packages/5d/a2/3a11b06715149bf3310b55a98b5c1e84a42cfb949a7b800bc75cb4e33abc/sqlalchemy-2.0.43-cp312-cp312-win32.whl", hash = "sha256:b535d35dea8bbb8195e7e2b40059e2253acb2b7579b73c1b432a35363694641d", size = 2098906 }, - { url = "https://files.pythonhosted.org/packages/bc/09/405c915a974814b90aa591280623adc6ad6b322f61fd5cff80aeaef216c9/sqlalchemy-2.0.43-cp312-cp312-win_amd64.whl", hash = "sha256:1c6d85327ca688dbae7e2b06d7d84cfe4f3fffa5b5f9e21bb6ce9d0e1a0e0e0a", size = 2126260 }, - { url = "https://files.pythonhosted.org/packages/b8/d9/13bdde6521f322861fab67473cec4b1cc8999f3871953531cf61945fad92/sqlalchemy-2.0.43-py3-none-any.whl", hash = "sha256:1681c21dd2ccee222c2fe0bef671d1aef7c504087c9c4e800371cfcc8ac966fc", size = 1924759 }, + { url = "https://files.pythonhosted.org/packages/61/db/20c78f1081446095450bdc6ee6cc10045fce67a8e003a5876b6eaafc5cc4/sqlalchemy-2.0.43-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:20d81fc2736509d7a2bd33292e489b056cbae543661bb7de7ce9f1c0cd6e7f24", size = 2134891, upload-time = "2025-08-11T15:51:13.019Z" }, + { url = "https://files.pythonhosted.org/packages/45/0a/3d89034ae62b200b4396f0f95319f7d86e9945ee64d2343dcad857150fa2/sqlalchemy-2.0.43-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25b9fc27650ff5a2c9d490c13c14906b918b0de1f8fcbb4c992712d8caf40e83", size = 2123061, upload-time = "2025-08-11T15:51:14.319Z" }, + { url = "https://files.pythonhosted.org/packages/cb/10/2711f7ff1805919221ad5bee205971254845c069ee2e7036847103ca1e4c/sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6772e3ca8a43a65a37c88e2f3e2adfd511b0b1da37ef11ed78dea16aeae85bd9", size = 3320384, upload-time = "2025-08-11T15:52:35.088Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0e/3d155e264d2ed2778484006ef04647bc63f55b3e2d12e6a4f787747b5900/sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a113da919c25f7f641ffbd07fbc9077abd4b3b75097c888ab818f962707eb48", size = 3329648, upload-time = "2025-08-11T15:56:34.153Z" }, + { url = "https://files.pythonhosted.org/packages/5b/81/635100fb19725c931622c673900da5efb1595c96ff5b441e07e3dd61f2be/sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4286a1139f14b7d70141c67a8ae1582fc2b69105f1b09d9573494eb4bb4b2687", size = 3258030, upload-time = "2025-08-11T15:52:36.933Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ed/a99302716d62b4965fded12520c1cbb189f99b17a6d8cf77611d21442e47/sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:529064085be2f4d8a6e5fab12d36ad44f1909a18848fcfbdb59cc6d4bbe48efe", size = 3294469, upload-time = "2025-08-11T15:56:35.553Z" }, + { url = "https://files.pythonhosted.org/packages/5d/a2/3a11b06715149bf3310b55a98b5c1e84a42cfb949a7b800bc75cb4e33abc/sqlalchemy-2.0.43-cp312-cp312-win32.whl", hash = "sha256:b535d35dea8bbb8195e7e2b40059e2253acb2b7579b73c1b432a35363694641d", size = 2098906, upload-time = "2025-08-11T15:55:00.645Z" }, + { url = "https://files.pythonhosted.org/packages/bc/09/405c915a974814b90aa591280623adc6ad6b322f61fd5cff80aeaef216c9/sqlalchemy-2.0.43-cp312-cp312-win_amd64.whl", hash = "sha256:1c6d85327ca688dbae7e2b06d7d84cfe4f3fffa5b5f9e21bb6ce9d0e1a0e0e0a", size = 2126260, upload-time = "2025-08-11T15:55:02.965Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d9/13bdde6521f322861fab67473cec4b1cc8999f3871953531cf61945fad92/sqlalchemy-2.0.43-py3-none-any.whl", hash = "sha256:1681c21dd2ccee222c2fe0bef671d1aef7c504087c9c4e800371cfcc8ac966fc", size = 1924759, upload-time = "2025-08-11T15:39:53.024Z" }, ] [package.optional-dependencies] @@ -3202,9 +3217,9 @@ asyncio = [ name = "sqlparse" version = "0.5.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e5/40/edede8dd6977b0d3da179a342c198ed100dd2aba4be081861ee5911e4da4/sqlparse-0.5.3.tar.gz", hash = "sha256:09f67787f56a0b16ecdbde1bfc7f5d9c3371ca683cfeaa8e6ff60b4807ec9272", size = 84999 } +sdist = { url = "https://files.pythonhosted.org/packages/e5/40/edede8dd6977b0d3da179a342c198ed100dd2aba4be081861ee5911e4da4/sqlparse-0.5.3.tar.gz", hash = "sha256:09f67787f56a0b16ecdbde1bfc7f5d9c3371ca683cfeaa8e6ff60b4807ec9272", size = 84999, upload-time = "2024-12-10T12:05:30.728Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/5c/bfd6bd0bf979426d405cc6e71eceb8701b148b16c21d2dc3c261efc61c7b/sqlparse-0.5.3-py3-none-any.whl", hash = "sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca", size = 44415 }, + { url = "https://files.pythonhosted.org/packages/a9/5c/bfd6bd0bf979426d405cc6e71eceb8701b148b16c21d2dc3c261efc61c7b/sqlparse-0.5.3-py3-none-any.whl", hash = "sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca", size = 44415, upload-time = "2024-12-10T12:05:27.824Z" }, ] [[package]] @@ -3215,27 +3230,27 @@ dependencies = [ { name = "ply" }, { name = "six" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/99/6f/ef25bbc1aefeb9c905d527f1d3cd3f41f22f40566d33001b8bb14ae0cdaf/stone-3.3.1.tar.gz", hash = "sha256:4ef0397512f609757975f7ec09b35639d72ba7e3e17ce4ddf399578346b4cb50", size = 190888 } +sdist = { url = "https://files.pythonhosted.org/packages/99/6f/ef25bbc1aefeb9c905d527f1d3cd3f41f22f40566d33001b8bb14ae0cdaf/stone-3.3.1.tar.gz", hash = "sha256:4ef0397512f609757975f7ec09b35639d72ba7e3e17ce4ddf399578346b4cb50", size = 190888, upload-time = "2022-01-25T21:32:16.729Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/92/d0c83f63d3518e5f0b8a311937c31347349ec9a47b209ddc17f7566f58fc/stone-3.3.1-py3-none-any.whl", hash = "sha256:e15866fad249c11a963cce3bdbed37758f2e88c8ff4898616bc0caeb1e216047", size = 162257 }, + { url = "https://files.pythonhosted.org/packages/5c/92/d0c83f63d3518e5f0b8a311937c31347349ec9a47b209ddc17f7566f58fc/stone-3.3.1-py3-none-any.whl", hash = "sha256:e15866fad249c11a963cce3bdbed37758f2e88c8ff4898616bc0caeb1e216047", size = 162257, upload-time = "2022-01-25T21:32:15.155Z" }, ] [[package]] name = "striprtf" version = "0.0.26" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/25/20/3d419008265346452d09e5dadfd5d045b64b40d8fc31af40588e6c76997a/striprtf-0.0.26.tar.gz", hash = "sha256:fdb2bba7ac440072d1c41eab50d8d74ae88f60a8b6575c6e2c7805dc462093aa", size = 6258 } +sdist = { url = "https://files.pythonhosted.org/packages/25/20/3d419008265346452d09e5dadfd5d045b64b40d8fc31af40588e6c76997a/striprtf-0.0.26.tar.gz", hash = "sha256:fdb2bba7ac440072d1c41eab50d8d74ae88f60a8b6575c6e2c7805dc462093aa", size = 6258, upload-time = "2023-07-20T14:30:36.29Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/cf/0fea4f4ba3fc2772ac2419278aa9f6964124d4302117d61bc055758e000c/striprtf-0.0.26-py3-none-any.whl", hash = "sha256:8c8f9d32083cdc2e8bfb149455aa1cc5a4e0a035893bedc75db8b73becb3a1bb", size = 6914 }, + { url = "https://files.pythonhosted.org/packages/a3/cf/0fea4f4ba3fc2772ac2419278aa9f6964124d4302117d61bc055758e000c/striprtf-0.0.26-py3-none-any.whl", hash = "sha256:8c8f9d32083cdc2e8bfb149455aa1cc5a4e0a035893bedc75db8b73becb3a1bb", size = 6914, upload-time = "2023-07-20T14:30:35.338Z" }, ] [[package]] name = "tenacity" version = "9.1.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036 } +sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248 }, + { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" }, ] [[package]] @@ -3246,14 +3261,14 @@ dependencies = [ { name = "regex" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ea/cf/756fedf6981e82897f2d570dd25fa597eb3f4459068ae0572d7e888cfd6f/tiktoken-0.9.0.tar.gz", hash = "sha256:d02a5ca6a938e0490e1ff957bc48c8b078c88cb83977be1625b1fd8aac792c5d", size = 35991 } +sdist = { url = "https://files.pythonhosted.org/packages/ea/cf/756fedf6981e82897f2d570dd25fa597eb3f4459068ae0572d7e888cfd6f/tiktoken-0.9.0.tar.gz", hash = "sha256:d02a5ca6a938e0490e1ff957bc48c8b078c88cb83977be1625b1fd8aac792c5d", size = 35991, upload-time = "2025-02-14T06:03:01.003Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cf/e5/21ff33ecfa2101c1bb0f9b6df750553bd873b7fb532ce2cb276ff40b197f/tiktoken-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e88f121c1c22b726649ce67c089b90ddda8b9662545a8aeb03cfef15967ddd03", size = 1065073 }, - { url = "https://files.pythonhosted.org/packages/8e/03/a95e7b4863ee9ceec1c55983e4cc9558bcfd8f4f80e19c4f8a99642f697d/tiktoken-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6600660f2f72369acb13a57fb3e212434ed38b045fd8cc6cdd74947b4b5d210", size = 1008075 }, - { url = "https://files.pythonhosted.org/packages/40/10/1305bb02a561595088235a513ec73e50b32e74364fef4de519da69bc8010/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95e811743b5dfa74f4b227927ed86cbc57cad4df859cb3b643be797914e41794", size = 1140754 }, - { url = "https://files.pythonhosted.org/packages/1b/40/da42522018ca496432ffd02793c3a72a739ac04c3794a4914570c9bb2925/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99376e1370d59bcf6935c933cb9ba64adc29033b7e73f5f7569f3aad86552b22", size = 1196678 }, - { url = "https://files.pythonhosted.org/packages/5c/41/1e59dddaae270ba20187ceb8aa52c75b24ffc09f547233991d5fd822838b/tiktoken-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:badb947c32739fb6ddde173e14885fb3de4d32ab9d8c591cbd013c22b4c31dd2", size = 1259283 }, - { url = "https://files.pythonhosted.org/packages/5b/64/b16003419a1d7728d0d8c0d56a4c24325e7b10a21a9dd1fc0f7115c02f0a/tiktoken-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:5a62d7a25225bafed786a524c1b9f0910a1128f4232615bf3f8257a73aaa3b16", size = 894897 }, + { url = "https://files.pythonhosted.org/packages/cf/e5/21ff33ecfa2101c1bb0f9b6df750553bd873b7fb532ce2cb276ff40b197f/tiktoken-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e88f121c1c22b726649ce67c089b90ddda8b9662545a8aeb03cfef15967ddd03", size = 1065073, upload-time = "2025-02-14T06:02:24.768Z" }, + { url = "https://files.pythonhosted.org/packages/8e/03/a95e7b4863ee9ceec1c55983e4cc9558bcfd8f4f80e19c4f8a99642f697d/tiktoken-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6600660f2f72369acb13a57fb3e212434ed38b045fd8cc6cdd74947b4b5d210", size = 1008075, upload-time = "2025-02-14T06:02:26.92Z" }, + { url = "https://files.pythonhosted.org/packages/40/10/1305bb02a561595088235a513ec73e50b32e74364fef4de519da69bc8010/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95e811743b5dfa74f4b227927ed86cbc57cad4df859cb3b643be797914e41794", size = 1140754, upload-time = "2025-02-14T06:02:28.124Z" }, + { url = "https://files.pythonhosted.org/packages/1b/40/da42522018ca496432ffd02793c3a72a739ac04c3794a4914570c9bb2925/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99376e1370d59bcf6935c933cb9ba64adc29033b7e73f5f7569f3aad86552b22", size = 1196678, upload-time = "2025-02-14T06:02:29.845Z" }, + { url = "https://files.pythonhosted.org/packages/5c/41/1e59dddaae270ba20187ceb8aa52c75b24ffc09f547233991d5fd822838b/tiktoken-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:badb947c32739fb6ddde173e14885fb3de4d32ab9d8c591cbd013c22b4c31dd2", size = 1259283, upload-time = "2025-02-14T06:02:33.838Z" }, + { url = "https://files.pythonhosted.org/packages/5b/64/b16003419a1d7728d0d8c0d56a4c24325e7b10a21a9dd1fc0f7115c02f0a/tiktoken-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:5a62d7a25225bafed786a524c1b9f0910a1128f4232615bf3f8257a73aaa3b16", size = 894897, upload-time = "2025-02-14T06:02:36.265Z" }, ] [[package]] @@ -3263,29 +3278,29 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c0/44/625db94e91c6196b6574359fa70bfe28e8eabf57a1b894f8f0ec69727fd1/tokenizers-0.15.2.tar.gz", hash = "sha256:e6e9c6e019dd5484be5beafc775ae6c925f4c69a3487040ed09b45e13df2cb91", size = 320256 } +sdist = { url = "https://files.pythonhosted.org/packages/c0/44/625db94e91c6196b6574359fa70bfe28e8eabf57a1b894f8f0ec69727fd1/tokenizers-0.15.2.tar.gz", hash = "sha256:e6e9c6e019dd5484be5beafc775ae6c925f4c69a3487040ed09b45e13df2cb91", size = 320256, upload-time = "2024-02-12T02:28:50.62Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/ca/ea4b5aa70d4d26f2d05620c265b07b5a249157767c1673f5753b8bfc7db1/tokenizers-0.15.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f86593c18d2e6248e72fb91c77d413a815153b8ea4e31f7cd443bdf28e467670", size = 2574444 }, - { url = "https://files.pythonhosted.org/packages/f9/99/5a55a9b6e2db274c0969ad57d989d02efae90f9e558983a561c9b2b7ea1a/tokenizers-0.15.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0774bccc6608eca23eb9d620196687c8b2360624619623cf4ba9dc9bd53e8b51", size = 2411608 }, - { url = "https://files.pythonhosted.org/packages/82/cc/29bb3a25c06b90ce82bb20ef074011481de5c44413a1e1eb10cfd93080fb/tokenizers-0.15.2-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d0222c5b7c9b26c0b4822a82f6a7011de0a9d3060e1da176f66274b70f846b98", size = 3652367 }, - { url = "https://files.pythonhosted.org/packages/c0/ae/f6a974be9b2e1615f3de3cc9e4fc2897a86357400801c58143c67cbbad2e/tokenizers-0.15.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3835738be1de66624fff2f4f6f6684775da4e9c00bde053be7564cbf3545cc66", size = 3529509 }, - { url = "https://files.pythonhosted.org/packages/d6/42/340b91f675b494c4ecc0a256c5dd88b4003dbfde05afff90b970738fdfb4/tokenizers-0.15.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0143e7d9dcd811855c1ce1ab9bf5d96d29bf5e528fd6c7824d0465741e8c10fd", size = 3396516 }, - { url = "https://files.pythonhosted.org/packages/6f/b2/8a965abc17fff309eb06e98ce429a19a5e04f731a669a6113b9e182f8a79/tokenizers-0.15.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db35825f6d54215f6b6009a7ff3eedee0848c99a6271c870d2826fbbedf31a38", size = 3918811 }, - { url = "https://files.pythonhosted.org/packages/6c/16/dad7b4aa6e34a395aef7ae7b010d8b5ebefdf3df81510de53d7f17d2f0fc/tokenizers-0.15.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f5e64b0389a2be47091d8cc53c87859783b837ea1a06edd9d8e04004df55a5c", size = 4025494 }, - { url = "https://files.pythonhosted.org/packages/f6/de/3707df0c1d7bf55e6a4dba724700353bfee8e292fdd8ccfe93416549124d/tokenizers-0.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e0480c452217edd35eca56fafe2029fb4d368b7c0475f8dfa3c5c9c400a7456", size = 3575314 }, - { url = "https://files.pythonhosted.org/packages/2e/dd/7b8da304d152bb46f13bc2ba5bd545480ab6ce39d94a53eef07f7624d235/tokenizers-0.15.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a33ab881c8fe70474980577e033d0bc9a27b7ab8272896e500708b212995d834", size = 9682779 }, - { url = "https://files.pythonhosted.org/packages/07/aa/66e8a81e07a791ca6ee9d74ee6de1ffbcd3985149f13aeb530bd409baba0/tokenizers-0.15.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a308a607ca9de2c64c1b9ba79ec9a403969715a1b8ba5f998a676826f1a7039d", size = 9995614 }, - { url = "https://files.pythonhosted.org/packages/bf/e1/aed3bc98785c54bd26bf6dd3d2f54cc00de33e8b1f922a23131372eedec8/tokenizers-0.15.2-cp312-none-win32.whl", hash = "sha256:b8fcfa81bcb9447df582c5bc96a031e6df4da2a774b8080d4f02c0c16b42be0b", size = 2011030 }, - { url = "https://files.pythonhosted.org/packages/c9/ea/5800f4941a713b2feed955b6a256aacc1ca68a6699916d2668622c075d38/tokenizers-0.15.2-cp312-none-win_amd64.whl", hash = "sha256:38d7ab43c6825abfc0b661d95f39c7f8af2449364f01d331f3b51c94dcff7221", size = 2180523 }, + { url = "https://files.pythonhosted.org/packages/ae/ca/ea4b5aa70d4d26f2d05620c265b07b5a249157767c1673f5753b8bfc7db1/tokenizers-0.15.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f86593c18d2e6248e72fb91c77d413a815153b8ea4e31f7cd443bdf28e467670", size = 2574444, upload-time = "2024-02-12T02:25:27.417Z" }, + { url = "https://files.pythonhosted.org/packages/f9/99/5a55a9b6e2db274c0969ad57d989d02efae90f9e558983a561c9b2b7ea1a/tokenizers-0.15.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0774bccc6608eca23eb9d620196687c8b2360624619623cf4ba9dc9bd53e8b51", size = 2411608, upload-time = "2024-02-12T02:25:29.74Z" }, + { url = "https://files.pythonhosted.org/packages/82/cc/29bb3a25c06b90ce82bb20ef074011481de5c44413a1e1eb10cfd93080fb/tokenizers-0.15.2-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d0222c5b7c9b26c0b4822a82f6a7011de0a9d3060e1da176f66274b70f846b98", size = 3652367, upload-time = "2024-02-12T02:25:32.079Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ae/f6a974be9b2e1615f3de3cc9e4fc2897a86357400801c58143c67cbbad2e/tokenizers-0.15.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3835738be1de66624fff2f4f6f6684775da4e9c00bde053be7564cbf3545cc66", size = 3529509, upload-time = "2024-02-12T02:25:34.042Z" }, + { url = "https://files.pythonhosted.org/packages/d6/42/340b91f675b494c4ecc0a256c5dd88b4003dbfde05afff90b970738fdfb4/tokenizers-0.15.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0143e7d9dcd811855c1ce1ab9bf5d96d29bf5e528fd6c7824d0465741e8c10fd", size = 3396516, upload-time = "2024-02-12T02:25:35.884Z" }, + { url = "https://files.pythonhosted.org/packages/6f/b2/8a965abc17fff309eb06e98ce429a19a5e04f731a669a6113b9e182f8a79/tokenizers-0.15.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db35825f6d54215f6b6009a7ff3eedee0848c99a6271c870d2826fbbedf31a38", size = 3918811, upload-time = "2024-02-12T02:25:37.85Z" }, + { url = "https://files.pythonhosted.org/packages/6c/16/dad7b4aa6e34a395aef7ae7b010d8b5ebefdf3df81510de53d7f17d2f0fc/tokenizers-0.15.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f5e64b0389a2be47091d8cc53c87859783b837ea1a06edd9d8e04004df55a5c", size = 4025494, upload-time = "2024-02-12T02:25:40.247Z" }, + { url = "https://files.pythonhosted.org/packages/f6/de/3707df0c1d7bf55e6a4dba724700353bfee8e292fdd8ccfe93416549124d/tokenizers-0.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e0480c452217edd35eca56fafe2029fb4d368b7c0475f8dfa3c5c9c400a7456", size = 3575314, upload-time = "2024-02-12T02:25:42.212Z" }, + { url = "https://files.pythonhosted.org/packages/2e/dd/7b8da304d152bb46f13bc2ba5bd545480ab6ce39d94a53eef07f7624d235/tokenizers-0.15.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a33ab881c8fe70474980577e033d0bc9a27b7ab8272896e500708b212995d834", size = 9682779, upload-time = "2024-02-12T02:25:44.027Z" }, + { url = "https://files.pythonhosted.org/packages/07/aa/66e8a81e07a791ca6ee9d74ee6de1ffbcd3985149f13aeb530bd409baba0/tokenizers-0.15.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a308a607ca9de2c64c1b9ba79ec9a403969715a1b8ba5f998a676826f1a7039d", size = 9995614, upload-time = "2024-02-12T02:25:46.804Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e1/aed3bc98785c54bd26bf6dd3d2f54cc00de33e8b1f922a23131372eedec8/tokenizers-0.15.2-cp312-none-win32.whl", hash = "sha256:b8fcfa81bcb9447df582c5bc96a031e6df4da2a774b8080d4f02c0c16b42be0b", size = 2011030, upload-time = "2024-02-12T02:25:49.829Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ea/5800f4941a713b2feed955b6a256aacc1ca68a6699916d2668622c075d38/tokenizers-0.15.2-cp312-none-win_amd64.whl", hash = "sha256:38d7ab43c6825abfc0b661d95f39c7f8af2449364f01d331f3b51c94dcff7221", size = 2180523, upload-time = "2024-02-12T02:25:51.542Z" }, ] [[package]] name = "tomlkit" version = "0.13.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cc/18/0bbf3884e9eaa38819ebe46a7bd25dcd56b67434402b66a58c4b8e552575/tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1", size = 185207 } +sdist = { url = "https://files.pythonhosted.org/packages/cc/18/0bbf3884e9eaa38819ebe46a7bd25dcd56b67434402b66a58c4b8e552575/tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1", size = 185207, upload-time = "2025-06-05T07:13:44.947Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/75/8539d011f6be8e29f339c42e633aae3cb73bffa95dd0f9adec09b9c58e85/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0", size = 38901 }, + { url = "https://files.pythonhosted.org/packages/bd/75/8539d011f6be8e29f339c42e633aae3cb73bffa95dd0f9adec09b9c58e85/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0", size = 38901, upload-time = "2025-06-05T07:13:43.546Z" }, ] [[package]] @@ -3295,9 +3310,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, ] [[package]] @@ -3310,9 +3325,9 @@ dependencies = [ { name = "shellingham" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/21/ca/950278884e2ca20547ff3eb109478c6baf6b8cf219318e6bc4f666fad8e8/typer-0.19.2.tar.gz", hash = "sha256:9ad824308ded0ad06cc716434705f691d4ee0bfd0fb081839d2e426860e7fdca", size = 104755 } +sdist = { url = "https://files.pythonhosted.org/packages/21/ca/950278884e2ca20547ff3eb109478c6baf6b8cf219318e6bc4f666fad8e8/typer-0.19.2.tar.gz", hash = "sha256:9ad824308ded0ad06cc716434705f691d4ee0bfd0fb081839d2e426860e7fdca", size = 104755, upload-time = "2025-09-23T09:47:48.256Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/00/22/35617eee79080a5d071d0f14ad698d325ee6b3bf824fc0467c03b30e7fa8/typer-0.19.2-py3-none-any.whl", hash = "sha256:755e7e19670ffad8283db353267cb81ef252f595aa6834a0d1ca9312d9326cb9", size = 46748 }, + { url = "https://files.pythonhosted.org/packages/00/22/35617eee79080a5d071d0f14ad698d325ee6b3bf824fc0467c03b30e7fa8/typer-0.19.2-py3-none-any.whl", hash = "sha256:755e7e19670ffad8283db353267cb81ef252f595aa6834a0d1ca9312d9326cb9", size = 46748, upload-time = "2025-09-23T09:47:46.777Z" }, ] [[package]] @@ -3322,18 +3337,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "types-setuptools" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2a/98/ea454cea03e5f351323af6a482c65924f3c26c515efd9090dede58f2b4b6/types_cffi-1.17.0.20250915.tar.gz", hash = "sha256:4362e20368f78dabd5c56bca8004752cc890e07a71605d9e0d9e069dbaac8c06", size = 17229 } +sdist = { url = "https://files.pythonhosted.org/packages/2a/98/ea454cea03e5f351323af6a482c65924f3c26c515efd9090dede58f2b4b6/types_cffi-1.17.0.20250915.tar.gz", hash = "sha256:4362e20368f78dabd5c56bca8004752cc890e07a71605d9e0d9e069dbaac8c06", size = 17229, upload-time = "2025-09-15T03:01:25.31Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/aa/ec/092f2b74b49ec4855cdb53050deb9699f7105b8fda6fe034c0781b8687f3/types_cffi-1.17.0.20250915-py3-none-any.whl", hash = "sha256:cef4af1116c83359c11bb4269283c50f0688e9fc1d7f0eeb390f3661546da52c", size = 20112 }, + { url = "https://files.pythonhosted.org/packages/aa/ec/092f2b74b49ec4855cdb53050deb9699f7105b8fda6fe034c0781b8687f3/types_cffi-1.17.0.20250915-py3-none-any.whl", hash = "sha256:cef4af1116c83359c11bb4269283c50f0688e9fc1d7f0eeb390f3661546da52c", size = 20112, upload-time = "2025-09-15T03:01:24.187Z" }, ] [[package]] name = "types-pymysql" version = "1.1.0.20250916" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1f/12/bda1d977c07e0e47502bede1c44a986dd45946494d89e005e04cdeb0f8de/types_pymysql-1.1.0.20250916.tar.gz", hash = "sha256:98d75731795fcc06723a192786662bdfa760e1e00f22809c104fbb47bac5e29b", size = 22131 } +sdist = { url = "https://files.pythonhosted.org/packages/1f/12/bda1d977c07e0e47502bede1c44a986dd45946494d89e005e04cdeb0f8de/types_pymysql-1.1.0.20250916.tar.gz", hash = "sha256:98d75731795fcc06723a192786662bdfa760e1e00f22809c104fbb47bac5e29b", size = 22131, upload-time = "2025-09-16T02:49:22.039Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/21/eb/a225e32a6e7b196af67ab2f1b07363595f63255374cc3b88bfdab53b4ee8/types_pymysql-1.1.0.20250916-py3-none-any.whl", hash = "sha256:873eb9836bb5e3de4368cc7010ca72775f86e9692a5c7810f8c7f48da082e55b", size = 23063 }, + { url = "https://files.pythonhosted.org/packages/21/eb/a225e32a6e7b196af67ab2f1b07363595f63255374cc3b88bfdab53b4ee8/types_pymysql-1.1.0.20250916-py3-none-any.whl", hash = "sha256:873eb9836bb5e3de4368cc7010ca72775f86e9692a5c7810f8c7f48da082e55b", size = 23063, upload-time = "2025-09-16T02:49:20.933Z" }, ] [[package]] @@ -3344,27 +3359,27 @@ dependencies = [ { name = "cryptography" }, { name = "types-cffi" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d8/38/011e2a9916e7afca2cc9c14fff1df42285d697ee0dd9903e4292cd1f5bf6/types-pyOpenSSL-24.0.0.20240417.tar.gz", hash = "sha256:38e75fb828d2717be173770bbae8c22811fdec68e2bc3f5833954113eb84237d", size = 8261 } +sdist = { url = "https://files.pythonhosted.org/packages/d8/38/011e2a9916e7afca2cc9c14fff1df42285d697ee0dd9903e4292cd1f5bf6/types-pyOpenSSL-24.0.0.20240417.tar.gz", hash = "sha256:38e75fb828d2717be173770bbae8c22811fdec68e2bc3f5833954113eb84237d", size = 8261, upload-time = "2024-04-17T02:17:34.667Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9d/39/4e6dee712d1a93f2f2d39b0f7ebe0ba6168dfe2f6b50efe4b258790b5346/types_pyOpenSSL-24.0.0.20240417-py3-none-any.whl", hash = "sha256:4ce41ddaf383815168b6e21d542fd92135f10a5e82adb3e593a6b79638b0b511", size = 7420 }, + { url = "https://files.pythonhosted.org/packages/9d/39/4e6dee712d1a93f2f2d39b0f7ebe0ba6168dfe2f6b50efe4b258790b5346/types_pyOpenSSL-24.0.0.20240417-py3-none-any.whl", hash = "sha256:4ce41ddaf383815168b6e21d542fd92135f10a5e82adb3e593a6b79638b0b511", size = 7420, upload-time = "2024-04-17T02:17:33.556Z" }, ] [[package]] name = "types-pytz" version = "2025.2.0.20250809" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/07/e2/c774f754de26848f53f05defff5bb21dd9375a059d1ba5b5ea943cf8206e/types_pytz-2025.2.0.20250809.tar.gz", hash = "sha256:222e32e6a29bb28871f8834e8785e3801f2dc4441c715cd2082b271eecbe21e5", size = 10876 } +sdist = { url = "https://files.pythonhosted.org/packages/07/e2/c774f754de26848f53f05defff5bb21dd9375a059d1ba5b5ea943cf8206e/types_pytz-2025.2.0.20250809.tar.gz", hash = "sha256:222e32e6a29bb28871f8834e8785e3801f2dc4441c715cd2082b271eecbe21e5", size = 10876, upload-time = "2025-08-09T03:14:17.453Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/db/d0/91c24fe54e565f2344d7a6821e6c6bb099841ef09007ea6321a0bac0f808/types_pytz-2025.2.0.20250809-py3-none-any.whl", hash = "sha256:4f55ed1b43e925cf851a756fe1707e0f5deeb1976e15bf844bcaa025e8fbd0db", size = 10095 }, + { url = "https://files.pythonhosted.org/packages/db/d0/91c24fe54e565f2344d7a6821e6c6bb099841ef09007ea6321a0bac0f808/types_pytz-2025.2.0.20250809-py3-none-any.whl", hash = "sha256:4f55ed1b43e925cf851a756fe1707e0f5deeb1976e15bf844bcaa025e8fbd0db", size = 10095, upload-time = "2025-08-09T03:14:16.674Z" }, ] [[package]] name = "types-pyyaml" version = "6.0.12.20250915" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7e/69/3c51b36d04da19b92f9e815be12753125bd8bc247ba0470a982e6979e71c/types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3", size = 17522 } +sdist = { url = "https://files.pythonhosted.org/packages/7e/69/3c51b36d04da19b92f9e815be12753125bd8bc247ba0470a982e6979e71c/types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3", size = 17522, upload-time = "2025-09-15T03:01:00.728Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/e0/1eed384f02555dde685fff1a1ac805c1c7dcb6dd019c916fe659b1c1f9ec/types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6", size = 20338 }, + { url = "https://files.pythonhosted.org/packages/bd/e0/1eed384f02555dde685fff1a1ac805c1c7dcb6dd019c916fe659b1c1f9ec/types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6", size = 20338, upload-time = "2025-09-15T03:00:59.218Z" }, ] [[package]] @@ -3375,9 +3390,9 @@ dependencies = [ { name = "cryptography" }, { name = "types-pyopenssl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3a/95/c054d3ac940e8bac4ca216470c80c26688a0e79e09f520a942bb27da3386/types-redis-4.6.0.20241004.tar.gz", hash = "sha256:5f17d2b3f9091ab75384153bfa276619ffa1cf6a38da60e10d5e6749cc5b902e", size = 49679 } +sdist = { url = "https://files.pythonhosted.org/packages/3a/95/c054d3ac940e8bac4ca216470c80c26688a0e79e09f520a942bb27da3386/types-redis-4.6.0.20241004.tar.gz", hash = "sha256:5f17d2b3f9091ab75384153bfa276619ffa1cf6a38da60e10d5e6749cc5b902e", size = 49679, upload-time = "2024-10-04T02:43:59.224Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/55/82/7d25dce10aad92d2226b269bce2f85cfd843b4477cd50245d7d40ecf8f89/types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed", size = 58737 }, + { url = "https://files.pythonhosted.org/packages/55/82/7d25dce10aad92d2226b269bce2f85cfd843b4477cd50245d7d40ecf8f89/types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed", size = 58737, upload-time = "2024-10-04T02:43:57.968Z" }, ] [[package]] @@ -3387,18 +3402,18 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "types-urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f9/b8/c1e8d39996b4929b918aba10dba5de07a8b3f4c8487bb61bb79882544e69/types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0", size = 15535 } +sdist = { url = "https://files.pythonhosted.org/packages/f9/b8/c1e8d39996b4929b918aba10dba5de07a8b3f4c8487bb61bb79882544e69/types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0", size = 15535, upload-time = "2023-09-27T06:19:38.443Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/a1/6f8dc74d9069e790d604ddae70cb46dcbac668f1bb08136e7b0f2f5cd3bf/types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9", size = 14516 }, + { url = "https://files.pythonhosted.org/packages/5c/a1/6f8dc74d9069e790d604ddae70cb46dcbac668f1bb08136e7b0f2f5cd3bf/types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9", size = 14516, upload-time = "2023-09-27T06:19:36.373Z" }, ] [[package]] name = "types-setuptools" version = "80.9.0.20250822" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/19/bd/1e5f949b7cb740c9f0feaac430e301b8f1c5f11a81e26324299ea671a237/types_setuptools-80.9.0.20250822.tar.gz", hash = "sha256:070ea7716968ec67a84c7f7768d9952ff24d28b65b6594797a464f1b3066f965", size = 41296 } +sdist = { url = "https://files.pythonhosted.org/packages/19/bd/1e5f949b7cb740c9f0feaac430e301b8f1c5f11a81e26324299ea671a237/types_setuptools-80.9.0.20250822.tar.gz", hash = "sha256:070ea7716968ec67a84c7f7768d9952ff24d28b65b6594797a464f1b3066f965", size = 41296, upload-time = "2025-08-22T03:02:08.771Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b6/2d/475bf15c1cdc172e7a0d665b6e373ebfb1e9bf734d3f2f543d668b07a142/types_setuptools-80.9.0.20250822-py3-none-any.whl", hash = "sha256:53bf881cb9d7e46ed12c76ef76c0aaf28cfe6211d3fab12e0b83620b1a8642c3", size = 63179 }, + { url = "https://files.pythonhosted.org/packages/b6/2d/475bf15c1cdc172e7a0d665b6e373ebfb1e9bf734d3f2f543d668b07a142/types_setuptools-80.9.0.20250822-py3-none-any.whl", hash = "sha256:53bf881cb9d7e46ed12c76ef76c0aaf28cfe6211d3fab12e0b83620b1a8642c3", size = 63179, upload-time = "2025-08-22T03:02:07.643Z" }, ] [[package]] @@ -3408,27 +3423,27 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "types-pytz" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e7/cf/e4d446e57c0b14ed1da4de180d2a4cac773b667f183e83bdad76ea6e2238/types-tzlocal-5.1.0.1.tar.gz", hash = "sha256:b84a115c0c68f0d0fa9af1c57f0645eeef0e539147806faf1f95ac3ac01ce47b", size = 3549 } +sdist = { url = "https://files.pythonhosted.org/packages/e7/cf/e4d446e57c0b14ed1da4de180d2a4cac773b667f183e83bdad76ea6e2238/types-tzlocal-5.1.0.1.tar.gz", hash = "sha256:b84a115c0c68f0d0fa9af1c57f0645eeef0e539147806faf1f95ac3ac01ce47b", size = 3549, upload-time = "2023-10-24T02:15:07.127Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/13/caeb438290df069ddda6f055d0eb14337ada293c7d43ab89419ba4b1a778/types_tzlocal-5.1.0.1-py3-none-any.whl", hash = "sha256:0302e8067c86936de8f7e0aaedc2cfbf240080802c603df0f80312fbd4efb926", size = 3005 }, + { url = "https://files.pythonhosted.org/packages/f8/13/caeb438290df069ddda6f055d0eb14337ada293c7d43ab89419ba4b1a778/types_tzlocal-5.1.0.1-py3-none-any.whl", hash = "sha256:0302e8067c86936de8f7e0aaedc2cfbf240080802c603df0f80312fbd4efb926", size = 3005, upload-time = "2023-10-24T02:15:05.815Z" }, ] [[package]] name = "types-urllib3" version = "1.26.25.14" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/73/de/b9d7a68ad39092368fb21dd6194b362b98a1daeea5dcfef5e1adb5031c7e/types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f", size = 11239 } +sdist = { url = "https://files.pythonhosted.org/packages/73/de/b9d7a68ad39092368fb21dd6194b362b98a1daeea5dcfef5e1adb5031c7e/types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f", size = 11239, upload-time = "2023-07-20T15:19:31.307Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/7b/3fc711b2efea5e85a7a0bbfe269ea944aa767bbba5ec52f9ee45d362ccf3/types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e", size = 15377 }, + { url = "https://files.pythonhosted.org/packages/11/7b/3fc711b2efea5e85a7a0bbfe269ea944aa767bbba5ec52f9ee45d362ccf3/types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e", size = 15377, upload-time = "2023-07-20T15:19:30.379Z" }, ] [[package]] name = "typing-extensions" version = "4.15.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391 } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614 }, + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] [[package]] @@ -3439,9 +3454,9 @@ dependencies = [ { name = "mypy-extensions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dc/74/1789779d91f1961fa9438e9a8710cdae6bd138c80d7303996933d117264a/typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78", size = 13825 } +sdist = { url = "https://files.pythonhosted.org/packages/dc/74/1789779d91f1961fa9438e9a8710cdae6bd138c80d7303996933d117264a/typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78", size = 13825, upload-time = "2023-05-24T20:25:47.612Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/65/f3/107a22063bf27bdccf2024833d3445f4eea42b2e598abfbd46f6a63b6cb0/typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f", size = 8827 }, + { url = "https://files.pythonhosted.org/packages/65/f3/107a22063bf27bdccf2024833d3445f4eea42b2e598abfbd46f6a63b6cb0/typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f", size = 8827, upload-time = "2023-05-24T20:25:45.287Z" }, ] [[package]] @@ -3451,37 +3466,37 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f8/b1/0c11f5058406b3af7609f121aaa6b609744687f1d158b3c3a5bf4cc94238/typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28", size = 75726 } +sdist = { url = "https://files.pythonhosted.org/packages/f8/b1/0c11f5058406b3af7609f121aaa6b609744687f1d158b3c3a5bf4cc94238/typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28", size = 75726, upload-time = "2025-05-21T18:55:23.885Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552 }, + { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" }, ] [[package]] name = "tzdata" version = "2025.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380 } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, ] [[package]] name = "ujson" version = "5.11.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/d9/3f17e3c5773fb4941c68d9a37a47b1a79c9649d6c56aefbed87cc409d18a/ujson-5.11.0.tar.gz", hash = "sha256:e204ae6f909f099ba6b6b942131cee359ddda2b6e4ea39c12eb8b991fe2010e0", size = 7156583 } +sdist = { url = "https://files.pythonhosted.org/packages/43/d9/3f17e3c5773fb4941c68d9a37a47b1a79c9649d6c56aefbed87cc409d18a/ujson-5.11.0.tar.gz", hash = "sha256:e204ae6f909f099ba6b6b942131cee359ddda2b6e4ea39c12eb8b991fe2010e0", size = 7156583, upload-time = "2025-08-20T11:57:02.452Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/ef/a9cb1fce38f699123ff012161599fb9f2ff3f8d482b4b18c43a2dc35073f/ujson-5.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7895f0d2d53bd6aea11743bd56e3cb82d729980636cd0ed9b89418bf66591702", size = 55434 }, - { url = "https://files.pythonhosted.org/packages/b1/05/dba51a00eb30bd947791b173766cbed3492269c150a7771d2750000c965f/ujson-5.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12b5e7e22a1fe01058000d1b317d3b65cc3daf61bd2ea7a2b76721fe160fa74d", size = 53190 }, - { url = "https://files.pythonhosted.org/packages/03/3c/fd11a224f73fbffa299fb9644e425f38b38b30231f7923a088dd513aabb4/ujson-5.11.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0180a480a7d099082501cad1fe85252e4d4bf926b40960fb3d9e87a3a6fbbc80", size = 57600 }, - { url = "https://files.pythonhosted.org/packages/55/b9/405103cae24899df688a3431c776e00528bd4799e7d68820e7ebcf824f92/ujson-5.11.0-cp312-cp312-manylinux_2_24_i686.manylinux_2_28_i686.whl", hash = "sha256:fa79fdb47701942c2132a9dd2297a1a85941d966d8c87bfd9e29b0cf423f26cc", size = 59791 }, - { url = "https://files.pythonhosted.org/packages/17/7b/2dcbc2bbfdbf68f2368fb21ab0f6735e872290bb604c75f6e06b81edcb3f/ujson-5.11.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8254e858437c00f17cb72e7a644fc42dad0ebb21ea981b71df6e84b1072aaa7c", size = 57356 }, - { url = "https://files.pythonhosted.org/packages/d1/71/fea2ca18986a366c750767b694430d5ded6b20b6985fddca72f74af38a4c/ujson-5.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1aa8a2ab482f09f6c10fba37112af5f957689a79ea598399c85009f2f29898b5", size = 1036313 }, - { url = "https://files.pythonhosted.org/packages/a3/bb/d4220bd7532eac6288d8115db51710fa2d7d271250797b0bfba9f1e755af/ujson-5.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a638425d3c6eed0318df663df44480f4a40dc87cc7c6da44d221418312f6413b", size = 1195782 }, - { url = "https://files.pythonhosted.org/packages/80/47/226e540aa38878ce1194454385701d82df538ccb5ff8db2cf1641dde849a/ujson-5.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7e3cff632c1d78023b15f7e3a81c3745cd3f94c044d1e8fa8efbd6b161997bbc", size = 1088817 }, - { url = "https://files.pythonhosted.org/packages/7e/81/546042f0b23c9040d61d46ea5ca76f0cc5e0d399180ddfb2ae976ebff5b5/ujson-5.11.0-cp312-cp312-win32.whl", hash = "sha256:be6b0eaf92cae8cdee4d4c9e074bde43ef1c590ed5ba037ea26c9632fb479c88", size = 39757 }, - { url = "https://files.pythonhosted.org/packages/44/1b/27c05dc8c9728f44875d74b5bfa948ce91f6c33349232619279f35c6e817/ujson-5.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:b7b136cc6abc7619124fd897ef75f8e63105298b5ca9bdf43ebd0e1fa0ee105f", size = 43859 }, - { url = "https://files.pythonhosted.org/packages/22/2d/37b6557c97c3409c202c838aa9c960ca3896843b4295c4b7bb2bbd260664/ujson-5.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:6cd2df62f24c506a0ba322d5e4fe4466d47a9467b57e881ee15a31f7ecf68ff6", size = 38361 }, + { url = "https://files.pythonhosted.org/packages/b9/ef/a9cb1fce38f699123ff012161599fb9f2ff3f8d482b4b18c43a2dc35073f/ujson-5.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7895f0d2d53bd6aea11743bd56e3cb82d729980636cd0ed9b89418bf66591702", size = 55434, upload-time = "2025-08-20T11:55:34.987Z" }, + { url = "https://files.pythonhosted.org/packages/b1/05/dba51a00eb30bd947791b173766cbed3492269c150a7771d2750000c965f/ujson-5.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12b5e7e22a1fe01058000d1b317d3b65cc3daf61bd2ea7a2b76721fe160fa74d", size = 53190, upload-time = "2025-08-20T11:55:36.384Z" }, + { url = "https://files.pythonhosted.org/packages/03/3c/fd11a224f73fbffa299fb9644e425f38b38b30231f7923a088dd513aabb4/ujson-5.11.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0180a480a7d099082501cad1fe85252e4d4bf926b40960fb3d9e87a3a6fbbc80", size = 57600, upload-time = "2025-08-20T11:55:37.692Z" }, + { url = "https://files.pythonhosted.org/packages/55/b9/405103cae24899df688a3431c776e00528bd4799e7d68820e7ebcf824f92/ujson-5.11.0-cp312-cp312-manylinux_2_24_i686.manylinux_2_28_i686.whl", hash = "sha256:fa79fdb47701942c2132a9dd2297a1a85941d966d8c87bfd9e29b0cf423f26cc", size = 59791, upload-time = "2025-08-20T11:55:38.877Z" }, + { url = "https://files.pythonhosted.org/packages/17/7b/2dcbc2bbfdbf68f2368fb21ab0f6735e872290bb604c75f6e06b81edcb3f/ujson-5.11.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8254e858437c00f17cb72e7a644fc42dad0ebb21ea981b71df6e84b1072aaa7c", size = 57356, upload-time = "2025-08-20T11:55:40.036Z" }, + { url = "https://files.pythonhosted.org/packages/d1/71/fea2ca18986a366c750767b694430d5ded6b20b6985fddca72f74af38a4c/ujson-5.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1aa8a2ab482f09f6c10fba37112af5f957689a79ea598399c85009f2f29898b5", size = 1036313, upload-time = "2025-08-20T11:55:41.408Z" }, + { url = "https://files.pythonhosted.org/packages/a3/bb/d4220bd7532eac6288d8115db51710fa2d7d271250797b0bfba9f1e755af/ujson-5.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a638425d3c6eed0318df663df44480f4a40dc87cc7c6da44d221418312f6413b", size = 1195782, upload-time = "2025-08-20T11:55:43.357Z" }, + { url = "https://files.pythonhosted.org/packages/80/47/226e540aa38878ce1194454385701d82df538ccb5ff8db2cf1641dde849a/ujson-5.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7e3cff632c1d78023b15f7e3a81c3745cd3f94c044d1e8fa8efbd6b161997bbc", size = 1088817, upload-time = "2025-08-20T11:55:45.262Z" }, + { url = "https://files.pythonhosted.org/packages/7e/81/546042f0b23c9040d61d46ea5ca76f0cc5e0d399180ddfb2ae976ebff5b5/ujson-5.11.0-cp312-cp312-win32.whl", hash = "sha256:be6b0eaf92cae8cdee4d4c9e074bde43ef1c590ed5ba037ea26c9632fb479c88", size = 39757, upload-time = "2025-08-20T11:55:46.522Z" }, + { url = "https://files.pythonhosted.org/packages/44/1b/27c05dc8c9728f44875d74b5bfa948ce91f6c33349232619279f35c6e817/ujson-5.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:b7b136cc6abc7619124fd897ef75f8e63105298b5ca9bdf43ebd0e1fa0ee105f", size = 43859, upload-time = "2025-08-20T11:55:47.987Z" }, + { url = "https://files.pythonhosted.org/packages/22/2d/37b6557c97c3409c202c838aa9c960ca3896843b4295c4b7bb2bbd260664/ujson-5.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:6cd2df62f24c506a0ba322d5e4fe4466d47a9467b57e881ee15a31f7ecf68ff6", size = 38361, upload-time = "2025-08-20T11:55:49.122Z" }, ] [[package]] @@ -3897,36 +3912,36 @@ requires-dist = [ name = "uritemplate" version = "4.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/98/60/f174043244c5306c9988380d2cb10009f91563fc4b31293d27e17201af56/uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e", size = 33267 } +sdist = { url = "https://files.pythonhosted.org/packages/98/60/f174043244c5306c9988380d2cb10009f91563fc4b31293d27e17201af56/uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e", size = 33267, upload-time = "2025-06-02T15:12:06.318Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/99/3ae339466c9183ea5b8ae87b34c0b897eda475d2aec2307cae60e5cd4f29/uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686", size = 11488 }, + { url = "https://files.pythonhosted.org/packages/a9/99/3ae339466c9183ea5b8ae87b34c0b897eda475d2aec2307cae60e5cd4f29/uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686", size = 11488, upload-time = "2025-06-02T15:12:03.405Z" }, ] [[package]] name = "urllib3" version = "1.26.20" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e4/e8/6ff5e6bc22095cfc59b6ea711b687e2b7ed4bdb373f7eeec370a97d7392f/urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32", size = 307380 } +sdist = { url = "https://files.pythonhosted.org/packages/e4/e8/6ff5e6bc22095cfc59b6ea711b687e2b7ed4bdb373f7eeec370a97d7392f/urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32", size = 307380, upload-time = "2024-08-29T15:43:11.37Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/33/cf/8435d5a7159e2a9c83a95896ed596f68cf798005fe107cc655b5c5c14704/urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e", size = 144225 }, + { url = "https://files.pythonhosted.org/packages/33/cf/8435d5a7159e2a9c83a95896ed596f68cf798005fe107cc655b5c5c14704/urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e", size = 144225, upload-time = "2024-08-29T15:43:08.921Z" }, ] [[package]] name = "validators" version = "0.35.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/53/66/a435d9ae49850b2f071f7ebd8119dd4e84872b01630d6736761e6e7fd847/validators-0.35.0.tar.gz", hash = "sha256:992d6c48a4e77c81f1b4daba10d16c3a9bb0dbb79b3a19ea847ff0928e70497a", size = 73399 } +sdist = { url = "https://files.pythonhosted.org/packages/53/66/a435d9ae49850b2f071f7ebd8119dd4e84872b01630d6736761e6e7fd847/validators-0.35.0.tar.gz", hash = "sha256:992d6c48a4e77c81f1b4daba10d16c3a9bb0dbb79b3a19ea847ff0928e70497a", size = 73399, upload-time = "2025-05-01T05:42:06.7Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/6e/3e955517e22cbdd565f2f8b2e73d52528b14b8bcfdb04f62466b071de847/validators-0.35.0-py3-none-any.whl", hash = "sha256:e8c947097eae7892cb3d26868d637f79f47b4a0554bc6b80065dfe5aac3705dd", size = 44712 }, + { url = "https://files.pythonhosted.org/packages/fa/6e/3e955517e22cbdd565f2f8b2e73d52528b14b8bcfdb04f62466b071de847/validators-0.35.0-py3-none-any.whl", hash = "sha256:e8c947097eae7892cb3d26868d637f79f47b4a0554bc6b80065dfe5aac3705dd", size = 44712, upload-time = "2025-05-01T05:42:04.203Z" }, ] [[package]] name = "vine" version = "5.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bd/e4/d07b5f29d283596b9727dd5275ccbceb63c44a1a82aa9e4bfd20426762ac/vine-5.1.0.tar.gz", hash = "sha256:8b62e981d35c41049211cf62a0a1242d8c1ee9bd15bb196ce38aefd6799e61e0", size = 48980 } +sdist = { url = "https://files.pythonhosted.org/packages/bd/e4/d07b5f29d283596b9727dd5275ccbceb63c44a1a82aa9e4bfd20426762ac/vine-5.1.0.tar.gz", hash = "sha256:8b62e981d35c41049211cf62a0a1242d8c1ee9bd15bb196ce38aefd6799e61e0", size = 48980, upload-time = "2023-11-05T08:46:53.857Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/03/ff/7c0c86c43b3cbb927e0ccc0255cb4057ceba4799cd44ae95174ce8e8b5b2/vine-5.1.0-py3-none-any.whl", hash = "sha256:40fdf3c48b2cfe1c38a49e9ae2da6fda88e4794c810050a728bd7413811fb1dc", size = 9636 }, + { url = "https://files.pythonhosted.org/packages/03/ff/7c0c86c43b3cbb927e0ccc0255cb4057ceba4799cd44ae95174ce8e8b5b2/vine-5.1.0-py3-none-any.whl", hash = "sha256:40fdf3c48b2cfe1c38a49e9ae2da6fda88e4794c810050a728bd7413811fb1dc", size = 9636, upload-time = "2023-11-05T08:46:51.205Z" }, ] [[package]] @@ -3938,18 +3953,18 @@ dependencies = [ { name = "filelock" }, { name = "platformdirs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1c/14/37fcdba2808a6c615681cd216fecae00413c9dab44fb2e57805ecf3eaee3/virtualenv-20.34.0.tar.gz", hash = "sha256:44815b2c9dee7ed86e387b842a84f20b93f7f417f95886ca1996a72a4138eb1a", size = 6003808 } +sdist = { url = "https://files.pythonhosted.org/packages/1c/14/37fcdba2808a6c615681cd216fecae00413c9dab44fb2e57805ecf3eaee3/virtualenv-20.34.0.tar.gz", hash = "sha256:44815b2c9dee7ed86e387b842a84f20b93f7f417f95886ca1996a72a4138eb1a", size = 6003808, upload-time = "2025-08-13T14:24:07.464Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/06/04c8e804f813cf972e3262f3f8584c232de64f0cde9f703b46cf53a45090/virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026", size = 5983279 }, + { url = "https://files.pythonhosted.org/packages/76/06/04c8e804f813cf972e3262f3f8584c232de64f0cde9f703b46cf53a45090/virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026", size = 5983279, upload-time = "2025-08-13T14:24:05.111Z" }, ] [[package]] name = "wcwidth" version = "0.2.14" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/24/30/6b0809f4510673dc723187aeaf24c7f5459922d01e2f794277a3dfb90345/wcwidth-0.2.14.tar.gz", hash = "sha256:4d478375d31bc5395a3c55c40ccdf3354688364cd61c4f6adacaa9215d0b3605", size = 102293 } +sdist = { url = "https://files.pythonhosted.org/packages/24/30/6b0809f4510673dc723187aeaf24c7f5459922d01e2f794277a3dfb90345/wcwidth-0.2.14.tar.gz", hash = "sha256:4d478375d31bc5395a3c55c40ccdf3354688364cd61c4f6adacaa9215d0b3605", size = 102293, upload-time = "2025-09-22T16:29:53.023Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/b5/123f13c975e9f27ab9c0770f514345bd406d0e8d3b7a0723af9d43f710af/wcwidth-0.2.14-py2.py3-none-any.whl", hash = "sha256:a7bb560c8aee30f9957e5f9895805edd20602f2d7f720186dfd906e82b4982e1", size = 37286 }, + { url = "https://files.pythonhosted.org/packages/af/b5/123f13c975e9f27ab9c0770f514345bd406d0e8d3b7a0723af9d43f710af/wcwidth-0.2.14-py2.py3-none-any.whl", hash = "sha256:a7bb560c8aee30f9957e5f9895805edd20602f2d7f720186dfd906e82b4982e1", size = 37286, upload-time = "2025-09-22T16:29:51.641Z" }, ] [[package]] @@ -3965,37 +3980,37 @@ dependencies = [ { name = "pydantic" }, { name = "validators" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bd/0e/e4582b007427187a9fde55fa575db4b766c81929d2b43a3dd8becce50567/weaviate_client-4.17.0.tar.gz", hash = "sha256:731d58d84b0989df4db399b686357ed285fb95971a492ccca8dec90bb2343c51", size = 769019 } +sdist = { url = "https://files.pythonhosted.org/packages/bd/0e/e4582b007427187a9fde55fa575db4b766c81929d2b43a3dd8becce50567/weaviate_client-4.17.0.tar.gz", hash = "sha256:731d58d84b0989df4db399b686357ed285fb95971a492ccca8dec90bb2343c51", size = 769019, upload-time = "2025-09-26T11:20:27.381Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5b/c5/2da3a45866da7a935dab8ad07be05dcaee48b3ad4955144583b651929be7/weaviate_client-4.17.0-py3-none-any.whl", hash = "sha256:60e4a355b90537ee1e942ab0b76a94750897a13d9cf13c5a6decbd166d0ca8b5", size = 582763 }, + { url = "https://files.pythonhosted.org/packages/5b/c5/2da3a45866da7a935dab8ad07be05dcaee48b3ad4955144583b651929be7/weaviate_client-4.17.0-py3-none-any.whl", hash = "sha256:60e4a355b90537ee1e942ab0b76a94750897a13d9cf13c5a6decbd166d0ca8b5", size = 582763, upload-time = "2025-09-26T11:20:25.864Z" }, ] [[package]] name = "websocket-client" version = "1.8.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648 } +sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648, upload-time = "2024-04-23T22:16:16.976Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 }, + { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826, upload-time = "2024-04-23T22:16:14.422Z" }, ] [[package]] name = "wrapt" version = "1.17.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547 } +sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9f/41/cad1aba93e752f1f9268c77270da3c469883d56e2798e7df6240dcb2287b/wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", size = 53998 }, - { url = "https://files.pythonhosted.org/packages/60/f8/096a7cc13097a1869fe44efe68dace40d2a16ecb853141394047f0780b96/wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", size = 39020 }, - { url = "https://files.pythonhosted.org/packages/33/df/bdf864b8997aab4febb96a9ae5c124f700a5abd9b5e13d2a3214ec4be705/wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", size = 39098 }, - { url = "https://files.pythonhosted.org/packages/9f/81/5d931d78d0eb732b95dc3ddaeeb71c8bb572fb01356e9133916cd729ecdd/wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", size = 88036 }, - { url = "https://files.pythonhosted.org/packages/ca/38/2e1785df03b3d72d34fc6252d91d9d12dc27a5c89caef3335a1bbb8908ca/wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", size = 88156 }, - { url = "https://files.pythonhosted.org/packages/b3/8b/48cdb60fe0603e34e05cffda0b2a4adab81fd43718e11111a4b0100fd7c1/wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", size = 87102 }, - { url = "https://files.pythonhosted.org/packages/3c/51/d81abca783b58f40a154f1b2c56db1d2d9e0d04fa2d4224e357529f57a57/wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", size = 87732 }, - { url = "https://files.pythonhosted.org/packages/9e/b1/43b286ca1392a006d5336412d41663eeef1ad57485f3e52c767376ba7e5a/wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", size = 36705 }, - { url = "https://files.pythonhosted.org/packages/28/de/49493f962bd3c586ab4b88066e967aa2e0703d6ef2c43aa28cb83bf7b507/wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", size = 38877 }, - { url = "https://files.pythonhosted.org/packages/f1/48/0f7102fe9cb1e8a5a77f80d4f0956d62d97034bbe88d33e94699f99d181d/wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", size = 36885 }, - { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591 }, + { url = "https://files.pythonhosted.org/packages/9f/41/cad1aba93e752f1f9268c77270da3c469883d56e2798e7df6240dcb2287b/wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", size = 53998, upload-time = "2025-08-12T05:51:47.138Z" }, + { url = "https://files.pythonhosted.org/packages/60/f8/096a7cc13097a1869fe44efe68dace40d2a16ecb853141394047f0780b96/wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", size = 39020, upload-time = "2025-08-12T05:51:35.906Z" }, + { url = "https://files.pythonhosted.org/packages/33/df/bdf864b8997aab4febb96a9ae5c124f700a5abd9b5e13d2a3214ec4be705/wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", size = 39098, upload-time = "2025-08-12T05:51:57.474Z" }, + { url = "https://files.pythonhosted.org/packages/9f/81/5d931d78d0eb732b95dc3ddaeeb71c8bb572fb01356e9133916cd729ecdd/wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", size = 88036, upload-time = "2025-08-12T05:52:34.784Z" }, + { url = "https://files.pythonhosted.org/packages/ca/38/2e1785df03b3d72d34fc6252d91d9d12dc27a5c89caef3335a1bbb8908ca/wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", size = 88156, upload-time = "2025-08-12T05:52:13.599Z" }, + { url = "https://files.pythonhosted.org/packages/b3/8b/48cdb60fe0603e34e05cffda0b2a4adab81fd43718e11111a4b0100fd7c1/wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", size = 87102, upload-time = "2025-08-12T05:52:14.56Z" }, + { url = "https://files.pythonhosted.org/packages/3c/51/d81abca783b58f40a154f1b2c56db1d2d9e0d04fa2d4224e357529f57a57/wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", size = 87732, upload-time = "2025-08-12T05:52:36.165Z" }, + { url = "https://files.pythonhosted.org/packages/9e/b1/43b286ca1392a006d5336412d41663eeef1ad57485f3e52c767376ba7e5a/wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", size = 36705, upload-time = "2025-08-12T05:53:07.123Z" }, + { url = "https://files.pythonhosted.org/packages/28/de/49493f962bd3c586ab4b88066e967aa2e0703d6ef2c43aa28cb83bf7b507/wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", size = 38877, upload-time = "2025-08-12T05:53:05.436Z" }, + { url = "https://files.pythonhosted.org/packages/f1/48/0f7102fe9cb1e8a5a77f80d4f0956d62d97034bbe88d33e94699f99d181d/wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", size = 36885, upload-time = "2025-08-12T05:52:54.367Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" }, ] [[package]] @@ -4005,9 +4020,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c9/4a/44d3c295350d776427904d73c189e10aeae66d7f555bb2feee16d1e4ba5a/wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", size = 53425 } +sdist = { url = "https://files.pythonhosted.org/packages/c9/4a/44d3c295350d776427904d73c189e10aeae66d7f555bb2feee16d1e4ba5a/wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", size = 53425, upload-time = "2022-08-23T19:58:21.447Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/78/58/e860788190eba3bcce367f74d29c4675466ce8dddfba85f7827588416f01/wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736", size = 24226 }, + { url = "https://files.pythonhosted.org/packages/78/58/e860788190eba3bcce367f74d29c4675466ce8dddfba85f7827588416f01/wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736", size = 24226, upload-time = "2022-08-23T19:58:19.96Z" }, ] [[package]] @@ -4018,9 +4033,9 @@ dependencies = [ { name = "pathspec" }, { name = "pyyaml" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/46/f2/cd8b7584a48ee83f0bc94f8a32fea38734cefcdc6f7324c4d3bfc699457b/yamllint-1.37.1.tar.gz", hash = "sha256:81f7c0c5559becc8049470d86046b36e96113637bcbe4753ecef06977c00245d", size = 141613 } +sdist = { url = "https://files.pythonhosted.org/packages/46/f2/cd8b7584a48ee83f0bc94f8a32fea38734cefcdc6f7324c4d3bfc699457b/yamllint-1.37.1.tar.gz", hash = "sha256:81f7c0c5559becc8049470d86046b36e96113637bcbe4753ecef06977c00245d", size = 141613, upload-time = "2025-05-04T08:25:54.355Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/dd/b9/be7a4cfdf47e03785f657f94daea8123e838d817be76c684298305bd789f/yamllint-1.37.1-py3-none-any.whl", hash = "sha256:364f0d79e81409f591e323725e6a9f4504c8699ddf2d7263d8d2b539cd66a583", size = 68813 }, + { url = "https://files.pythonhosted.org/packages/dd/b9/be7a4cfdf47e03785f657f94daea8123e838d817be76c684298305bd789f/yamllint-1.37.1-py3-none-any.whl", hash = "sha256:364f0d79e81409f591e323725e6a9f4504c8699ddf2d7263d8d2b539cd66a583", size = 68813, upload-time = "2025-05-04T08:25:52.552Z" }, ] [[package]] @@ -4032,42 +4047,42 @@ dependencies = [ { name = "multidict" }, { name = "propcache" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3c/fb/efaa23fa4e45537b827620f04cf8f3cd658b76642205162e072703a5b963/yarl-1.20.1.tar.gz", hash = "sha256:d017a4997ee50c91fd5466cef416231bb82177b93b029906cefc542ce14c35ac", size = 186428 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/9a/cb7fad7d73c69f296eda6815e4a2c7ed53fc70c2f136479a91c8e5fbdb6d/yarl-1.20.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdcc4cd244e58593a4379fe60fdee5ac0331f8eb70320a24d591a3be197b94a9", size = 133667 }, - { url = "https://files.pythonhosted.org/packages/67/38/688577a1cb1e656e3971fb66a3492501c5a5df56d99722e57c98249e5b8a/yarl-1.20.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b29a2c385a5f5b9c7d9347e5812b6f7ab267193c62d282a540b4fc528c8a9d2a", size = 91025 }, - { url = "https://files.pythonhosted.org/packages/50/ec/72991ae51febeb11a42813fc259f0d4c8e0507f2b74b5514618d8b640365/yarl-1.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1112ae8154186dfe2de4732197f59c05a83dc814849a5ced892b708033f40dc2", size = 89709 }, - { url = "https://files.pythonhosted.org/packages/99/da/4d798025490e89426e9f976702e5f9482005c548c579bdae792a4c37769e/yarl-1.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90bbd29c4fe234233f7fa2b9b121fb63c321830e5d05b45153a2ca68f7d310ee", size = 352287 }, - { url = "https://files.pythonhosted.org/packages/1a/26/54a15c6a567aac1c61b18aa0f4b8aa2e285a52d547d1be8bf48abe2b3991/yarl-1.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:680e19c7ce3710ac4cd964e90dad99bf9b5029372ba0c7cbfcd55e54d90ea819", size = 345429 }, - { url = "https://files.pythonhosted.org/packages/d6/95/9dcf2386cb875b234353b93ec43e40219e14900e046bf6ac118f94b1e353/yarl-1.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a979218c1fdb4246a05efc2cc23859d47c89af463a90b99b7c56094daf25a16", size = 365429 }, - { url = "https://files.pythonhosted.org/packages/91/b2/33a8750f6a4bc224242a635f5f2cff6d6ad5ba651f6edcccf721992c21a0/yarl-1.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255b468adf57b4a7b65d8aad5b5138dce6a0752c139965711bdcb81bc370e1b6", size = 363862 }, - { url = "https://files.pythonhosted.org/packages/98/28/3ab7acc5b51f4434b181b0cee8f1f4b77a65919700a355fb3617f9488874/yarl-1.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a97d67108e79cfe22e2b430d80d7571ae57d19f17cda8bb967057ca8a7bf5bfd", size = 355616 }, - { url = "https://files.pythonhosted.org/packages/36/a3/f666894aa947a371724ec7cd2e5daa78ee8a777b21509b4252dd7bd15e29/yarl-1.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8570d998db4ddbfb9a590b185a0a33dbf8aafb831d07a5257b4ec9948df9cb0a", size = 339954 }, - { url = "https://files.pythonhosted.org/packages/f1/81/5f466427e09773c04219d3450d7a1256138a010b6c9f0af2d48565e9ad13/yarl-1.20.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:97c75596019baae7c71ccf1d8cc4738bc08134060d0adfcbe5642f778d1dca38", size = 365575 }, - { url = "https://files.pythonhosted.org/packages/2e/e3/e4b0ad8403e97e6c9972dd587388940a032f030ebec196ab81a3b8e94d31/yarl-1.20.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:1c48912653e63aef91ff988c5432832692ac5a1d8f0fb8a33091520b5bbe19ef", size = 365061 }, - { url = "https://files.pythonhosted.org/packages/ac/99/b8a142e79eb86c926f9f06452eb13ecb1bb5713bd01dc0038faf5452e544/yarl-1.20.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4c3ae28f3ae1563c50f3d37f064ddb1511ecc1d5584e88c6b7c63cf7702a6d5f", size = 364142 }, - { url = "https://files.pythonhosted.org/packages/34/f2/08ed34a4a506d82a1a3e5bab99ccd930a040f9b6449e9fd050320e45845c/yarl-1.20.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c5e9642f27036283550f5f57dc6156c51084b458570b9d0d96100c8bebb186a8", size = 381894 }, - { url = "https://files.pythonhosted.org/packages/92/f8/9a3fbf0968eac704f681726eff595dce9b49c8a25cd92bf83df209668285/yarl-1.20.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2c26b0c49220d5799f7b22c6838409ee9bc58ee5c95361a4d7831f03cc225b5a", size = 383378 }, - { url = "https://files.pythonhosted.org/packages/af/85/9363f77bdfa1e4d690957cd39d192c4cacd1c58965df0470a4905253b54f/yarl-1.20.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:564ab3d517e3d01c408c67f2e5247aad4019dcf1969982aba3974b4093279004", size = 374069 }, - { url = "https://files.pythonhosted.org/packages/35/99/9918c8739ba271dcd935400cff8b32e3cd319eaf02fcd023d5dcd487a7c8/yarl-1.20.1-cp312-cp312-win32.whl", hash = "sha256:daea0d313868da1cf2fac6b2d3a25c6e3a9e879483244be38c8e6a41f1d876a5", size = 81249 }, - { url = "https://files.pythonhosted.org/packages/eb/83/5d9092950565481b413b31a23e75dd3418ff0a277d6e0abf3729d4d1ce25/yarl-1.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:48ea7d7f9be0487339828a4de0360d7ce0efc06524a48e1810f945c45b813698", size = 86710 }, - { url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542 }, +sdist = { url = "https://files.pythonhosted.org/packages/3c/fb/efaa23fa4e45537b827620f04cf8f3cd658b76642205162e072703a5b963/yarl-1.20.1.tar.gz", hash = "sha256:d017a4997ee50c91fd5466cef416231bb82177b93b029906cefc542ce14c35ac", size = 186428, upload-time = "2025-06-10T00:46:09.923Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/9a/cb7fad7d73c69f296eda6815e4a2c7ed53fc70c2f136479a91c8e5fbdb6d/yarl-1.20.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdcc4cd244e58593a4379fe60fdee5ac0331f8eb70320a24d591a3be197b94a9", size = 133667, upload-time = "2025-06-10T00:43:44.369Z" }, + { url = "https://files.pythonhosted.org/packages/67/38/688577a1cb1e656e3971fb66a3492501c5a5df56d99722e57c98249e5b8a/yarl-1.20.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b29a2c385a5f5b9c7d9347e5812b6f7ab267193c62d282a540b4fc528c8a9d2a", size = 91025, upload-time = "2025-06-10T00:43:46.295Z" }, + { url = "https://files.pythonhosted.org/packages/50/ec/72991ae51febeb11a42813fc259f0d4c8e0507f2b74b5514618d8b640365/yarl-1.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1112ae8154186dfe2de4732197f59c05a83dc814849a5ced892b708033f40dc2", size = 89709, upload-time = "2025-06-10T00:43:48.22Z" }, + { url = "https://files.pythonhosted.org/packages/99/da/4d798025490e89426e9f976702e5f9482005c548c579bdae792a4c37769e/yarl-1.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90bbd29c4fe234233f7fa2b9b121fb63c321830e5d05b45153a2ca68f7d310ee", size = 352287, upload-time = "2025-06-10T00:43:49.924Z" }, + { url = "https://files.pythonhosted.org/packages/1a/26/54a15c6a567aac1c61b18aa0f4b8aa2e285a52d547d1be8bf48abe2b3991/yarl-1.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:680e19c7ce3710ac4cd964e90dad99bf9b5029372ba0c7cbfcd55e54d90ea819", size = 345429, upload-time = "2025-06-10T00:43:51.7Z" }, + { url = "https://files.pythonhosted.org/packages/d6/95/9dcf2386cb875b234353b93ec43e40219e14900e046bf6ac118f94b1e353/yarl-1.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a979218c1fdb4246a05efc2cc23859d47c89af463a90b99b7c56094daf25a16", size = 365429, upload-time = "2025-06-10T00:43:53.494Z" }, + { url = "https://files.pythonhosted.org/packages/91/b2/33a8750f6a4bc224242a635f5f2cff6d6ad5ba651f6edcccf721992c21a0/yarl-1.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255b468adf57b4a7b65d8aad5b5138dce6a0752c139965711bdcb81bc370e1b6", size = 363862, upload-time = "2025-06-10T00:43:55.766Z" }, + { url = "https://files.pythonhosted.org/packages/98/28/3ab7acc5b51f4434b181b0cee8f1f4b77a65919700a355fb3617f9488874/yarl-1.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a97d67108e79cfe22e2b430d80d7571ae57d19f17cda8bb967057ca8a7bf5bfd", size = 355616, upload-time = "2025-06-10T00:43:58.056Z" }, + { url = "https://files.pythonhosted.org/packages/36/a3/f666894aa947a371724ec7cd2e5daa78ee8a777b21509b4252dd7bd15e29/yarl-1.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8570d998db4ddbfb9a590b185a0a33dbf8aafb831d07a5257b4ec9948df9cb0a", size = 339954, upload-time = "2025-06-10T00:43:59.773Z" }, + { url = "https://files.pythonhosted.org/packages/f1/81/5f466427e09773c04219d3450d7a1256138a010b6c9f0af2d48565e9ad13/yarl-1.20.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:97c75596019baae7c71ccf1d8cc4738bc08134060d0adfcbe5642f778d1dca38", size = 365575, upload-time = "2025-06-10T00:44:02.051Z" }, + { url = "https://files.pythonhosted.org/packages/2e/e3/e4b0ad8403e97e6c9972dd587388940a032f030ebec196ab81a3b8e94d31/yarl-1.20.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:1c48912653e63aef91ff988c5432832692ac5a1d8f0fb8a33091520b5bbe19ef", size = 365061, upload-time = "2025-06-10T00:44:04.196Z" }, + { url = "https://files.pythonhosted.org/packages/ac/99/b8a142e79eb86c926f9f06452eb13ecb1bb5713bd01dc0038faf5452e544/yarl-1.20.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4c3ae28f3ae1563c50f3d37f064ddb1511ecc1d5584e88c6b7c63cf7702a6d5f", size = 364142, upload-time = "2025-06-10T00:44:06.527Z" }, + { url = "https://files.pythonhosted.org/packages/34/f2/08ed34a4a506d82a1a3e5bab99ccd930a040f9b6449e9fd050320e45845c/yarl-1.20.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c5e9642f27036283550f5f57dc6156c51084b458570b9d0d96100c8bebb186a8", size = 381894, upload-time = "2025-06-10T00:44:08.379Z" }, + { url = "https://files.pythonhosted.org/packages/92/f8/9a3fbf0968eac704f681726eff595dce9b49c8a25cd92bf83df209668285/yarl-1.20.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2c26b0c49220d5799f7b22c6838409ee9bc58ee5c95361a4d7831f03cc225b5a", size = 383378, upload-time = "2025-06-10T00:44:10.51Z" }, + { url = "https://files.pythonhosted.org/packages/af/85/9363f77bdfa1e4d690957cd39d192c4cacd1c58965df0470a4905253b54f/yarl-1.20.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:564ab3d517e3d01c408c67f2e5247aad4019dcf1969982aba3974b4093279004", size = 374069, upload-time = "2025-06-10T00:44:12.834Z" }, + { url = "https://files.pythonhosted.org/packages/35/99/9918c8739ba271dcd935400cff8b32e3cd319eaf02fcd023d5dcd487a7c8/yarl-1.20.1-cp312-cp312-win32.whl", hash = "sha256:daea0d313868da1cf2fac6b2d3a25c6e3a9e879483244be38c8e6a41f1d876a5", size = 81249, upload-time = "2025-06-10T00:44:14.731Z" }, + { url = "https://files.pythonhosted.org/packages/eb/83/5d9092950565481b413b31a23e75dd3418ff0a277d6e0abf3729d4d1ce25/yarl-1.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:48ea7d7f9be0487339828a4de0360d7ce0efc06524a48e1810f945c45b813698", size = 86710, upload-time = "2025-06-10T00:44:16.716Z" }, + { url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" }, ] [[package]] name = "zipp" version = "3.23.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547 } +sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276 }, + { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, ] [[package]] name = "zipstream-ng" version = "1.9.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/11/f2/690a35762cf8366ce6f3b644805de970bd6a897ca44ce74184c7b2bc94e7/zipstream_ng-1.9.0.tar.gz", hash = "sha256:a0d94030822d137efbf80dfdc680603c42f804696f41147bb3db895df667daea", size = 37963 } +sdist = { url = "https://files.pythonhosted.org/packages/11/f2/690a35762cf8366ce6f3b644805de970bd6a897ca44ce74184c7b2bc94e7/zipstream_ng-1.9.0.tar.gz", hash = "sha256:a0d94030822d137efbf80dfdc680603c42f804696f41147bb3db895df667daea", size = 37963, upload-time = "2025-08-29T01:03:36.323Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/de/62/c2da1c495291a52e561257d017585e08906d288035d025ccf636f6b9a266/zipstream_ng-1.9.0-py3-none-any.whl", hash = "sha256:31dc2cf617abdbf28d44f2e08c0d14c8eee2ea0ec26507a7e4d5d5f97c564b7a", size = 24852 }, + { url = "https://files.pythonhosted.org/packages/de/62/c2da1c495291a52e561257d017585e08906d288035d025ccf636f6b9a266/zipstream_ng-1.9.0-py3-none-any.whl", hash = "sha256:31dc2cf617abdbf28d44f2e08c0d14c8eee2ea0ec26507a7e4d5d5f97c564b7a", size = 24852, upload-time = "2025-08-29T01:03:35.046Z" }, ] From d9bc50f069235a847eada8b477433ef62ef2470b Mon Sep 17 00:00:00 2001 From: harini-venkataraman <115449948+harini-venkataraman@users.noreply.github.com> Date: Wed, 11 Mar 2026 21:51:35 +0530 Subject: [PATCH 47/64] Un 3266 fix security hotspot tmp paths (#1851) * UN-3266 fix: replace hardcoded /tmp paths with secure temp dirs in tests Replace hardcoded /tmp/ paths (SonarCloud S5443 security hotspots) with pytest's tmp_path fixture or module-level tempfile.mkdtemp() constants in all affected test files to avoid world-writable directory vulnerabilities. Co-Authored-By: Claude Sonnet 4.6 * UN-3266 fix: resolve ruff linting failures across multiple files - B026: pass url positionally in worker_celery.py to avoid star-arg after keyword - N803: rename MockAsyncResult to mock_async_result in test_tasks.py - E501/I001: fix long line and import sort in llm_whisperer helper - ANN401: replace Any with object|None in dispatcher.py; add noqa in test helpers - F841: remove unused workflow_id and result assignments Co-Authored-By: Claude Sonnet 4.6 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Claude Sonnet 4.6 Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- backend/backend/worker_celery.py | 8 ++------ .../prompt_studio/prompt_studio_core_v2/test_tasks.py | 6 +++--- .../adapters/x2text/llm_whisperer_v2/src/helper.py | 4 +++- .../sdk1/src/unstract/sdk1/execution/dispatcher.py | 8 ++++---- unstract/sdk1/tests/test_execution.py | 10 +++++----- workers/file_processing/structure_tool_task.py | 1 - 6 files changed, 17 insertions(+), 20 deletions(-) diff --git a/backend/backend/worker_celery.py b/backend/backend/worker_celery.py index 018f3d485b..956f789ecf 100644 --- a/backend/backend/worker_celery.py +++ b/backend/backend/worker_celery.py @@ -41,14 +41,10 @@ class _WorkerDispatchCelery(Celery): _explicit_broker: str | None = None def connection_for_write(self, url=None, *args, **kwargs): - return super().connection_for_write( - url=url or self._explicit_broker, *args, **kwargs - ) + return super().connection_for_write(url or self._explicit_broker, *args, **kwargs) def connection_for_read(self, url=None, *args, **kwargs): - return super().connection_for_read( - url=url or self._explicit_broker, *args, **kwargs - ) + return super().connection_for_read(url or self._explicit_broker, *args, **kwargs) def get_worker_celery_app() -> Celery: diff --git a/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py b/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py index 4efef90987..d068be8743 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py +++ b/backend/prompt_studio/prompt_studio_core_v2/test_tasks.py @@ -359,7 +359,7 @@ def test_task_status_url_registered(self): assert "" in str(url.pattern) @patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) - def test_task_status_processing(self, MockAsyncResult): + def test_task_status_processing(self, mock_async_result): """Verify processing response for unfinished task.""" import inspect @@ -370,7 +370,7 @@ def test_task_status_processing(self, MockAsyncResult): assert '"processing"' in source @patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) - def test_task_status_completed(self, MockAsyncResult): + def test_task_status_completed(self, mock_async_result): """Verify completed response structure.""" import inspect @@ -382,7 +382,7 @@ def test_task_status_completed(self, MockAsyncResult): assert "result.result" in source @patch("prompt_studio.prompt_studio_core_v2.views.AsyncResult", create=True) - def test_task_status_failed(self, MockAsyncResult): + def test_task_status_failed(self, mock_async_result): """Verify failed response structure.""" import inspect diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py index 14790065ae..dd63bad72c 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/x2text/llm_whisperer_v2/src/helper.py @@ -225,7 +225,9 @@ def get_whisperer_params( WhispererConfig.WAIT_TIMEOUT, WhispererDefaults.WAIT_TIMEOUT, ), - WhispererConfig.WAIT_FOR_COMPLETION: WhispererDefaults.WAIT_FOR_COMPLETION, + WhispererConfig.WAIT_FOR_COMPLETION: ( + WhispererDefaults.WAIT_FOR_COMPLETION + ), } ) if params[WhispererConfig.MODE] == Modes.LOW_COST.value: diff --git a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py index 7fc9c5f720..d5d6867361 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py @@ -72,7 +72,7 @@ class ExecutionDispatcher: ) """ - def __init__(self, celery_app: Any = None) -> None: + def __init__(self, celery_app: object | None = None) -> None: """Initialize the dispatcher. Args: @@ -201,10 +201,10 @@ def dispatch_async( def dispatch_with_callback( self, context: ExecutionContext, - on_success: Any = None, - on_error: Any = None, + on_success: object | None = None, + on_error: object | None = None, task_id: str | None = None, - ) -> Any: + ) -> object: """Fire-and-forget dispatch with Celery link callbacks. Sends the task to the executor queue and returns immediately. diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index 458c7a8f10..540072ea0d 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -27,7 +27,7 @@ class TestExecutionContext: """Tests for ExecutionContext serialization and validation.""" - def _make_context(self, **overrides: Any) -> ExecutionContext: + def _make_context(self, **overrides: Any) -> ExecutionContext: # noqa: ANN401 """Create a default ExecutionContext with optional overrides.""" defaults: dict[str, Any] = { "executor_name": "legacy", @@ -490,7 +490,7 @@ def _clean_registry(self: Self) -> None: """Ensure a clean registry for every test.""" ExecutorRegistry.clear() - def _make_context(self, **overrides: Any) -> ExecutionContext: + def _make_context(self, **overrides: Any) -> ExecutionContext: # noqa: ANN401 defaults: dict[str, Any] = { "executor_name": "legacy", "operation": "extract", @@ -586,7 +586,7 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: class TestExecutionDispatcher: """Tests for ExecutionDispatcher (mocked Celery).""" - def _make_context(self, **overrides: Any) -> ExecutionContext: + def _make_context(self, **overrides: Any) -> ExecutionContext: # noqa: ANN401 defaults: dict[str, Any] = { "executor_name": "legacy", "operation": "extract", @@ -917,7 +917,7 @@ def test_dispatch_with_callback_custom_task_id( dispatcher = ExecutionDispatcher(celery_app=mock_app) ctx = self._make_context() - result = dispatcher.dispatch_with_callback(ctx, task_id="pre-gen-id-123") + dispatcher.dispatch_with_callback(ctx, task_id="pre-gen-id-123") call_kwargs = mock_app.send_task.call_args assert call_kwargs[1]["task_id"] == "pre-gen-id-123" @@ -974,7 +974,7 @@ def stream_log( log: str, level: LogLevel = LogLevel.INFO, stage: str = "TOOL_RUN", - **kwargs: Any, + **kwargs: Any, # noqa: ANN401 ) -> None: _level_map = { LogLevel.DEBUG: logging.DEBUG, diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index 82c1962b43..e080d1cde8 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -221,7 +221,6 @@ def _execute_structure_tool_impl(params: dict) -> dict: """ # ---- Unpack params ---- organization_id = params["organization_id"] - workflow_id = params.get("workflow_id", "") execution_id = params.get("execution_id", "") file_execution_id = params["file_execution_id"] tool_instance_metadata = params["tool_instance_metadata"] From b715f6484457e751940aa125a2415e9741d554b7 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 11 Mar 2026 21:59:12 +0530 Subject: [PATCH 48/64] UN-3266 fix: resolve SonarCloud bugs S2259 and S1244 in PR #1849 - S2259: guard against None after _discover_plugins() in loader.py to satisfy static analysis on the dict[str,type]|None field type - S1244: replace float equality checks with pytest.approx() in test_answer_prompt.py and test_phase2h.py Co-Authored-By: Claude Sonnet 4.6 --- workers/executor/executors/plugins/loader.py | 2 ++ workers/tests/test_answer_prompt.py | 5 ++--- workers/tests/test_phase2h.py | 4 +--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/workers/executor/executors/plugins/loader.py b/workers/executor/executors/plugins/loader.py index 7a4ed25da5..3f2a54c92a 100644 --- a/workers/executor/executors/plugins/loader.py +++ b/workers/executor/executors/plugins/loader.py @@ -26,6 +26,8 @@ def get(cls, name: str) -> type | None: """Get a plugin class by name. Returns None if not installed.""" if cls._plugins is None: cls._discover_plugins() + if cls._plugins is None: + return None return cls._plugins.get(name) @classmethod diff --git a/workers/tests/test_answer_prompt.py b/workers/tests/test_answer_prompt.py index 53dfd4d79f..f38454936f 100644 --- a/workers/tests/test_answer_prompt.py +++ b/workers/tests/test_answer_prompt.py @@ -8,12 +8,11 @@ from unittest.mock import MagicMock, patch import pytest - from executor.executors.constants import ( PromptServiceConstants as PSKeys, ) -from unstract.sdk1.execution.context import ExecutionContext, Operation +from unstract.sdk1.execution.context import ExecutionContext, Operation # --------------------------------------------------------------------------- # Helpers @@ -269,7 +268,7 @@ def test_number_type_converts_to_float(self, mock_shim_cls, mock_deps): ctx = _make_context(prompts=[_make_prompt(output_type="number")]) result = executor._handle_answer_prompt(ctx) - assert result.data[PSKeys.OUTPUT]["field_a"] == 42500000.0 + assert result.data[PSKeys.OUTPUT]["field_a"] == pytest.approx(42500000.0) @patch( "executor.executors.legacy_executor.LegacyExecutor._get_prompt_deps" diff --git a/workers/tests/test_phase2h.py b/workers/tests/test_phase2h.py index cf02c767b2..cca39a3710 100644 --- a/workers/tests/test_phase2h.py +++ b/workers/tests/test_phase2h.py @@ -9,7 +9,6 @@ import pytest import requests as real_requests - from executor.executors.constants import VariableType from executor.executors.exceptions import CustomDataError, LegacyExecutorError from executor.executors.postprocessor import ( @@ -21,7 +20,6 @@ VariableReplacementService, ) - # ============================================================================ # 1. VariableReplacementHelper (15 tests) # ============================================================================ @@ -473,7 +471,7 @@ def test_custom_timeout_passed(self, mock_post): timeout=5.0, ) _, kwargs = mock_post.call_args - assert kwargs["timeout"] == 5.0 + assert kwargs["timeout"] == pytest.approx(5.0) # --- _validate_structured_output --- From e9c23b24cf5b60af5ed0da855c68755c52f5f30f Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 11 Mar 2026 22:38:44 +0530 Subject: [PATCH 49/64] UN-3266 fix: resolve SonarCloud code smells in PR #1849 - S5799: Merge all implicit string concatenations in log messages (legacy_executor.py, tasks.py, dispatcher.py, orchestrator.py, registry.py, variable_replacement.py, structure_tool_task.py) - S1192: Extract duplicate literal to _NO_CELERY_APP_MSG constant in dispatcher.py - S1871: Merge identical elif/else branches in tasks.py and test_sanity_phase6j.py - S1186: Add comment to empty stub method in test_sanity_phase6a.py - S1481: Remove unused local variables in test_sanity_phase6d/e/f/g/h/j and test_phase5d.py - S117: Rename PascalCase local variables to snake_case in test_sanity_phase3/5/6i.py - S5655: Broaden tool type annotation to StreamMixin in IndexingUtils.generate_index_key and PlatformHelper.get_adapter_config - docker:S7031: Merge consecutive RUN instructions in worker-unified.Dockerfile - javascript:S1128: Remove unused pollForCompletion import in usePromptRun.js Co-Authored-By: Claude Sonnet 4.6 --- docker/dockerfiles/worker-unified.Dockerfile | 19 ++++++-------- frontend/src/hooks/usePromptRun.js | 1 - .../src/unstract/sdk1/execution/dispatcher.py | 12 ++++----- .../unstract/sdk1/execution/orchestrator.py | 4 +-- .../src/unstract/sdk1/execution/registry.py | 2 +- unstract/sdk1/src/unstract/sdk1/platform.py | 5 ++-- .../sdk1/src/unstract/sdk1/utils/indexing.py | 4 +-- workers/executor/executors/legacy_executor.py | 25 +++++++++---------- .../executors/variable_replacement.py | 2 +- workers/executor/tasks.py | 9 +------ .../file_processing/structure_tool_task.py | 2 +- workers/tests/test_phase5d.py | 1 - workers/tests/test_sanity_phase3.py | 6 ++--- workers/tests/test_sanity_phase5.py | 20 +++++++-------- workers/tests/test_sanity_phase6a.py | 2 +- workers/tests/test_sanity_phase6d.py | 1 - workers/tests/test_sanity_phase6e.py | 2 +- workers/tests/test_sanity_phase6f.py | 2 +- workers/tests/test_sanity_phase6g.py | 4 +-- workers/tests/test_sanity_phase6h.py | 2 +- workers/tests/test_sanity_phase6i.py | 12 ++++----- workers/tests/test_sanity_phase6j.py | 23 +---------------- 22 files changed, 63 insertions(+), 97 deletions(-) diff --git a/docker/dockerfiles/worker-unified.Dockerfile b/docker/dockerfiles/worker-unified.Dockerfile index 0ea425b623..37558fe660 100644 --- a/docker/dockerfiles/worker-unified.Dockerfile +++ b/docker/dockerfiles/worker-unified.Dockerfile @@ -72,8 +72,12 @@ COPY ${BUILD_CONTEXT_PATH}/ ./ # Set shell with pipefail for proper error handling in pipes SHELL ["/bin/bash", "-o", "pipefail", "-c"] -# Install project and OpenTelemetry instrumentation (as root to avoid permission issues) -# No symlinks needed - PYTHONPATH handles the paths correctly +# Install project, OpenTelemetry instrumentation, and executor plugins. +# No symlinks needed - PYTHONPATH handles the paths correctly. +# Executor plugins (cloud-only, no-op for OSS) register via setuptools entry points: +# - unstract.executor.executors (executor classes, e.g. table_extractor) +# - unstract.executor.plugins (utility plugins, e.g. highlight-data, challenge) +# Editable installs (-e) ensure Path(__file__) resolves to the source directory. RUN uv sync --group deploy --locked && \ uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement - && \ # Use OpenTelemetry v1 - v2 breaks LiteLLM with instrumentation enabled @@ -81,15 +85,8 @@ RUN uv sync --group deploy --locked && \ uv pip install opentelemetry-instrumentation-openai && \ { chmod +x ./run-worker.sh ./run-worker-docker.sh 2>/dev/null || true; } && \ touch requirements.txt && \ - { chown -R worker:worker ./run-worker.sh ./run-worker-docker.sh 2>/dev/null || true; } - -# Install executor plugins from workers/plugins/ (cloud-only, no-op for OSS). -# Plugins register via setuptools entry points in two groups: -# - unstract.executor.executors (executor classes, e.g. table_extractor) -# - unstract.executor.plugins (utility plugins, e.g. highlight-data, challenge) -# Editable installs (-e) ensure Path(__file__) resolves to the source directory, -# giving plugins access to non-Python assets (.md prompts, .txt templates, etc.). -RUN for plugin_dir in /app/plugins/*/; do \ + { chown -R worker:worker ./run-worker.sh ./run-worker-docker.sh 2>/dev/null || true; } && \ + for plugin_dir in /app/plugins/*/; do \ if [ -f "$plugin_dir/pyproject.toml" ] && \ grep -qE 'unstract\.executor\.(executors|plugins)' "$plugin_dir/pyproject.toml" 2>/dev/null; then \ echo "Installing executor plugin: $(basename $plugin_dir)" && \ diff --git a/frontend/src/hooks/usePromptRun.js b/frontend/src/hooks/usePromptRun.js index 11e83dc42f..85d2a16857 100644 --- a/frontend/src/hooks/usePromptRun.js +++ b/frontend/src/hooks/usePromptRun.js @@ -3,7 +3,6 @@ import { generateUUID, PROMPT_RUN_API_STATUSES, PROMPT_RUN_TYPES, - pollForCompletion, } from "../helpers/GetStaticData"; import { useAlertStore } from "../store/alert-store"; import { useCustomToolStore } from "../store/custom-tool-store"; diff --git a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py index d5d6867361..087ba111f3 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py @@ -47,6 +47,7 @@ # waits longer than the worker allows the task to run. _DEFAULT_TIMEOUT_ENV = "EXECUTOR_RESULT_TIMEOUT" _DEFAULT_TIMEOUT = 3600 # 1 hour — matches executor worker default +_NO_CELERY_APP_MSG = "No Celery app configured on ExecutionDispatcher" class ExecutionDispatcher: @@ -112,15 +113,14 @@ def dispatch( ValueError: If no Celery app is configured. """ if self._app is None: - raise ValueError("No Celery app configured on ExecutionDispatcher") + raise ValueError(_NO_CELERY_APP_MSG) if timeout is None: timeout = int(os.environ.get(_DEFAULT_TIMEOUT_ENV, _DEFAULT_TIMEOUT)) queue = self._get_queue(context.executor_name) logger.info( - "Dispatching execution: executor=%s operation=%s " - "run_id=%s request_id=%s timeout=%ss queue=%s", + "Dispatching execution: executor=%s operation=%s run_id=%s request_id=%s timeout=%ss queue=%s", context.executor_name, context.operation, context.run_id, @@ -150,7 +150,7 @@ def dispatch( ) except Exception as exc: logger.error( - "Dispatch failed: executor=%s operation=%s " "run_id=%s error=%s", + "Dispatch failed: executor=%s operation=%s run_id=%s error=%s", context.executor_name, context.operation, context.run_id, @@ -178,7 +178,7 @@ def dispatch_async( ValueError: If no Celery app is configured. """ if self._app is None: - raise ValueError("No Celery app configured on ExecutionDispatcher") + raise ValueError(_NO_CELERY_APP_MSG) queue = self._get_queue(context.executor_name) logger.info( @@ -234,7 +234,7 @@ def dispatch_with_callback( ValueError: If no Celery app is configured. """ if self._app is None: - raise ValueError("No Celery app configured on ExecutionDispatcher") + raise ValueError(_NO_CELERY_APP_MSG) queue = self._get_queue(context.executor_name) logger.info( diff --git a/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py b/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py index 02693a0509..2c0f66f3bb 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/orchestrator.py @@ -57,7 +57,7 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: except Exception as exc: elapsed = time.monotonic() - start logger.exception( - "Executor %r raised an unhandled exception " "after %.2fs", + "Executor %r raised an unhandled exception after %.2fs", context.executor_name, elapsed, ) @@ -68,7 +68,7 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: elapsed = time.monotonic() - start logger.info( - "Execution completed: executor=%s operation=%s " "success=%s elapsed=%.2fs", + "Execution completed: executor=%s operation=%s success=%s elapsed=%.2fs", context.executor_name, context.operation, result.success, diff --git a/unstract/sdk1/src/unstract/sdk1/execution/registry.py b/unstract/sdk1/src/unstract/sdk1/execution/registry.py index 999487a2e5..c9ca1fee12 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/registry.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/registry.py @@ -97,7 +97,7 @@ def get(cls, name: str) -> BaseExecutor: if executor_cls is None: available = ", ".join(sorted(cls._registry)) or "(none)" raise KeyError( - f"No executor registered with name {name!r}. " f"Available: {available}" + f"No executor registered with name {name!r}. Available: {available}" ) return executor_cls() diff --git a/unstract/sdk1/src/unstract/sdk1/platform.py b/unstract/sdk1/src/unstract/sdk1/platform.py index e5ce7fc172..c2134405fa 100644 --- a/unstract/sdk1/src/unstract/sdk1/platform.py +++ b/unstract/sdk1/src/unstract/sdk1/platform.py @@ -16,6 +16,7 @@ ) from unstract.sdk1.exceptions import SdkError from unstract.sdk1.tool.base import BaseTool +from unstract.sdk1.tool.stream import StreamMixin from unstract.sdk1.utils.common import Utils from unstract.sdk1.utils.retry_utils import retry_platform_service_call @@ -99,7 +100,7 @@ def is_public_adapter(cls: type[Self], adapter_id: str) -> bool: @retry_platform_service_call def _get_adapter_configuration( cls: type[Self], - tool: BaseTool, + tool: BaseTool | StreamMixin, adapter_instance_id: str, ) -> dict[str, Any]: """Get Adapter. @@ -163,7 +164,7 @@ def _get_adapter_configuration( @classmethod def get_adapter_config( - cls: type[Self], tool: BaseTool, adapter_instance_id: str + cls: type[Self], tool: BaseTool | StreamMixin, adapter_instance_id: str ) -> dict[str, Any] | None: """Get adapter spec by the help of unstract DB tool. diff --git a/unstract/sdk1/src/unstract/sdk1/utils/indexing.py b/unstract/sdk1/src/unstract/sdk1/utils/indexing.py index 0e9bbef92f..e02d0df816 100644 --- a/unstract/sdk1/src/unstract/sdk1/utils/indexing.py +++ b/unstract/sdk1/src/unstract/sdk1/utils/indexing.py @@ -2,7 +2,7 @@ from unstract.sdk1.file_storage import FileStorage, FileStorageProvider from unstract.sdk1.platform import PlatformHelper -from unstract.sdk1.tool.base import BaseTool +from unstract.sdk1.tool.stream import StreamMixin from unstract.sdk1.utils.tool import ToolUtils @@ -14,7 +14,7 @@ def generate_index_key( x2text: str, chunk_size: str, chunk_overlap: str, - tool: BaseTool, + tool: StreamMixin, file_path: str | None = None, file_hash: str | None = None, fs: FileStorage | None = None, diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 0d8978b431..4d8f8d309e 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -86,7 +86,7 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: if handler_name is None: return ExecutionResult.failure( error=( - f"LegacyExecutor does not support operation " f"'{context.operation}'" + f"LegacyExecutor does not support operation '{context.operation}'" ) ) @@ -105,7 +105,7 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: result = handler(context) elapsed = time.monotonic() - start logger.info( - "Handler %s completed in %.2fs " "(run_id=%s success=%s)", + "Handler %s completed in %.2fs (run_id=%s success=%s)", handler_name, elapsed, context.run_id, @@ -190,14 +190,13 @@ def _handle_extract(self, context: ExecutionContext) -> ExecutionResult: fs = FileUtils.get_fs_instance(execution_source=execution_source) logger.info( - "Starting text extraction: x2text_adapter=%s file=%s " "run_id=%s", + "Starting text extraction: x2text_adapter=%s file=%s run_id=%s", x2text_instance_id, Path(file_path).name, context.run_id, ) logger.info( - "HIGHLIGHT_DEBUG _handle_extract: enable_highlight=%s " - "x2text_type=%s file=%s run_id=%s", + "HIGHLIGHT_DEBUG _handle_extract: enable_highlight=%s x2text_type=%s file=%s run_id=%s", enable_highlight, type(x2text.x2text_instance).__name__, Path(file_path).name, @@ -351,7 +350,7 @@ def _handle_ide_index(self, context: ExecutionContext) -> ExecutionResult: if not index_params: missing.append("index_params") return ExecutionResult.failure( - error=f"ide_index missing required params: " f"{', '.join(missing)}" + error=f"ide_index missing required params: {', '.join(missing)}" ) # Step 1: Extract @@ -685,7 +684,7 @@ def _run_pipeline_index( indexing_start = datetime.datetime.now() logger.info( - "Pipeline indexing: chunk_size=%s " "chunk_overlap=%s vector_db=%s", + "Pipeline indexing: chunk_size=%s chunk_overlap=%s vector_db=%s", chunk_size, chunk_overlap, vector_db, @@ -1023,7 +1022,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: context_retrieval_metrics: dict[str, Any] = {} logger.info( - "Starting answer_prompt: tool_id=%s prompt_count=%d " "file=%s run_id=%s", + "Starting answer_prompt: tool_id=%s prompt_count=%d file=%s run_id=%s", tool_id, len(prompts), doc_name, @@ -1284,7 +1283,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: if retrieval_strategy in valid_strategies: shim.stream_log(f"Retrieving context for: {prompt_name}") logger.info( - "Performing retrieval: prompt=%s strategy=%s " "chunk_size=%d", + "Performing retrieval: prompt=%s strategy=%s chunk_size=%d", prompt_name, retrieval_strategy, chunk_size, @@ -1331,7 +1330,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: ) else: logger.warning( - "Skipping retrieval: invalid strategy=%s " "for prompt=%s", + "Skipping retrieval: invalid strategy=%s for prompt=%s", retrieval_strategy, prompt_name, ) @@ -1385,7 +1384,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: ) challenger.run() shim.stream_log( - f"Challenge verification completed" f" for: {prompt_name}" + f"Challenge verification completed for: {prompt_name}" ) logger.info( "Challenge completed: prompt=%s", @@ -1585,7 +1584,7 @@ def _handle_single_pass_extraction( {"output": dict, "metadata": dict, "metrics": dict} """ logger.info( - "single_pass_extraction delegating to answer_prompt " "(run_id=%s)", + "single_pass_extraction delegating to answer_prompt (run_id=%s)", context.run_id, ) return self._handle_answer_prompt(context) @@ -1638,7 +1637,7 @@ def _handle_summarize(self, context: ExecutionContext) -> ExecutionResult: if prompt_keys: prompt += f"Focus on these fields: {', '.join(prompt_keys)}\n\n" prompt += ( - f"Context:\n---------------\n{doc_context}\n" f"-----------------\n\nSummary:" + f"Context:\n---------------\n{doc_context}\n-----------------\n\nSummary:" ) shim = ExecutorToolShim( diff --git a/workers/executor/executors/variable_replacement.py b/workers/executor/executors/variable_replacement.py index cca158cba0..023d958569 100644 --- a/workers/executor/executors/variable_replacement.py +++ b/workers/executor/executors/variable_replacement.py @@ -48,7 +48,7 @@ def check_static_variable_run_status( output = structure_output[variable] except KeyError: logger.warning( - "Prompt with %s is not executed yet. " "Unable to replace the variable", + "Prompt with %s is not executed yet. Unable to replace the variable", variable, ) return output diff --git a/workers/executor/tasks.py b/workers/executor/tasks.py index a729870e1c..766ef86343 100644 --- a/workers/executor/tasks.py +++ b/workers/executor/tasks.py @@ -82,13 +82,6 @@ def execute_extraction(self, execution_context_dict: dict) -> dict: "doc_name": str(pipeline_opts.get("source_file_name", "")), "operation": context.operation, } - elif context.operation in ("table_extract", "smart_table_extract"): - context._log_component = { - "tool_id": params.get("tool_id", ""), - "run_id": context.run_id, - "doc_name": str(params.get("file_name", "")), - "operation": context.operation, - } else: context._log_component = { "tool_id": params.get("tool_id", ""), @@ -103,7 +96,7 @@ def execute_extraction(self, execution_context_dict: dict) -> dict: result = orchestrator.execute(context) logger.info( - "execute_extraction complete: " "celery_task_id=%s request_id=%s success=%s", + "execute_extraction complete: celery_task_id=%s request_id=%s success=%s", self.request.id, context.request_id, result.success, diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index e080d1cde8..279e24ba3f 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -308,7 +308,7 @@ def _execute_structure_tool_impl(params: dict) -> dict: ) if skip_extraction_and_indexing: logger.info( - "Skipping extraction and indexing for Excel table " "with valid JSON schema" + "Skipping extraction and indexing for Excel table with valid JSON schema" ) # ---- Step 5: Build pipeline params ---- diff --git a/workers/tests/test_phase5d.py b/workers/tests/test_phase5d.py index c5b0a0640a..0a0489b5dc 100644 --- a/workers/tests/test_phase5d.py +++ b/workers/tests/test_phase5d.py @@ -670,7 +670,6 @@ def test_index_dedup_skips_duplicate_params(self, executor): ) ) index_call_count = 0 - original_index = executor._handle_index def counting_index(ctx): nonlocal index_call_count diff --git a/workers/tests/test_sanity_phase3.py b/workers/tests/test_sanity_phase3.py index 30f88565a4..df835e0643 100644 --- a/workers/tests/test_sanity_phase3.py +++ b/workers/tests/test_sanity_phase3.py @@ -214,10 +214,10 @@ def test_structure_tool_single_dispatch( @patch(_PATCH_DISPATCHER) def test_pipeline_params_structure( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, tool_metadata_regular, mock_fs, @@ -235,7 +235,7 @@ def test_pipeline_params_structure( } dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance dispatcher_instance.dispatch.return_value = _make_pipeline_result() execute_structure_tool(base_params) diff --git a/workers/tests/test_sanity_phase5.py b/workers/tests/test_sanity_phase5.py index a7da15d1fb..71e6775902 100644 --- a/workers/tests/test_sanity_phase5.py +++ b/workers/tests/test_sanity_phase5.py @@ -112,17 +112,17 @@ def _mock_prompt_deps(llm=None): from executor.executors.answer_prompt import AnswerPromptService - RetrievalService = MagicMock(name="RetrievalService") - RetrievalService.run_retrieval.return_value = ["chunk1"] - RetrievalService.retrieve_complete_context.return_value = ["full doc"] + retrieval_service = MagicMock(name="RetrievalService") + retrieval_service.run_retrieval.return_value = ["chunk1"] + retrieval_service.retrieve_complete_context.return_value = ["full doc"] - VariableReplacementService = MagicMock(name="VariableReplacementService") - VariableReplacementService.is_variables_present.return_value = False + variable_replacement_service = MagicMock(name="VariableReplacementService") + variable_replacement_service.is_variables_present.return_value = False - Index = MagicMock(name="Index") + index_cls = MagicMock(name="Index") index_instance = MagicMock() index_instance.generate_index_key.return_value = "doc-key-1" - Index.return_value = index_instance + index_cls.return_value = index_instance LLM_cls = MagicMock(name="LLM") LLM_cls.return_value = llm @@ -132,9 +132,9 @@ def _mock_prompt_deps(llm=None): return ( AnswerPromptService, - RetrievalService, - VariableReplacementService, - Index, + retrieval_service, + variable_replacement_service, + index_cls, LLM_cls, EmbeddingCompat, VectorDB, diff --git a/workers/tests/test_sanity_phase6a.py b/workers/tests/test_sanity_phase6a.py index d35833fc2c..4c49c7407a 100644 --- a/workers/tests/test_sanity_phase6a.py +++ b/workers/tests/test_sanity_phase6a.py @@ -292,7 +292,7 @@ def test_runtime_checkable(self): class FakeChallenge: def run(self): - pass + pass # Minimal stub to satisfy ChallengeProtocol for isinstance check assert isinstance(FakeChallenge(), ChallengeProtocol) diff --git a/workers/tests/test_sanity_phase6d.py b/workers/tests/test_sanity_phase6d.py index 91cc8cf72c..cd40c1b685 100644 --- a/workers/tests/test_sanity_phase6d.py +++ b/workers/tests/test_sanity_phase6d.py @@ -525,7 +525,6 @@ def test_challenge_mutates_structured_output( def challenge_run_side_effect(): # Simulate challenge replacing the answer with improved version - challenger_instance = mock_challenge_cls.return_value # Access the structured_output passed to constructor so = mock_challenge_cls.call_args.kwargs["structured_output"] so["field1"] = "improved_42" diff --git a/workers/tests/test_sanity_phase6e.py b/workers/tests/test_sanity_phase6e.py index 85d4d60d65..302540b666 100644 --- a/workers/tests/test_sanity_phase6e.py +++ b/workers/tests/test_sanity_phase6e.py @@ -163,7 +163,7 @@ def test_dispatch_sends_to_table_queue(self): execution_source="tool", executor_params={"table_settings": {}}, ) - result = dispatcher.dispatch(ctx) + dispatcher.dispatch(ctx) mock_app.send_task.assert_called_once() call_kwargs = mock_app.send_task.call_args diff --git a/workers/tests/test_sanity_phase6f.py b/workers/tests/test_sanity_phase6f.py index 4a8432f6ef..cf565e692f 100644 --- a/workers/tests/test_sanity_phase6f.py +++ b/workers/tests/test_sanity_phase6f.py @@ -155,7 +155,7 @@ def test_dispatch_sends_to_smart_table_queue(self): execution_source="tool", executor_params={"table_settings": {}}, ) - result = dispatcher.dispatch(ctx) + dispatcher.dispatch(ctx) mock_app.send_task.assert_called_once() call_kwargs = mock_app.send_task.call_args diff --git a/workers/tests/test_sanity_phase6g.py b/workers/tests/test_sanity_phase6g.py index 8b175f8eec..fe8da04832 100644 --- a/workers/tests/test_sanity_phase6g.py +++ b/workers/tests/test_sanity_phase6g.py @@ -148,7 +148,7 @@ def test_dispatch_sends_to_sps_queue(self): execution_source="tool", executor_params={"tool_settings": {}, "output": {}}, ) - result = dispatcher.dispatch(ctx) + dispatcher.dispatch(ctx) mock_app.send_task.assert_called_once() call_kwargs = mock_app.send_task.call_args @@ -170,7 +170,7 @@ def test_dispatch_sps_index_to_correct_queue(self, tmp_path): execution_source="tool", executor_params={"output": {}, "file_path": str(tmp_path / "test.pdf")}, ) - result = dispatcher.dispatch(ctx) + dispatcher.dispatch(ctx) mock_app.send_task.assert_called_once() call_kwargs = mock_app.send_task.call_args diff --git a/workers/tests/test_sanity_phase6h.py b/workers/tests/test_sanity_phase6h.py index c4249fb05a..c4f31bcbc2 100644 --- a/workers/tests/test_sanity_phase6h.py +++ b/workers/tests/test_sanity_phase6h.py @@ -208,7 +208,7 @@ def test_structure_tool_dispatches_agentic_extract(self, tmp_path): success=True, data={"output": {"field": "value"}} ) - result = _run_agentic_extraction( + _run_agentic_extraction( tool_metadata={"name": "test"}, input_file_path=str(tmp_path / "test.pdf"), output_dir_path=str(tmp_path / "output"), diff --git a/workers/tests/test_sanity_phase6i.py b/workers/tests/test_sanity_phase6i.py index 4de0e8f662..635dfa7ca3 100644 --- a/workers/tests/test_sanity_phase6i.py +++ b/workers/tests/test_sanity_phase6i.py @@ -119,16 +119,16 @@ class TestSummarizeResultShape: @patch(_PATCH_GET_PROMPT_DEPS) def test_summarize_returns_data_key(self, mock_deps): """_handle_summarize returns ExecutionResult with data.data = str.""" - mock_LLM = MagicMock() + mock_llm = MagicMock() mock_llm_instance = MagicMock() - mock_LLM.return_value = mock_llm_instance + mock_llm.return_value = mock_llm_instance mock_deps.return_value = ( MagicMock(), # RetrievalService MagicMock(), # PostProcessor MagicMock(), # VariableReplacement MagicMock(), # JsonRepair - mock_LLM, # LLM + mock_llm, # LLM MagicMock(), # Embedding MagicMock(), # VectorDB ) @@ -234,13 +234,13 @@ class TestSummarizeCeleryChain: @patch(_PATCH_GET_PROMPT_DEPS) def test_summarize_full_celery_chain(self, mock_deps, eager_app): """Summarize through full Celery task chain.""" - mock_LLM = MagicMock() + mock_llm = MagicMock() mock_llm_instance = MagicMock() - mock_LLM.return_value = mock_llm_instance + mock_llm.return_value = mock_llm_instance mock_deps.return_value = ( MagicMock(), MagicMock(), MagicMock(), MagicMock(), - mock_LLM, MagicMock(), MagicMock(), + mock_llm, MagicMock(), MagicMock(), ) with patch( diff --git a/workers/tests/test_sanity_phase6j.py b/workers/tests/test_sanity_phase6j.py index c52dcdf490..c4e7c6631e 100644 --- a/workers/tests/test_sanity_phase6j.py +++ b/workers/tests/test_sanity_phase6j.py @@ -502,21 +502,7 @@ def test_legacy_works_without_cloud_executors(self, eager_app): # Only legacy should be in registry assert ExecutorRegistry.list_executors() == ["legacy"] - # Legacy operations still work - ctx = ExecutionContext( - executor_name="legacy", - operation="extract", - run_id="run-degrade", - execution_source="tool", - executor_params={ - "tool_id": "t-1", - "file_name": "test.pdf", - "file_hash": "abc", - "PLATFORM_SERVICE_API_KEY": "key", - }, - ) - # This will fail at the handler level (no mocks), but it should - # route correctly and NOT fail at registry/dispatch level + # Legacy executor can be retrieved from the registry executor = ExecutorRegistry.get("legacy") assert executor is not None assert executor.name == "legacy" @@ -595,13 +581,6 @@ def _build_log_component(self, operation, executor_params=None): "doc_name": str(pipeline_opts.get("source_file_name", "")), "operation": ctx.operation, } - elif ctx.operation in ("table_extract", "smart_table_extract"): - return { - "tool_id": params.get("tool_id", ""), - "run_id": ctx.run_id, - "doc_name": str(params.get("file_name", "")), - "operation": ctx.operation, - } else: return { "tool_id": params.get("tool_id", ""), From f59755a70d25330275d448063f8367b16d3444c4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 17:09:29 +0000 Subject: [PATCH 50/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- workers/executor/executors/legacy_executor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 4d8f8d309e..543d9b64a5 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -85,9 +85,7 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: handler_name = self._OPERATION_MAP.get(context.operation) if handler_name is None: return ExecutionResult.failure( - error=( - f"LegacyExecutor does not support operation '{context.operation}'" - ) + error=(f"LegacyExecutor does not support operation '{context.operation}'") ) handler = getattr(self, handler_name) From 4bf97367aebe75985568826f21840a2281cb1cd1 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Wed, 11 Mar 2026 22:49:17 +0530 Subject: [PATCH 51/64] UN-3266 fix: wrap long log message in dispatcher.py to fix E501 Co-Authored-By: Claude Sonnet 4.6 --- unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py index 087ba111f3..54ed4b0f10 100644 --- a/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py +++ b/unstract/sdk1/src/unstract/sdk1/execution/dispatcher.py @@ -120,7 +120,8 @@ def dispatch( queue = self._get_queue(context.executor_name) logger.info( - "Dispatching execution: executor=%s operation=%s run_id=%s request_id=%s timeout=%ss queue=%s", + "Dispatching execution: executor=%s operation=%s" + " run_id=%s request_id=%s timeout=%ss queue=%s", context.executor_name, context.operation, context.run_id, From 053187059adc9ea2b20f12ffcd62552b5ac8c77d Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 12 Mar 2026 00:32:07 +0530 Subject: [PATCH 52/64] UN-3266 fix: resolve remaining SonarCloud S117 naming violations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename PascalCase local variables to snake_case to comply with S117: - legacy_executor.py: rename tuple-unpacked _get_prompt_deps() results (AnswerPromptService→answer_prompt_svc, RetrievalService→retrieval_svc, VariableReplacementService→variable_replacement_svc, LLM→llm_cls, EmbeddingCompat→embedding_compat_cls, VectorDB→vector_db_cls) and update all downstream usages including _apply_type_conversion and _handle_summarize - test_phase1_log_streaming.py: rename Mock* local variables to mock_* snake_case equivalents - test_sanity_phase3.py: rename MockDispatcher→mock_dispatcher_cls and MockShim→mock_shim_cls across all 10 test methods - test_sanity_phase5.py: rename MockShim→mock_shim, MockX2Text→mock_x2text in 6 test methods; MockDispatcher→mock_dispatcher_cls in dispatch test; fix LLM_cls→llm_cls, EmbeddingCompat→embedding_compat_cls, VectorDB→vector_db_cls in _mock_prompt_deps helper Co-Authored-By: Claude Sonnet 4.6 --- workers/executor/executors/legacy_executor.py | 62 ++++++++--------- workers/tests/test_phase1_log_streaming.py | 36 +++++----- workers/tests/test_sanity_phase3.py | 66 +++++++++---------- workers/tests/test_sanity_phase5.py | 60 ++++++++--------- 4 files changed, 112 insertions(+), 112 deletions(-) diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 543d9b64a5..999e5d3109 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -853,11 +853,11 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: f"Configured chunking: size={chunk_size}, overlap={chunk_overlap}" ) - Index, EmbeddingCompat, VectorDB = self._get_indexing_deps() + index_cls, embedding_compat, vector_db_cls = self._get_indexing_deps() vector_db = None try: - index = Index( + index = index_cls( tool=shim, run_id=context.run_id, capture_metrics=True, @@ -869,12 +869,12 @@ def _handle_index(self, context: ExecutionContext) -> ExecutionResult: logger.debug("Generated index key: doc_id=%s", doc_id) shim.stream_log("Checking document index status...") - embedding = EmbeddingCompat( + embedding = embedding_compat( adapter_instance_id=embedding_instance_id, tool=shim, kwargs={**usage_kwargs}, ) - vector_db = VectorDB( + vector_db = vector_db_cls( tool=shim, adapter_instance_id=vector_db_instance_id, embedding=embedding, @@ -1029,13 +1029,13 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: # Lazy imports ( - AnswerPromptService, - RetrievalService, - VariableReplacementService, - _Index, # unused — doc_id via IndexingUtils - LLM, - EmbeddingCompat, - VectorDB, + answer_prompt_svc, + retrieval_svc, + variable_replacement_svc, + _index_cls, # unused — doc_id via IndexingUtils + llm_cls, + embedding_compat_cls, + vector_db_cls, ) = self._get_prompt_deps() # ---- Initialize highlight plugin (if enabled + installed) ---------- @@ -1130,9 +1130,9 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: shim.stream_log(f"Processing prompt: {prompt_name}") # {{variable}} template replacement - if VariableReplacementService.is_variables_present(prompt_text=prompt_text): + if variable_replacement_svc.is_variables_present(prompt_text=prompt_text): is_ide = execution_source == "ide" - prompt_text = VariableReplacementService.replace_variables_in_prompt( + prompt_text = variable_replacement_svc.replace_variables_in_prompt( prompt=output, structured_output=structured_output, log_events_id=log_events_id, @@ -1152,7 +1152,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: ) # %variable% replacement - output[PSKeys.PROMPTX] = AnswerPromptService.extract_variable( + output[PSKeys.PROMPTX] = answer_prompt_svc.extract_variable( structured_output, variable_names, output, prompt_text ) @@ -1240,7 +1240,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: "run_id": run_id, "execution_id": execution_id, } - llm = LLM( + llm = llm_cls( adapter_instance_id=output[PSKeys.LLM], tool=shim, usage_kwargs={ @@ -1252,12 +1252,12 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: embedding = None vector_db = None if chunk_size > 0: - embedding = EmbeddingCompat( + embedding = embedding_compat_cls( adapter_instance_id=output[PSKeys.EMBEDDING], tool=shim, kwargs={**usage_kwargs}, ) - vector_db = VectorDB( + vector_db = vector_db_cls( tool=shim, adapter_instance_id=output[PSKeys.VECTOR_DB], embedding=embedding, @@ -1287,14 +1287,14 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: chunk_size, ) if chunk_size == 0: - context_list = RetrievalService.retrieve_complete_context( + context_list = retrieval_svc.retrieve_complete_context( execution_source=execution_source, file_path=file_path, context_retrieval_metrics=context_retrieval_metrics, prompt_key=prompt_name, ) else: - context_list = RetrievalService.run_retrieval( + context_list = retrieval_svc.run_retrieval( output=output, doc_id=doc_id, llm=llm, @@ -1315,7 +1315,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: # Run prompt with retrieved context shim.stream_log(f"Running LLM completion for: {prompt_name}") - answer = AnswerPromptService.construct_and_run_prompt( + answer = answer_prompt_svc.construct_and_run_prompt( tool_settings=tool_settings, output=output, llm=llm, @@ -1360,7 +1360,7 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: challenge_llm_id = tool_settings.get(PSKeys.CHALLENGE_LLM) if challenge_llm_id: shim.stream_log(f"Running challenge for: {prompt_name}") - challenge_llm = LLM( + challenge_llm = llm_cls( adapter_instance_id=challenge_llm_id, tool=shim, usage_kwargs={ @@ -1472,7 +1472,7 @@ def _apply_type_conversion( Handles NUMBER, EMAIL, DATE, BOOLEAN, JSON, and TEXT types. """ - from executor.executors.answer_prompt import AnswerPromptService + from executor.executors.answer_prompt import AnswerPromptService as answer_prompt_svc from executor.executors.constants import PromptServiceConstants as PSKeys prompt_name = output[PSKeys.NAME] @@ -1494,7 +1494,7 @@ def _apply_type_conversion( f"characters. No explanation is required. " f"If you cannot extract the number, output 0." ) - answer = AnswerPromptService.run_completion(llm=llm, prompt=prompt) + answer = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) try: structured_output[prompt_name] = float(answer) except Exception: @@ -1511,7 +1511,7 @@ def _apply_type_conversion( f"variable. No explanation is required. If you cannot " f'extract the email, output "NA".' ) - answer = AnswerPromptService.run_completion(llm=llm, prompt=prompt) + answer = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) structured_output[prompt_name] = answer elif output_type == PSKeys.DATE: @@ -1527,7 +1527,7 @@ def _apply_type_conversion( f"If you cannot convert the string into a date, " f'output "NA".' ) - answer = AnswerPromptService.run_completion(llm=llm, prompt=prompt) + answer = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) structured_output[prompt_name] = answer elif output_type == PSKeys.BOOLEAN: @@ -1540,11 +1540,11 @@ def _apply_type_conversion( f"If the context is trying to convey that the answer " f'is true, then return "yes", else return "no".' ) - answer = AnswerPromptService.run_completion(llm=llm, prompt=prompt) + answer = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) structured_output[prompt_name] = answer.lower() == "yes" elif output_type == PSKeys.JSON: - AnswerPromptService.handle_json( + answer_prompt_svc.handle_json( answer=answer, structured_output=structured_output, output=output, @@ -1645,19 +1645,19 @@ def _handle_summarize(self, context: ExecutionContext) -> ExecutionResult: ) usage_kwargs = {"run_id": context.run_id} - _, _, _, _, LLM, _, _ = self._get_prompt_deps() + _, _, _, _, llm_cls, _, _ = self._get_prompt_deps() shim.stream_log("Initializing LLM for summarization...") try: - llm = LLM( + llm = llm_cls( adapter_instance_id=llm_adapter_id, tool=shim, usage_kwargs={**usage_kwargs}, ) - from executor.executors.answer_prompt import AnswerPromptService + from executor.executors.answer_prompt import AnswerPromptService as answer_prompt_svc shim.stream_log("Running document summarization...") - summary = AnswerPromptService.run_completion(llm=llm, prompt=prompt) + summary = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) logger.info("Summarization completed: run_id=%s", context.run_id) shim.stream_log("Summarization completed") return ExecutionResult( diff --git a/workers/tests/test_phase1_log_streaming.py b/workers/tests/test_phase1_log_streaming.py index 95de9b21bc..9c063e19de 100644 --- a/workers/tests/test_phase1_log_streaming.py +++ b/workers/tests/test_phase1_log_streaming.py @@ -413,26 +413,26 @@ def test_answer_prompt_enriches_component_with_prompt_key( mock_shim_cls.return_value = mock_shim # Mock prompt deps - MockAnswerPromptService = MagicMock() - MockAnswerPromptService.extract_variable.return_value = "prompt text" - MockRetrievalService = MagicMock() - MockVariableReplacementService = MagicMock() - MockVariableReplacementService.is_variables_present.return_value = ( + mock_answer_prompt_service = MagicMock() + mock_answer_prompt_service.extract_variable.return_value = "prompt text" + mock_retrieval_service = MagicMock() + mock_variable_replacement_service = MagicMock() + mock_variable_replacement_service.is_variables_present.return_value = ( False ) - MockIndex = MagicMock() - MockLLM = MagicMock() - MockEmbeddingCompat = MagicMock() - MockVectorDB = MagicMock() + mock_index = MagicMock() + mock_llm = MagicMock() + mock_embedding_compat = MagicMock() + mock_vector_db = MagicMock() mock_prompt_deps.return_value = ( - MockAnswerPromptService, - MockRetrievalService, - MockVariableReplacementService, - MockIndex, - MockLLM, - MockEmbeddingCompat, - MockVectorDB, + mock_answer_prompt_service, + mock_retrieval_service, + mock_variable_replacement_service, + mock_index, + mock_llm, + mock_embedding_compat, + mock_vector_db, ) ctx = ExecutionContext( @@ -474,10 +474,10 @@ def test_answer_prompt_enriches_component_with_prompt_key( ): executor = LegacyExecutor() # The handler will try retrieval which we need to mock - MockRetrievalService.retrieve_complete_context.return_value = [ + mock_retrieval_service.retrieve_complete_context.return_value = [ "context" ] - MockAnswerPromptService.construct_and_run_prompt.return_value = ( + mock_answer_prompt_service.construct_and_run_prompt.return_value = ( "INV-001" ) diff --git a/workers/tests/test_sanity_phase3.py b/workers/tests/test_sanity_phase3.py index df835e0643..c57fd45d9a 100644 --- a/workers/tests/test_sanity_phase3.py +++ b/workers/tests/test_sanity_phase3.py @@ -163,10 +163,10 @@ class TestStructureToolPipeline: @patch(_PATCH_DISPATCHER) def test_structure_tool_single_dispatch( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, tool_metadata_regular, mock_fs, @@ -184,7 +184,7 @@ def test_structure_tool_single_dispatch( } dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance pipeline_result = _make_pipeline_result( output={"field_a": "$1M"}, @@ -277,10 +277,10 @@ class TestStructureToolSinglePass: @patch(_PATCH_DISPATCHER) def test_structure_tool_single_pass( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, tool_metadata_regular, mock_fs, @@ -299,7 +299,7 @@ def test_structure_tool_single_pass( base_params["tool_instance_metadata"]["single_pass_extraction_mode"] = True dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance dispatcher_instance.dispatch.return_value = _make_pipeline_result( output={"field_a": "answer"}, ) @@ -324,10 +324,10 @@ class TestStructureToolSummarize: @patch(_PATCH_DISPATCHER) def test_structure_tool_summarize_flow( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, tool_metadata_regular, mock_fs, @@ -349,7 +349,7 @@ def test_structure_tool_summarize_flow( base_params["tool_instance_metadata"]["summarize_as_source"] = True dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance dispatcher_instance.dispatch.return_value = _make_pipeline_result( output={"field_a": "answer"}, ) @@ -382,10 +382,10 @@ class TestStructureToolSmartTable: @patch(_PATCH_DISPATCHER) def test_structure_tool_skip_extraction_smart_table( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, tool_metadata_regular, mock_fs, @@ -408,7 +408,7 @@ def test_structure_tool_skip_extraction_smart_table( } dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance dispatcher_instance.dispatch.return_value = _make_pipeline_result( output={"field_a": "table_answer"}, ) @@ -433,10 +433,10 @@ class TestStructureToolAgentic: @patch(_PATCH_DISPATCHER) def test_structure_tool_agentic_routing( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, mock_fs, mock_platform_helper, @@ -461,7 +461,7 @@ def test_structure_tool_agentic_routing( } dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance # Simulate successful agentic extraction agentic_result = ExecutionResult( @@ -488,10 +488,10 @@ class TestStructureToolProfileOverrides: @patch(_PATCH_DISPATCHER) def test_structure_tool_profile_overrides( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, tool_metadata_regular, mock_fs, @@ -514,7 +514,7 @@ def test_structure_tool_profile_overrides( } dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance dispatcher_instance.dispatch.return_value = _make_pipeline_result( output={"field_a": "answer"}, ) @@ -535,10 +535,10 @@ class TestStructureToolPipelineFailure: @patch(_PATCH_DISPATCHER) def test_structure_tool_pipeline_failure( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, tool_metadata_regular, mock_fs, @@ -555,7 +555,7 @@ def test_structure_tool_pipeline_failure( } dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance pipeline_failure = ExecutionResult.failure( error="X2Text adapter error: connection refused" @@ -578,10 +578,10 @@ class TestStructureToolMultipleOutputs: @patch(_PATCH_DISPATCHER) def test_structure_tool_multiple_outputs( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, tool_metadata_regular, mock_fs, @@ -604,7 +604,7 @@ def test_structure_tool_multiple_outputs( } dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance dispatcher_instance.dispatch.return_value = _make_pipeline_result( output={"field_a": "a", "field_b": "b"}, ) @@ -630,10 +630,10 @@ class TestStructureToolOutputWritten: @patch(_PATCH_DISPATCHER) def test_structure_tool_output_written( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, tool_metadata_regular, mock_fs, @@ -650,7 +650,7 @@ def test_structure_tool_output_written( } dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance dispatcher_instance.dispatch.return_value = _make_pipeline_result( output={"field_a": "answer"}, ) @@ -690,10 +690,10 @@ class TestStructureToolMetadataFileName: @patch(_PATCH_DISPATCHER) def test_structure_tool_metadata_file_name( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, tool_metadata_regular, mock_fs, @@ -710,7 +710,7 @@ def test_structure_tool_metadata_file_name( } dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance dispatcher_instance.dispatch.return_value = _make_pipeline_result( output={"field_a": "answer"}, metadata={"run_id": "123", "file_name": "test.pdf"}, @@ -731,10 +731,10 @@ class TestStructureToolNoSummarize: @patch(_PATCH_DISPATCHER) def test_no_summarize_params_when_disabled( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim_cls, base_params, tool_metadata_regular, mock_fs, @@ -751,7 +751,7 @@ def test_no_summarize_params_when_disabled( } dispatcher_instance = MagicMock() - MockDispatcher.return_value = dispatcher_instance + mock_dispatcher_cls.return_value = dispatcher_instance dispatcher_instance.dispatch.return_value = _make_pipeline_result() execute_structure_tool(base_params) diff --git a/workers/tests/test_sanity_phase5.py b/workers/tests/test_sanity_phase5.py index 71e6775902..31675b8f9c 100644 --- a/workers/tests/test_sanity_phase5.py +++ b/workers/tests/test_sanity_phase5.py @@ -110,7 +110,7 @@ def _mock_prompt_deps(llm=None): if llm is None: llm = _mock_llm() - from executor.executors.answer_prompt import AnswerPromptService + from executor.executors.answer_prompt import AnswerPromptService as answer_prompt_svc_cls retrieval_service = MagicMock(name="RetrievalService") retrieval_service.run_retrieval.return_value = ["chunk1"] @@ -124,20 +124,20 @@ def _mock_prompt_deps(llm=None): index_instance.generate_index_key.return_value = "doc-key-1" index_cls.return_value = index_instance - LLM_cls = MagicMock(name="LLM") - LLM_cls.return_value = llm + llm_cls = MagicMock(name="LLM") + llm_cls.return_value = llm - EmbeddingCompat = MagicMock(name="EmbeddingCompat") - VectorDB = MagicMock(name="VectorDB") + embedding_compat_cls = MagicMock(name="EmbeddingCompat") + vector_db_cls = MagicMock(name="VectorDB") return ( - AnswerPromptService, + answer_prompt_svc_cls, retrieval_service, variable_replacement_service, index_cls, - LLM_cls, - EmbeddingCompat, - VectorDB, + llm_cls, + embedding_compat_cls, + vector_db_cls, ) @@ -253,8 +253,8 @@ class TestIdeIndexEagerChain: @patch(_PATCH_SHIM) def test_ide_index_success( self, - MockShim, - MockX2Text, + mock_shim, + mock_x2text, mock_fs, mock_index_deps, eager_app, @@ -265,7 +265,7 @@ def test_ide_index_success( x2t_instance.process.return_value = _mock_process_response( "IDE extracted text" ) - MockX2Text.return_value = x2t_instance + mock_x2text.return_value = x2t_instance fs = MagicMock() fs.exists.return_value = False @@ -326,15 +326,15 @@ def test_ide_index_success( @patch(_PATCH_SHIM) def test_ide_index_extract_failure( self, - MockShim, - MockX2Text, + mock_shim, + mock_x2text, mock_fs, eager_app, ): """ide_index returns failure if extract fails.""" x2t_instance = MagicMock() x2t_instance.process.side_effect = Exception("X2Text unavailable") - MockX2Text.return_value = x2t_instance + mock_x2text.return_value = x2t_instance fs = MagicMock() fs.exists.return_value = False @@ -394,8 +394,8 @@ class TestStructurePipelineEagerChain: @patch(_PATCH_SHIM) def test_structure_pipeline_normal( self, - MockShim, - MockX2Text, + mock_shim, + mock_x2text, mock_fs, mock_index_deps, mock_prompt_deps, @@ -406,7 +406,7 @@ def test_structure_pipeline_normal( # Mock extract x2t_instance = MagicMock() x2t_instance.process.return_value = _mock_process_response("Revenue is $1M") - MockX2Text.return_value = x2t_instance + mock_x2text.return_value = x2t_instance fs = MagicMock() fs.exists.return_value = False @@ -496,8 +496,8 @@ def test_structure_pipeline_normal( @patch(_PATCH_SHIM) def test_structure_pipeline_single_pass( self, - MockShim, - MockX2Text, + mock_shim, + mock_x2text, mock_fs, mock_prompt_deps, _mock_idx_utils, @@ -506,7 +506,7 @@ def test_structure_pipeline_single_pass( """Single pass: extract → single_pass_extraction (no index).""" x2t_instance = MagicMock() x2t_instance.process.return_value = _mock_process_response("Revenue data") - MockX2Text.return_value = x2t_instance + mock_x2text.return_value = x2t_instance fs = MagicMock() fs.exists.return_value = False @@ -574,8 +574,8 @@ def test_structure_pipeline_single_pass( @patch(_PATCH_SHIM) def test_structure_pipeline_skip_extraction( self, - MockShim, - MockX2Text, + mock_shim, + mock_x2text, mock_fs, mock_prompt_deps, _mock_idx_utils, @@ -633,22 +633,22 @@ def test_structure_pipeline_skip_extraction( result = ExecutionResult.from_dict(result_dict) assert result.success # No extract was called (X2Text not mocked beyond fixture) - MockX2Text.assert_not_called() + mock_x2text.assert_not_called() @patch(_PATCH_FS) @patch(_PATCH_X2TEXT) @patch(_PATCH_SHIM) def test_structure_pipeline_extract_failure( self, - MockShim, - MockX2Text, + mock_shim, + mock_x2text, mock_fs, eager_app, ): """Pipeline extract failure propagated as result failure.""" x2t_instance = MagicMock() x2t_instance.process.side_effect = Exception("X2Text timeout") - MockX2Text.return_value = x2t_instance + mock_x2text.return_value = x2t_instance fs = MagicMock() fs.exists.return_value = False @@ -709,10 +709,10 @@ class TestStructureToolSingleDispatch: ) def test_single_dispatch_normal( self, - MockDispatcher, + mock_dispatcher_cls, mock_create_ph, mock_get_fs, - MockShim, + mock_shim, ): """Normal path sends single structure_pipeline dispatch.""" from file_processing.structure_tool_task import ( @@ -754,7 +754,7 @@ def test_single_dispatch_normal( mock_create_ph.return_value = ph dispatcher = MagicMock() - MockDispatcher.return_value = dispatcher + mock_dispatcher_cls.return_value = dispatcher dispatcher.dispatch.return_value = ExecutionResult( success=True, data={"output": {"f1": "ans"}, "metadata": {}, "metrics": {}}, From a2edb23d5232d69e592574c05885996d4a4cc834 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 19:02:49 +0000 Subject: [PATCH 53/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- workers/executor/executors/legacy_executor.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 999e5d3109..ee1c10c938 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -1472,7 +1472,9 @@ def _apply_type_conversion( Handles NUMBER, EMAIL, DATE, BOOLEAN, JSON, and TEXT types. """ - from executor.executors.answer_prompt import AnswerPromptService as answer_prompt_svc + from executor.executors.answer_prompt import ( + AnswerPromptService as answer_prompt_svc, + ) from executor.executors.constants import PromptServiceConstants as PSKeys prompt_name = output[PSKeys.NAME] @@ -1654,7 +1656,9 @@ def _handle_summarize(self, context: ExecutionContext) -> ExecutionResult: tool=shim, usage_kwargs={**usage_kwargs}, ) - from executor.executors.answer_prompt import AnswerPromptService as answer_prompt_svc + from executor.executors.answer_prompt import ( + AnswerPromptService as answer_prompt_svc, + ) shim.stream_log("Running document summarization...") summary = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) From 3f86131badfbe776dd278ee984d71fde192b0ecf Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 12 Mar 2026 00:47:58 +0530 Subject: [PATCH 54/64] UN-3266 fix: resolve remaining SonarCloud code smells in PR #1849 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - test_sanity_phase2/4.py, test_answer_prompt.py: rename PascalCase local variables in _mock_prompt_deps/_mock_deps to snake_case (RetrievalService→retrieval_svc, VariableReplacementService→ variable_replacement_svc, Index→index_cls, LLM_cls→llm_cls, EmbeddingCompat→embedding_compat_cls, VectorDB→vector_db_cls, AnswerPromptService→answer_prompt_svc_cls) — fixes S117 - test_sanity_phase3.py: remove unused local variable "result" — fixes S1481 - structure_tool_task.py: remove redundant json.JSONDecodeError from except clause (subclass of ValueError) — fixes S5713 - shared/workflow/execution/service.py: replace generic Exception with RuntimeError for structure tool failure — fixes S112 - run-worker-docker.sh: define EXECUTOR_WORKER_TYPE constant and replace 10 literal "executor" occurrences — fixes S1192 Co-Authored-By: Claude Sonnet 4.6 --- .../file_processing/structure_tool_task.py | 2 +- workers/run-worker-docker.sh | 19 ++++++---- workers/shared/workflow/execution/service.py | 2 +- workers/tests/test_answer_prompt.py | 38 +++++++++---------- workers/tests/test_sanity_phase2.py | 38 +++++++++---------- workers/tests/test_sanity_phase3.py | 2 +- workers/tests/test_sanity_phase4.py | 38 +++++++++---------- 7 files changed, 71 insertions(+), 68 deletions(-) diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index 279e24ba3f..838a806d65 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -176,7 +176,7 @@ def _should_skip_extraction_for_smart_table( schema_data = json.loads(prompt) if schema_data and isinstance(schema_data, dict): return True - except (json.JSONDecodeError, ValueError) as e: + except ValueError as e: logger.warning( "Failed to parse prompt as JSON for smart table: %s", e ) diff --git a/workers/run-worker-docker.sh b/workers/run-worker-docker.sh index 16668a919e..10e9c32196 100755 --- a/workers/run-worker-docker.sh +++ b/workers/run-worker-docker.sh @@ -22,6 +22,9 @@ WORKERS_DIR="/app" # Default environment file ENV_FILE="/app/.env" +# Worker type constant for the executor worker +readonly EXECUTOR_WORKER_TYPE="executor" + # Available core workers (OSS) declare -A WORKERS=( ["api"]="api_deployment" @@ -35,7 +38,7 @@ declare -A WORKERS=( ["log-consumer"]="log_consumer" ["scheduler"]="scheduler" ["schedule"]="scheduler" - ["executor"]="executor" + ["${EXECUTOR_WORKER_TYPE}"]="${EXECUTOR_WORKER_TYPE}" ["all"]="all" ) @@ -52,7 +55,7 @@ declare -A WORKER_QUEUES=( ["notification"]="notifications,notifications_webhook,notifications_email,notifications_sms,notifications_priority" ["log_consumer"]="celery_log_task_queue" ["scheduler"]="scheduler" - ["executor"]="celery_executor_legacy" + ["${EXECUTOR_WORKER_TYPE}"]="celery_executor_legacy" ) # Worker health ports @@ -64,7 +67,7 @@ declare -A WORKER_HEALTH_PORTS=( ["log_consumer"]="8084" ["notification"]="8085" ["scheduler"]="8087" - ["executor"]="8088" + ["${EXECUTOR_WORKER_TYPE}"]="8088" ) # Function to print colored output @@ -199,7 +202,7 @@ detect_worker_type_from_args() { *"notifications"*) echo "notification" ;; *"celery_log_task_queue"*) echo "log_consumer" ;; *"scheduler"*) echo "scheduler" ;; - *"executor"*) echo "executor" ;; + *"${EXECUTOR_WORKER_TYPE}"*) echo "${EXECUTOR_WORKER_TYPE}" ;; *"celery"*) echo "general" ;; *) echo "general" ;; # fallback esac @@ -263,7 +266,7 @@ run_worker() { "scheduler") queues="${CELERY_QUEUES_SCHEDULER:-$queues}" ;; - "executor") + "${EXECUTOR_WORKER_TYPE}") queues="${CELERY_QUEUES_EXECUTOR:-$queues}" ;; esac @@ -301,7 +304,7 @@ run_worker() { export SCHEDULER_HEALTH_PORT="${health_port}" export SCHEDULER_METRICS_PORT="${health_port}" ;; - "executor") + "${EXECUTOR_WORKER_TYPE}") export EXECUTOR_HEALTH_PORT="${health_port}" export EXECUTOR_METRICS_PORT="${health_port}" ;; @@ -337,7 +340,7 @@ run_worker() { "scheduler") concurrency="${WORKER_SCHEDULER_CONCURRENCY:-2}" ;; - "executor") + "${EXECUTOR_WORKER_TYPE}") concurrency="${WORKER_EXECUTOR_CONCURRENCY:-2}" ;; *) @@ -548,7 +551,7 @@ if [[ "$1" == *"celery"* ]] || [[ "$1" == *".venv"* ]]; then export SCHEDULER_HEALTH_PORT="8087" export SCHEDULER_METRICS_PORT="8087" ;; - "executor") + "${EXECUTOR_WORKER_TYPE}") export EXECUTOR_HEALTH_PORT="8088" export EXECUTOR_METRICS_PORT="8088" ;; diff --git a/workers/shared/workflow/execution/service.py b/workers/shared/workflow/execution/service.py index f605c02014..0f375846ae 100644 --- a/workers/shared/workflow/execution/service.py +++ b/workers/shared/workflow/execution/service.py @@ -1066,7 +1066,7 @@ def _execute_structure_tool_workflow( result = _execute_structure_tool(params) if not result.get("success"): - raise Exception( + raise RuntimeError( f"Structure tool failed: {result.get('error', 'Unknown error')}" ) diff --git a/workers/tests/test_answer_prompt.py b/workers/tests/test_answer_prompt.py index f38454936f..4b6e53f90e 100644 --- a/workers/tests/test_answer_prompt.py +++ b/workers/tests/test_answer_prompt.py @@ -109,34 +109,34 @@ def _mock_deps(llm=None): llm = _mock_llm() # AnswerPromptService — use the real class - from executor.executors.answer_prompt import AnswerPromptService + from executor.executors.answer_prompt import AnswerPromptService as answer_prompt_svc_cls - RetrievalService = MagicMock(name="RetrievalService") - RetrievalService.run_retrieval.return_value = ["chunk1", "chunk2"] - RetrievalService.retrieve_complete_context.return_value = ["full content"] + retrieval_svc = MagicMock(name="RetrievalService") + retrieval_svc.run_retrieval.return_value = ["chunk1", "chunk2"] + retrieval_svc.retrieve_complete_context.return_value = ["full content"] - VariableReplacementService = MagicMock(name="VariableReplacementService") - VariableReplacementService.is_variables_present.return_value = False + variable_replacement_svc = MagicMock(name="VariableReplacementService") + variable_replacement_svc.is_variables_present.return_value = False - Index = MagicMock(name="Index") + index_cls = MagicMock(name="Index") index_instance = MagicMock() index_instance.generate_index_key.return_value = "doc-id-1" - Index.return_value = index_instance + index_cls.return_value = index_instance - LLM_cls = MagicMock(name="LLM") - LLM_cls.return_value = llm + llm_cls = MagicMock(name="LLM") + llm_cls.return_value = llm - EmbeddingCompat = MagicMock(name="EmbeddingCompat") - VectorDB = MagicMock(name="VectorDB") + embedding_compat_cls = MagicMock(name="EmbeddingCompat") + vector_db_cls = MagicMock(name="VectorDB") return ( - AnswerPromptService, - RetrievalService, - VariableReplacementService, - Index, - LLM_cls, - EmbeddingCompat, - VectorDB, + answer_prompt_svc_cls, + retrieval_svc, + variable_replacement_svc, + index_cls, + llm_cls, + embedding_compat_cls, + vector_db_cls, ) diff --git a/workers/tests/test_sanity_phase2.py b/workers/tests/test_sanity_phase2.py index 2aaeb81730..18a87e51d3 100644 --- a/workers/tests/test_sanity_phase2.py +++ b/workers/tests/test_sanity_phase2.py @@ -121,34 +121,34 @@ def _mock_prompt_deps(llm=None): if llm is None: llm = _mock_llm() - from executor.executors.answer_prompt import AnswerPromptService + from executor.executors.answer_prompt import AnswerPromptService as answer_prompt_svc_cls - RetrievalService = MagicMock(name="RetrievalService") - RetrievalService.run_retrieval.return_value = ["chunk1", "chunk2"] - RetrievalService.retrieve_complete_context.return_value = ["full content"] + retrieval_svc = MagicMock(name="RetrievalService") + retrieval_svc.run_retrieval.return_value = ["chunk1", "chunk2"] + retrieval_svc.retrieve_complete_context.return_value = ["full content"] - VariableReplacementService = MagicMock(name="VariableReplacementService") - VariableReplacementService.is_variables_present.return_value = False + variable_replacement_svc = MagicMock(name="VariableReplacementService") + variable_replacement_svc.is_variables_present.return_value = False - Index = MagicMock(name="Index") + index_cls = MagicMock(name="Index") index_instance = MagicMock() index_instance.generate_index_key.return_value = "doc-id-sanity" - Index.return_value = index_instance + index_cls.return_value = index_instance - LLM_cls = MagicMock(name="LLM") - LLM_cls.return_value = llm + llm_cls = MagicMock(name="LLM") + llm_cls.return_value = llm - EmbeddingCompat = MagicMock(name="EmbeddingCompat") - VectorDB = MagicMock(name="VectorDB") + embedding_compat_cls = MagicMock(name="EmbeddingCompat") + vector_db_cls = MagicMock(name="VectorDB") return ( - AnswerPromptService, - RetrievalService, - VariableReplacementService, - Index, - LLM_cls, - EmbeddingCompat, - VectorDB, + answer_prompt_svc_cls, + retrieval_svc, + variable_replacement_svc, + index_cls, + llm_cls, + embedding_compat_cls, + vector_db_cls, ) diff --git a/workers/tests/test_sanity_phase3.py b/workers/tests/test_sanity_phase3.py index c57fd45d9a..f2a598024c 100644 --- a/workers/tests/test_sanity_phase3.py +++ b/workers/tests/test_sanity_phase3.py @@ -470,7 +470,7 @@ def test_structure_tool_agentic_routing( ) dispatcher_instance.dispatch.return_value = agentic_result - result = execute_structure_tool(base_params) + execute_structure_tool(base_params) # Should dispatch to agentic executor with agentic_extract operation calls = dispatcher_instance.dispatch.call_args_list diff --git a/workers/tests/test_sanity_phase4.py b/workers/tests/test_sanity_phase4.py index 2d5e72715c..7e94489f3d 100644 --- a/workers/tests/test_sanity_phase4.py +++ b/workers/tests/test_sanity_phase4.py @@ -120,34 +120,34 @@ def _mock_prompt_deps(llm=None): if llm is None: llm = _mock_llm() - from executor.executors.answer_prompt import AnswerPromptService + from executor.executors.answer_prompt import AnswerPromptService as answer_prompt_svc_cls - RetrievalService = MagicMock(name="RetrievalService") - RetrievalService.run_retrieval.return_value = ["chunk1"] - RetrievalService.retrieve_complete_context.return_value = ["full doc"] + retrieval_svc = MagicMock(name="RetrievalService") + retrieval_svc.run_retrieval.return_value = ["chunk1"] + retrieval_svc.retrieve_complete_context.return_value = ["full doc"] - VariableReplacementService = MagicMock(name="VariableReplacementService") - VariableReplacementService.is_variables_present.return_value = False + variable_replacement_svc = MagicMock(name="VariableReplacementService") + variable_replacement_svc.is_variables_present.return_value = False - Index = MagicMock(name="Index") + index_cls = MagicMock(name="Index") index_instance = MagicMock() index_instance.generate_index_key.return_value = "doc-ide-key" - Index.return_value = index_instance + index_cls.return_value = index_instance - LLM_cls = MagicMock(name="LLM") - LLM_cls.return_value = llm + llm_cls = MagicMock(name="LLM") + llm_cls.return_value = llm - EmbeddingCompat = MagicMock(name="EmbeddingCompat") - VectorDB = MagicMock(name="VectorDB") + embedding_compat_cls = MagicMock(name="EmbeddingCompat") + vector_db_cls = MagicMock(name="VectorDB") return ( - AnswerPromptService, - RetrievalService, - VariableReplacementService, - Index, - LLM_cls, - EmbeddingCompat, - VectorDB, + answer_prompt_svc_cls, + retrieval_svc, + variable_replacement_svc, + index_cls, + llm_cls, + embedding_compat_cls, + vector_db_cls, ) From 45e61c42b9992bc6eaba04a4ec9ba6ba11973bb4 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 12 Mar 2026 01:27:09 +0530 Subject: [PATCH 55/64] UN-3266 fix: resolve SonarCloud cognitive complexity and code smell violations - Reduce cognitive complexity in answer_prompt.py: - Extract _build_grammar_notes, _run_webhook_postprocess helpers - _is_safe_public_url: extracted _resolve_host_addresses helper - handle_json: early-return pattern eliminates nesting - construct_prompt: delegates grammar loop to _build_grammar_notes - Reduce cognitive complexity in legacy_executor.py: - Extract _execute_single_prompt, _run_table_extraction helpers - Extract _run_challenge_if_enabled, _run_evaluation_if_enabled - Extract _inject_table_settings, _finalize_pipeline_result - Extract _convert_number_answer, _convert_scalar_answer - Extract _sanitize_dict_values helper - _handle_answer_prompt CC reduced from 50 to ~7 - Reduce CC in structure_tool_task.py: guard-clause refactor - Reduce CC in backend: dto.py, deployment_helper.py, api_deployment_views.py, prompt_studio_helper.py - Fix S117: rename PascalCase local vars in test_answer_prompt.py - Fix S1192: extract EXECUTOR_WORKER_TYPE constant in run-worker.sh - Fix S1172: remove unused params from structure_tool_task.py - Fix S5713: remove redundant JSONDecodeError in json_repair_helper.py - Fix S112/S5727 in test_execution.py Co-Authored-By: Claude Sonnet 4.6 --- backend/api_v2/api_deployment_views.py | 34 +- backend/api_v2/deployment_helper.py | 138 ++- .../prompt_studio_helper.py | 49 +- backend/workflow_manager/workflow_v2/dto.py | 22 +- unstract/sdk1/tests/test_execution.py | 5 +- workers/executor/executors/answer_prompt.py | 187 ++-- .../executor/executors/json_repair_helper.py | 2 +- workers/executor/executors/legacy_executor.py | 912 ++++++++++-------- .../file_processing/structure_tool_task.py | 32 +- workers/run-worker.sh | 13 +- workers/tests/test_answer_prompt.py | 12 +- 11 files changed, 799 insertions(+), 607 deletions(-) diff --git a/backend/api_v2/api_deployment_views.py b/backend/api_v2/api_deployment_views.py index 33860425f0..e636ca01ec 100644 --- a/backend/api_v2/api_deployment_views.py +++ b/backend/api_v2/api_deployment_views.py @@ -3,7 +3,6 @@ import uuid from typing import Any -from configuration.models import Configuration from django.db.models import F, OuterRef, QuerySet, Subquery from django.http import HttpResponse from permissions.permission import IsOwner, IsOwnerOrSharedUserOrSharedToOrg @@ -211,38 +210,15 @@ def get( status=status.HTTP_422_UNPROCESSABLE_ENTITY, ) - # Process completed execution response_status = status.HTTP_422_UNPROCESSABLE_ENTITY if execution_status_value == CeleryTaskState.COMPLETED.value: response_status = status.HTTP_200_OK - # Ensure workflow identification keys are always in item metadata - api_deployment = deployment_execution_dto.api - organization = api_deployment.organization if api_deployment else None - org_id = str(organization.organization_id) if organization else "" - DeploymentHelper._enrich_result_with_workflow_metadata( - response, organization_id=org_id + DeploymentHelper.process_completed_execution( + response=response, + deployment_execution_dto=deployment_execution_dto, + include_metadata=include_metadata, + include_metrics=include_metrics, ) - # Check if highlight data should be removed using configuration registry - enable_highlight = False # Safe default if the key is unavailable (e.g., OSS) - # Check if the configuration key exists (Cloud deployment) or use settings (OSS) - from configuration.config_registry import ConfigurationRegistry - - if ConfigurationRegistry.is_config_key_available( - "ENABLE_HIGHLIGHT_API_DEPLOYMENT" - ): - enable_highlight = Configuration.get_value_by_organization( - config_key="ENABLE_HIGHLIGHT_API_DEPLOYMENT", - organization=organization, - ) - if not enable_highlight: - response.remove_result_metadata_keys(["highlight_data"]) - response.remove_result_metadata_keys(["extracted_text"]) - if include_metadata or include_metrics: - DeploymentHelper._enrich_result_with_usage_metadata(response) - if not include_metadata and not include_metrics: - response.remove_inner_result_metadata() - if not include_metrics: - response.remove_result_metrics() return Response( data={ "status": response.execution_status, diff --git a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py index 8971f21c62..c41f894b7a 100644 --- a/backend/api_v2/deployment_helper.py +++ b/backend/api_v2/deployment_helper.py @@ -298,6 +298,30 @@ def execute_workflow( ) return APIExecutionResponseSerializer(result).data + @staticmethod + def _enrich_item_inner_metadata(item: dict, file_exec_id: str, UsageHelper: Any) -> None: + """Inject per-model usage breakdown into item['result']['metadata'].""" + inner_result = item.get("result") + if not isinstance(inner_result, dict): + return + metadata = inner_result.get("metadata") + if not isinstance(metadata, dict): + return + usage_by_model = UsageHelper.get_usage_by_model(file_exec_id) + if usage_by_model: + metadata.update(usage_by_model) + + @staticmethod + def _enrich_item_top_metadata(item: dict, file_exec_id: str, UsageHelper: Any) -> None: + """Inject aggregated usage totals into item['metadata']['usage'].""" + item_metadata = item.get("metadata") + if not isinstance(item_metadata, dict): + return + aggregated = UsageHelper.get_aggregated_token_count(file_exec_id) + if aggregated: + aggregated["file_execution_id"] = file_exec_id + item_metadata["usage"] = aggregated + @staticmethod def _enrich_result_with_usage_metadata(result: ExecutionResponse) -> None: """Enrich each file result's metadata with usage data. @@ -319,23 +343,38 @@ def _enrich_result_with_usage_metadata(result: ExecutionResponse) -> None: file_exec_id = item.get("file_execution_id") if not file_exec_id: continue + DeploymentHelper._enrich_item_inner_metadata(item, file_exec_id, UsageHelper) + DeploymentHelper._enrich_item_top_metadata(item, file_exec_id, UsageHelper) - # Enrich inner result metadata with per-model breakdown - inner_result = item.get("result") - if isinstance(inner_result, dict): - metadata = inner_result.get("metadata") - if isinstance(metadata, dict): - usage_by_model = UsageHelper.get_usage_by_model(file_exec_id) - if usage_by_model: - metadata.update(usage_by_model) - - # Enrich top-level item metadata with aggregated usage - item_metadata = item.get("metadata") - if isinstance(item_metadata, dict): - aggregated = UsageHelper.get_aggregated_token_count(file_exec_id) - if aggregated: - aggregated["file_execution_id"] = file_exec_id - item_metadata["usage"] = aggregated + @staticmethod + def _enrich_item_workflow_metadata( + item: dict, + file_exec_id: str, + fe_lookup: dict, + workflow_execution: Any, + organization_id: str, + tag_names: list[str], + ) -> None: + """Populate workflow identification keys into item['metadata'].""" + if not isinstance(item.get("metadata"), dict): + item["metadata"] = {} + metadata = item["metadata"] + fe = fe_lookup.get(str(file_exec_id)) + we = fe.workflow_execution if fe else workflow_execution + if fe: + metadata.setdefault("source_name", fe.file_name) + metadata.setdefault("source_hash", fe.file_hash or "") + metadata.setdefault("file_execution_id", str(fe.id)) + metadata.setdefault("total_elapsed_time", fe.execution_time) + if we: + metadata.setdefault("workflow_id", str(we.workflow_id)) + metadata.setdefault("execution_id", str(we.id)) + metadata.setdefault( + "workflow_start_time", + we.created_at.timestamp() if we.created_at else None, + ) + metadata.setdefault("organization_id", organization_id) + metadata.setdefault("tags", tag_names) @staticmethod def _enrich_result_with_workflow_metadata( @@ -384,31 +423,14 @@ def _enrich_result_with_workflow_metadata( file_exec_id = item.get("file_execution_id") if not file_exec_id: continue - - # Ensure metadata dict exists - if not isinstance(item.get("metadata"), dict): - item["metadata"] = {} - metadata = item["metadata"] - - fe = fe_lookup.get(str(file_exec_id)) - we = fe.workflow_execution if fe else workflow_execution - - # Fill MISSING keys only (setdefault won't overwrite) - if fe: - metadata.setdefault("source_name", fe.file_name) - metadata.setdefault("source_hash", fe.file_hash or "") - metadata.setdefault("file_execution_id", str(fe.id)) - metadata.setdefault("total_elapsed_time", fe.execution_time) - if we: - metadata.setdefault("workflow_id", str(we.workflow_id)) - metadata.setdefault("execution_id", str(we.id)) - metadata.setdefault( - "workflow_start_time", - we.created_at.timestamp() if we.created_at else None, - ) - - metadata.setdefault("organization_id", organization_id) - metadata.setdefault("tags", tag_names) + DeploymentHelper._enrich_item_workflow_metadata( + item=item, + file_exec_id=file_exec_id, + fe_lookup=fe_lookup, + workflow_execution=workflow_execution, + organization_id=organization_id, + tag_names=tag_names, + ) @staticmethod def get_execution_status(execution_id: str) -> ExecutionResponse: @@ -425,6 +447,40 @@ def get_execution_status(execution_id: str) -> ExecutionResponse: ) return execution_response + @staticmethod + def process_completed_execution( + response: ExecutionResponse, + deployment_execution_dto: Any, + include_metadata: bool, + include_metrics: bool, + ) -> None: + """Enrich and clean up the response for a completed execution.""" + from configuration.config_registry import ConfigurationRegistry + + api_deployment = deployment_execution_dto.api + organization = api_deployment.organization if api_deployment else None + org_id = str(organization.organization_id) if organization else "" + DeploymentHelper._enrich_result_with_workflow_metadata( + response, organization_id=org_id + ) + enable_highlight = False + if ConfigurationRegistry.is_config_key_available("ENABLE_HIGHLIGHT_API_DEPLOYMENT"): + from configuration.models import Configuration + + enable_highlight = Configuration.get_value_by_organization( + config_key="ENABLE_HIGHLIGHT_API_DEPLOYMENT", + organization=organization, + ) + if not enable_highlight: + response.remove_result_metadata_keys(["highlight_data"]) + response.remove_result_metadata_keys(["extracted_text"]) + if include_metadata or include_metrics: + DeploymentHelper._enrich_result_with_usage_metadata(response) + if not include_metadata and not include_metrics: + response.remove_inner_result_metadata() + if not include_metrics: + response.remove_result_metrics() + @staticmethod def fetch_presigned_file(url: str) -> InMemoryUploadedFile: """Fetch a file from a presigned URL and convert it to an uploaded file. diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py index fcc146b347..e1f3f1e2f4 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py +++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py @@ -470,6 +470,33 @@ def build_index_payload( return context, cb_kwargs + @staticmethod + def _resolve_llm_ids(tool: Any) -> tuple[str, str]: + """Resolve monitor_llm and challenge_llm IDs for the tool.""" + monitor_llm_instance = tool.monitor_llm + challenge_llm_instance = tool.challenge_llm + if monitor_llm_instance: + monitor_llm = str(monitor_llm_instance.id) + else: + dp = ProfileManager.get_default_llm_profile(tool) + monitor_llm = str(dp.llm.id) + if challenge_llm_instance: + challenge_llm = str(challenge_llm_instance.id) + else: + dp = ProfileManager.get_default_llm_profile(tool) + challenge_llm = str(dp.llm.id) + return monitor_llm, challenge_llm + + @staticmethod + def _build_grammar_list(prompt_grammer: Any) -> list[dict[str, Any]]: + """Build the grammar synonym list from the tool's prompt_grammer dict.""" + if not prompt_grammer: + return [] + return [ + {TSPKeys.WORD: word, TSPKeys.SYNONYMS: synonyms} + for word, synonyms in prompt_grammer.items() + ] + @staticmethod def build_fetch_response_payload( tool: CustomTool, @@ -496,21 +523,7 @@ def build_fetch_response_payload( profile_manager_id=profile_manager_id ) - monitor_llm_instance: AdapterInstance | None = tool.monitor_llm - monitor_llm: str | None = None - challenge_llm_instance: AdapterInstance | None = tool.challenge_llm - challenge_llm: str | None = None - if monitor_llm_instance: - monitor_llm = str(monitor_llm_instance.id) - else: - dp = ProfileManager.get_default_llm_profile(tool) - monitor_llm = str(dp.llm.id) - - if challenge_llm_instance: - challenge_llm = str(challenge_llm_instance.id) - else: - dp = ProfileManager.get_default_llm_profile(tool) - challenge_llm = str(dp.llm.id) + monitor_llm, challenge_llm = PromptStudioHelper._resolve_llm_ids(tool) PromptStudioHelper.validate_adapter_status(profile_manager) PromptStudioHelper.validate_profile_manager_owner_access(profile_manager) @@ -586,11 +599,7 @@ def build_fetch_response_payload( tool_id = str(tool.tool_id) output: dict[str, Any] = {} outputs: list[dict[str, Any]] = [] - grammar_list: list[dict[str, Any]] = [] - prompt_grammer = tool.prompt_grammer - if prompt_grammer: - for word, synonyms in prompt_grammer.items(): - grammar_list.append({TSPKeys.WORD: word, TSPKeys.SYNONYMS: synonyms}) + grammar_list = PromptStudioHelper._build_grammar_list(tool.prompt_grammer) output[TSPKeys.PROMPT] = prompt.prompt output[TSPKeys.ACTIVE] = prompt.active diff --git a/backend/workflow_manager/workflow_v2/dto.py b/backend/workflow_manager/workflow_v2/dto.py index 7c06126db8..19dc06093b 100644 --- a/backend/workflow_manager/workflow_v2/dto.py +++ b/backend/workflow_manager/workflow_v2/dto.py @@ -48,6 +48,19 @@ def __post_init__(self) -> None: self.message = self.message or None self.status_api = self.status_api or None + @staticmethod + def _remove_item_top_metadata(item: dict, keys_to_remove: list[str]) -> None: + """Remove metadata keys from top-level item['metadata'].""" + if "metadata" not in item: + return + if keys_to_remove: + item_metadata = item["metadata"] + if isinstance(item_metadata, dict): + for key in keys_to_remove: + item_metadata.pop(key, None) + else: + item.pop("metadata", None) + def remove_result_metadata_keys(self, keys_to_remove: list[str] = []) -> None: """Removes specified keys from the 'metadata' dictionary within each 'result' dictionary in the 'result' list attribute of the instance. If @@ -69,14 +82,7 @@ def remove_result_metadata_keys(self, keys_to_remove: list[str] = []) -> None: self._remove_specific_keys(result=result, keys_to_remove=keys_to_remove) # Handle top-level item["metadata"] (workers cache path) - if "metadata" in item: - if keys_to_remove: - item_metadata = item["metadata"] - if isinstance(item_metadata, dict): - for key in keys_to_remove: - item_metadata.pop(key, None) - else: - item.pop("metadata", None) + self._remove_item_top_metadata(item, keys_to_remove) def remove_inner_result_metadata(self) -> None: """Removes only the inner item["result"]["metadata"] dict (extraction diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index 540072ea0d..b8e94335ee 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -387,8 +387,7 @@ def execute(self, context: ExecutionContext) -> ExecutionResult: executor = ExecutorRegistry.get("decorated") assert executor.name == "decorated" - # Decorator returns the class unchanged - assert MyExecutor is not None + assert "decorated" in ExecutorRegistry.list_executors() def test_list_executors(self: Self) -> None: """list_executors() returns sorted names.""" @@ -465,7 +464,7 @@ def test_execute_through_registry(self: Self) -> None: def _make_failing_executor_class( executor_name: str, - exc: Exception, + exc: BaseException, ) -> type[BaseExecutor]: """Build an executor that always raises *exc*.""" diff --git a/workers/executor/executors/answer_prompt.py b/workers/executor/executors/answer_prompt.py index 859f205bba..89936fe598 100644 --- a/workers/executor/executors/answer_prompt.py +++ b/workers/executor/executors/answer_prompt.py @@ -23,6 +23,24 @@ logger = logging.getLogger(__name__) +def _resolve_host_addresses(host: str) -> set[str]: + """Resolve a hostname or IP string to a set of IP address strings.""" + try: + ipaddress.ip_address(host) + return {host} + except ValueError: + pass + try: + return { + sockaddr[0] + for _family, _type, _proto, _canonname, sockaddr in socket.getaddrinfo( + host, None, type=socket.SOCK_STREAM + ) + } + except Exception: + return set() + + def _is_safe_public_url(url: str) -> bool: """Validate webhook URL for SSRF protection. @@ -36,19 +54,7 @@ def _is_safe_public_url(url: str) -> bool: if host in ("localhost",): return False - addrs: set[str] = set() - try: - ipaddress.ip_address(host) - addrs.add(host) - except ValueError: - try: - for _family, _type, _proto, _canonname, sockaddr in socket.getaddrinfo( - host, None, type=socket.SOCK_STREAM - ): - addrs.add(sockaddr[0]) - except Exception: - return False - + addrs = _resolve_host_addresses(host) if not addrs: return False @@ -166,6 +172,23 @@ def construct_and_run_prompt( process_text=process_text, ) + @staticmethod + def _build_grammar_notes(grammar_list: list[dict[str, Any]]) -> str: + """Build grammar synonym notes for prompt injection.""" + if not grammar_list: + return "" + notes = "\n" + for grammar in grammar_list: + word = grammar.get(PSKeys.WORD, "") + synonyms = grammar.get(PSKeys.SYNONYMS, []) if word else [] + if synonyms and word: + notes += ( + f"\nNote: You can consider that the word '{word}' " + f"is the same as {', '.join(synonyms)} " + f"in both the question and the context." + ) + return notes + @staticmethod def construct_prompt( preamble: str, @@ -179,21 +202,7 @@ def construct_prompt( ) -> str: """Build the full prompt string with preamble, grammar, postamble, context.""" prompt = f"{preamble}\n\nQuestion or Instruction: {prompt}" - if grammar_list is not None and len(grammar_list) > 0: - prompt += "\n" - for grammar in grammar_list: - word = "" - synonyms = [] - if PSKeys.WORD in grammar: - word = grammar[PSKeys.WORD] - if PSKeys.SYNONYMS in grammar: - synonyms = grammar[PSKeys.SYNONYMS] - if len(synonyms) > 0 and word != "": - prompt += ( - f"\nNote: You can consider that the word '{word}' " - f"is the same as {', '.join(synonyms)} " - f"in both the question and the context." - ) + prompt += AnswerPromptService._build_grammar_notes(grammar_list) if prompt_type == PSKeys.JSON: json_postamble = os.environ.get( PSKeys.JSON_POSTAMBLE, PSKeys.DEFAULT_JSON_POSTAMBLE @@ -231,11 +240,11 @@ def run_completion( this callback, enabling source attribution. """ try: - from unstract.sdk1.exceptions import RateLimitError as SdkRateLimitError - from unstract.sdk1.exceptions import SdkError + from unstract.sdk1.exceptions import RateLimitError as _sdk_rate_limit_error + from unstract.sdk1.exceptions import SdkError as _sdk_error except ImportError: - SdkRateLimitError = Exception - SdkError = Exception + _sdk_rate_limit_error = Exception + _sdk_error = Exception try: completion = llm.complete( @@ -264,13 +273,42 @@ def run_completion( word_confidence_data ) return answer - except SdkRateLimitError as e: + except _sdk_rate_limit_error as e: raise RateLimitError(f"Rate limit error. {str(e)}") from e - except SdkError as e: + except _sdk_error as e: logger.error("Error fetching response for prompt: %s", e) status_code = getattr(e, "status_code", None) or 500 raise LegacyExecutorError(message=str(e), code=status_code) from e + @staticmethod + def _run_webhook_postprocess( + parsed_data: Any, + webhook_url: str | None, + highlight_data: Any, + ) -> tuple[Any, Any]: + """Run webhook-based postprocessing; return (processed_data, updated_highlight).""" + from executor.executors.postprocessor import postprocess_data + + if not webhook_url: + logger.warning("Postprocessing webhook enabled but URL missing; skipping.") + return parsed_data, None + if not _is_safe_public_url(webhook_url): + logger.warning("Postprocessing webhook URL is not allowed; skipping.") + return parsed_data, None + try: + return postprocess_data( + parsed_data, + webhook_enabled=True, + webhook_url=webhook_url, + highlight_data=highlight_data, + timeout=60, + ) + except Exception as e: + logger.warning( + "Postprocessing webhook failed: %s. Using unprocessed data.", e + ) + return parsed_data, None + @staticmethod def handle_json( answer: str, @@ -288,56 +326,39 @@ def handle_json( ) -> None: """Handle JSON responses from the LLM.""" from executor.executors.json_repair_helper import repair_json_with_best_structure - from executor.executors.postprocessor import postprocess_data prompt_key = output[PSKeys.NAME] if answer.lower() == "na": structured_output[prompt_key] = None - else: - parsed_data = repair_json_with_best_structure(answer) - - if isinstance(parsed_data, str): - logger.error("Error parsing response to JSON") - structured_output[prompt_key] = {} - else: - webhook_enabled = output.get(PSKeys.ENABLE_POSTPROCESSING_WEBHOOK, False) - webhook_url = output.get(PSKeys.POSTPROCESSING_WEBHOOK_URL) - - highlight_data = None - if enable_highlight and metadata and PSKeys.HIGHLIGHT_DATA in metadata: - highlight_data = metadata[PSKeys.HIGHLIGHT_DATA].get(prompt_key) - - processed_data = parsed_data - updated_highlight_data = None - - if webhook_enabled: - if not webhook_url: - logger.warning( - "Postprocessing webhook enabled but URL missing; skipping." - ) - elif not _is_safe_public_url(webhook_url): - logger.warning( - "Postprocessing webhook URL is not allowed; skipping." - ) - else: - try: - processed_data, updated_highlight_data = postprocess_data( - parsed_data, - webhook_enabled=True, - webhook_url=webhook_url, - highlight_data=highlight_data, - timeout=60, - ) - except Exception as e: - logger.warning( - "Postprocessing webhook failed: %s. " - "Using unprocessed data.", - e, - ) - - structured_output[prompt_key] = processed_data - - if enable_highlight and metadata and updated_highlight_data is not None: - metadata.setdefault(PSKeys.HIGHLIGHT_DATA, {})[prompt_key] = ( - updated_highlight_data - ) + return + + parsed_data = repair_json_with_best_structure(answer) + if isinstance(parsed_data, str): + logger.error("Error parsing response to JSON") + structured_output[prompt_key] = {} + return + + highlight_data = None + if enable_highlight and metadata and PSKeys.HIGHLIGHT_DATA in metadata: + highlight_data = metadata[PSKeys.HIGHLIGHT_DATA].get(prompt_key) + + processed_data = parsed_data + updated_highlight_data = None + + webhook_enabled = output.get(PSKeys.ENABLE_POSTPROCESSING_WEBHOOK, False) + if webhook_enabled: + webhook_url = output.get(PSKeys.POSTPROCESSING_WEBHOOK_URL) + processed_data, updated_highlight_data = ( + AnswerPromptService._run_webhook_postprocess( + parsed_data=parsed_data, + webhook_url=webhook_url, + highlight_data=highlight_data, + ) + ) + + structured_output[prompt_key] = processed_data + + if enable_highlight and metadata and updated_highlight_data is not None: + metadata.setdefault(PSKeys.HIGHLIGHT_DATA, {})[prompt_key] = ( + updated_highlight_data + ) diff --git a/workers/executor/executors/json_repair_helper.py b/workers/executor/executors/json_repair_helper.py index f1cf17c0b0..0a36b1c217 100644 --- a/workers/executor/executors/json_repair_helper.py +++ b/workers/executor/executors/json_repair_helper.py @@ -23,7 +23,7 @@ def repair_json_with_best_structure(json_str: str) -> Any: # Fast path — try strict JSON first try: return json.loads(json_str) - except (json.JSONDecodeError, ValueError): + except ValueError: pass # Try to import json_repair for advanced repair diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index ee1c10c938..432c8b5c58 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -499,19 +499,12 @@ def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResu # ---- Step 4: Table settings injection ---- if not is_single_pass: - outputs = answer_params.get("outputs", []) - extracted_file_path = index_template.get("extracted_file_path", "") - for output in outputs: - if "table_settings" in output: - table_settings = output["table_settings"] - is_dir = table_settings.get("is_directory_mode", False) - if skip_extraction: - table_settings["input_file"] = input_file_path - answer_params["file_path"] = input_file_path - else: - table_settings["input_file"] = extracted_file_path - table_settings["is_directory_mode"] = is_dir - output["table_settings"] = table_settings + self._inject_table_settings( + answer_params=answer_params, + index_template=index_template, + skip_extraction=skip_extraction, + input_file_path=input_file_path, + ) # ---- Step 5: Answer prompt / Single pass ---- mode_label = "single pass" if is_single_pass else "prompt" @@ -537,24 +530,57 @@ def _handle_structure_pipeline(self, context: ExecutionContext) -> ExecutionResu # ---- Step 6: Merge results ---- structured_output = answer_result.data + self._finalize_pipeline_result( + structured_output=structured_output, + source_file_name=source_file_name, + extracted_text=extracted_text, + index_metrics=index_metrics, + ) + + shim.stream_log("Pipeline completed successfully") + return ExecutionResult(success=True, data=structured_output) + + @staticmethod + def _inject_table_settings( + answer_params: dict, + index_template: dict, + skip_extraction: bool, + input_file_path: str, + ) -> None: + """Inject table settings file paths into each output that has them.""" + outputs = answer_params.get("outputs", []) + extracted_file_path = index_template.get("extracted_file_path", "") + for output in outputs: + if "table_settings" not in output: + continue + table_settings = output["table_settings"] + is_dir = table_settings.get("is_directory_mode", False) + if skip_extraction: + table_settings["input_file"] = input_file_path + answer_params["file_path"] = input_file_path + else: + table_settings["input_file"] = extracted_file_path + table_settings["is_directory_mode"] = is_dir + output["table_settings"] = table_settings - # Ensure metadata section + def _finalize_pipeline_result( + self, + structured_output: dict, + source_file_name: str, + extracted_text: str, + index_metrics: dict, + ) -> None: + """Populate metadata/metrics in structured_output after pipeline completion.""" if "metadata" not in structured_output: structured_output["metadata"] = {} structured_output["metadata"]["file_name"] = source_file_name - - # Add extracted text for HITL raw view if extracted_text: structured_output["metadata"]["extracted_text"] = extracted_text - - # Merge index metrics if index_metrics: existing_metrics = structured_output.get("metrics", {}) - merged = self._merge_pipeline_metrics(existing_metrics, index_metrics) - structured_output["metrics"] = merged - - shim.stream_log("Pipeline completed successfully") - return ExecutionResult(success=True, data=structured_output) + structured_output["metrics"] = self._merge_pipeline_metrics( + existing_metrics, index_metrics + ) def _run_pipeline_summarize( self, @@ -951,6 +977,13 @@ def _get_prompt_deps(): ) @staticmethod + @staticmethod + def _sanitize_dict_values(d: dict[str, Any]) -> None: + """Replace 'NA' string values with None inside a dict in-place.""" + for k, v in d.items(): + if isinstance(v, str) and v.lower() == "na": + d[k] = None + def _sanitize_null_values( structured_output: dict[str, Any], ) -> dict[str, Any]: @@ -959,17 +992,13 @@ def _sanitize_null_values( if isinstance(v, str) and v.lower() == "na": structured_output[k] = None elif isinstance(v, list): - for i in range(len(v)): - if isinstance(v[i], str) and v[i].lower() == "na": + for i, item in enumerate(v): + if isinstance(item, str) and item.lower() == "na": v[i] = None - elif isinstance(v[i], dict): - for k1, v1 in v[i].items(): - if isinstance(v1, str) and v1.lower() == "na": - v[i][k1] = None + elif isinstance(item, dict): + LegacyExecutor._sanitize_dict_values(item) elif isinstance(v, dict): - for k1, v1 in v.items(): - if isinstance(v1, str) and v1.lower() == "na": - v[k1] = None + LegacyExecutor._sanitize_dict_values(v) return structured_output def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: @@ -1106,353 +1135,471 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: # ---- Process each prompt ------------------------------------------- for output in prompts: - prompt_name = output[PSKeys.NAME] - prompt_text = output[PSKeys.PROMPT] - chunk_size = output[PSKeys.CHUNK_SIZE] - - logger.debug( - "Prompt config: name=%s chunk_size=%d type=%s", - prompt_name, - chunk_size, - output.get(PSKeys.TYPE, "TEXT"), - ) - - # Enrich component with current prompt_key for log correlation. - prompt_component = { - **self._log_component, - "prompt_key": prompt_name, - } - shim = ExecutorToolShim( + self._execute_single_prompt( + output=output, + context=context, + structured_output=structured_output, + metadata=metadata, + metrics=metrics, + variable_names=variable_names, + context_retrieval_metrics=context_retrieval_metrics, + answer_prompt_svc=answer_prompt_svc, + retrieval_svc=retrieval_svc, + variable_replacement_svc=variable_replacement_svc, + llm_cls=llm_cls, + embedding_compat_cls=embedding_compat_cls, + vector_db_cls=vector_db_cls, + tool_settings=tool_settings, + process_text_fn=process_text_fn, + run_id=run_id, + execution_id=execution_id, + file_hash=file_hash, + file_path=file_path, + doc_name=doc_name, + log_events_id=log_events_id, + tool_id=tool_id, + custom_data=custom_data, + execution_source=execution_source, platform_api_key=platform_api_key, - log_events_id=self._log_events_id, - component=prompt_component, ) - shim.stream_log(f"Processing prompt: {prompt_name}") - - # {{variable}} template replacement - if variable_replacement_svc.is_variables_present(prompt_text=prompt_text): - is_ide = execution_source == "ide" - prompt_text = variable_replacement_svc.replace_variables_in_prompt( - prompt=output, - structured_output=structured_output, - log_events_id=log_events_id, - tool_id=tool_id, - prompt_name=prompt_name, - doc_name=doc_name, - custom_data=custom_data, - is_ide=is_ide, - ) - shim.stream_log(f"Resolved template variables for: {prompt_name}") - logger.info( - "Executing prompt: tool_id=%s name=%s run_id=%s", - tool_id, - prompt_name, - run_id, - ) + pipeline_shim.stream_log(f"All {len(prompts)} prompts processed successfully") + logger.info( + "All prompts processed: tool_id=%s prompt_count=%d file=%s", + tool_id, + len(prompts), + doc_name, + ) - # %variable% replacement - output[PSKeys.PROMPTX] = answer_prompt_svc.extract_variable( - structured_output, variable_names, output, prompt_text - ) + # ---- Sanitize null values ------------------------------------------ + structured_output = self._sanitize_null_values(structured_output) - # Generate doc_id (standalone util — no Index DTOs needed) - from unstract.sdk1.utils.indexing import IndexingUtils + return ExecutionResult( + success=True, + data={ + PSKeys.OUTPUT: structured_output, + PSKeys.METADATA: metadata, + PSKeys.METRICS: metrics, + }, + ) - doc_id = IndexingUtils.generate_index_key( - vector_db=output[PSKeys.VECTOR_DB], - embedding=output[PSKeys.EMBEDDING], - x2text=output[PSKeys.X2TEXT_ADAPTER], - chunk_size=str(output[PSKeys.CHUNK_SIZE]), - chunk_overlap=str(output[PSKeys.CHUNK_OVERLAP]), - tool=shim, - file_hash=file_hash, - file_path=file_path, - ) + @staticmethod + def _convert_number_answer(answer: str, llm: Any, answer_prompt_svc: Any) -> Any: + """Run LLM number extraction and return float or None.""" + if answer.lower() == "na": + return None + prompt = ( + f"Extract the number from the following " + f"text:\n{answer}\n\nOutput just the number. " + f"If the number is expressed in millions " + f"or thousands, expand the number to its numeric value " + f"The number should be directly assignable " + f"to a numeric variable. " + f"It should not have any commas, " + f"percentages or other grouping " + f"characters. No explanation is required. " + f"If you cannot extract the number, output 0." + ) + raw = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) + try: + return float(raw) + except Exception: + return None - # TABLE/RECORD: delegate to TableExtractorExecutor in-process. - # The table executor plugin handles PDF table detection, - # header extraction, and CSV-to-JSON post-processing. - if output.get(PSKeys.TYPE) in (PSKeys.TABLE, PSKeys.RECORD): - from unstract.sdk1.execution.registry import ExecutorRegistry + @staticmethod + def _convert_scalar_answer( + answer: str, llm: Any, answer_prompt_svc: Any, prompt: str + ) -> str | None: + """Run LLM extraction for a scalar (email/date) and return result or None.""" + if answer.lower() == "na": + return None + return answer_prompt_svc.run_completion(llm=llm, prompt=prompt) + + def _run_challenge_if_enabled( + self, + tool_settings: dict[str, Any], + output: dict[str, Any], + structured_output: dict[str, Any], + context_list: list[str], + llm: Any, + llm_cls: Any, + usage_kwargs: dict[str, Any], + run_id: str, + platform_api_key: str, + metadata: dict[str, Any], + shim: Any, + prompt_name: str, + ) -> None: + """Run challenge verification plugin if enabled and available.""" + from executor.executors.constants import PromptServiceConstants as PSKeys + from executor.executors.plugins import ExecutorPluginLoader - try: - table_executor = ExecutorRegistry.get("table") - except KeyError: - raise LegacyExecutorError( - message=( - "TABLE extraction requires the table executor " - "plugin. Install the table_extractor plugin." - ) - ) + if not tool_settings.get(PSKeys.ENABLE_CHALLENGE): + return + challenge_cls = ExecutorPluginLoader.get("challenge") + if not challenge_cls: + return + challenge_llm_id = tool_settings.get(PSKeys.CHALLENGE_LLM) + if not challenge_llm_id: + return + shim.stream_log(f"Running challenge for: {prompt_name}") + challenge_llm = llm_cls( + adapter_instance_id=challenge_llm_id, + tool=shim, + usage_kwargs={**usage_kwargs, PSKeys.LLM_USAGE_REASON: PSKeys.CHALLENGE}, + capture_metrics=True, + ) + challenger = challenge_cls( + llm=llm, + challenge_llm=challenge_llm, + context="\n".join(context_list), + tool_settings=tool_settings, + output=output, + structured_output=structured_output, + run_id=run_id, + platform_key=platform_api_key, + metadata=metadata, + ) + challenger.run() + shim.stream_log(f"Challenge verification completed for: {prompt_name}") + logger.info("Challenge completed: prompt=%s", prompt_name) - table_ctx = ExecutionContext( - executor_name="table", - operation="table_extract", - run_id=run_id, - execution_source=execution_source, - organization_id=context.organization_id, - request_id=context.request_id, - executor_params={ - "llm_adapter_instance_id": output.get(PSKeys.LLM, ""), - "table_settings": output.get(PSKeys.TABLE_SETTINGS, {}), - "prompt": output.get(PSKeys.PROMPT, ""), - "PLATFORM_SERVICE_API_KEY": platform_api_key, - "execution_id": execution_id, - "tool_id": tool_id, - "file_name": doc_name, - }, - ) - table_ctx._log_component = self._log_component - table_ctx.log_events_id = self._log_events_id + @staticmethod + def _run_evaluation_if_enabled( + output: dict[str, Any], + context_list: list[str], + structured_output: dict[str, Any], + platform_api_key: str, + shim: Any, + prompt_name: str, + ) -> None: + """Run evaluation plugin if enabled and available.""" + from executor.executors.constants import PromptServiceConstants as PSKeys + from executor.executors.plugins import ExecutorPluginLoader - shim.stream_log(f"Running table extraction for: {prompt_name}") - table_result = table_executor.execute(table_ctx) + eval_settings = output.get(PSKeys.EVAL_SETTINGS, {}) + if not eval_settings.get(PSKeys.EVAL_SETTINGS_EVALUATE): + return + evaluator_cls = ExecutorPluginLoader.get("evaluation") + if not evaluator_cls: + return + shim.stream_log(f"Running evaluation for: {prompt_name}") + evaluator = evaluator_cls( + query=output.get(PSKeys.COMBINED_PROMPT, ""), + context="\n".join(context_list), + response=structured_output.get(prompt_name), + reference_answer=output.get("reference_answer", ""), + prompt=output, + structured_output=structured_output, + platform_key=platform_api_key, + ) + evaluator.run() + logger.info("Evaluation completed: prompt=%s", prompt_name) - if table_result.success: - structured_output[prompt_name] = table_result.data.get("output", "") - table_metrics = table_result.data.get("metadata", {}).get( - "metrics", {} - ) - metrics.setdefault(prompt_name, {}).update( - {"table_extraction": table_metrics} - ) - shim.stream_log(f"Table extraction completed for: {prompt_name}") - logger.info("TABLE extraction completed: prompt=%s", prompt_name) - else: - structured_output[prompt_name] = "" - logger.error( - "TABLE extraction failed for prompt=%s: %s", - prompt_name, - table_result.error, - ) - shim.stream_log(f"Completed prompt: {prompt_name}") - continue + def _execute_single_prompt( + self, + output: dict[str, Any], + context: ExecutionContext, + structured_output: dict[str, Any], + metadata: dict[str, Any], + metrics: dict[str, Any], + variable_names: list[str], + context_retrieval_metrics: dict[str, Any], + answer_prompt_svc: Any, + retrieval_svc: Any, + variable_replacement_svc: Any, + llm_cls: Any, + embedding_compat_cls: Any, + vector_db_cls: Any, + tool_settings: dict[str, Any], + process_text_fn: Any, + run_id: str, + execution_id: str, + file_hash: Any, + file_path: str, + doc_name: str, + log_events_id: str, + tool_id: str, + custom_data: dict[str, Any], + execution_source: str, + platform_api_key: str, + ) -> None: + """Execute one prompt: variable replacement, retrieval, LLM, post-process.""" + from executor.executors.constants import PromptServiceConstants as PSKeys + from executor.executors.constants import RetrievalStrategy + from unstract.sdk1.utils.indexing import IndexingUtils - if output.get(PSKeys.TYPE) == PSKeys.LINE_ITEM: - raise LegacyExecutorError( - message="LINE_ITEM extraction is not supported." - ) + prompt_name = output[PSKeys.NAME] + prompt_text = output[PSKeys.PROMPT] + chunk_size = output[PSKeys.CHUNK_SIZE] - # Create adapters - try: - usage_kwargs = { - "run_id": run_id, - "execution_id": execution_id, - } - llm = llm_cls( - adapter_instance_id=output[PSKeys.LLM], + logger.debug( + "Prompt config: name=%s chunk_size=%d type=%s", + prompt_name, + chunk_size, + output.get(PSKeys.TYPE, "TEXT"), + ) + + shim = ExecutorToolShim( + platform_api_key=platform_api_key, + log_events_id=self._log_events_id, + component={**self._log_component, "prompt_key": prompt_name}, + ) + shim.stream_log(f"Processing prompt: {prompt_name}") + + if variable_replacement_svc.is_variables_present(prompt_text=prompt_text): + prompt_text = variable_replacement_svc.replace_variables_in_prompt( + prompt=output, + structured_output=structured_output, + log_events_id=log_events_id, + tool_id=tool_id, + prompt_name=prompt_name, + doc_name=doc_name, + custom_data=custom_data, + is_ide=execution_source == "ide", + ) + shim.stream_log(f"Resolved template variables for: {prompt_name}") + + logger.info( + "Executing prompt: tool_id=%s name=%s run_id=%s", tool_id, prompt_name, run_id + ) + + output[PSKeys.PROMPTX] = answer_prompt_svc.extract_variable( + structured_output, variable_names, output, prompt_text + ) + + doc_id = IndexingUtils.generate_index_key( + vector_db=output[PSKeys.VECTOR_DB], + embedding=output[PSKeys.EMBEDDING], + x2text=output[PSKeys.X2TEXT_ADAPTER], + chunk_size=str(output[PSKeys.CHUNK_SIZE]), + chunk_overlap=str(output[PSKeys.CHUNK_OVERLAP]), + tool=shim, + file_hash=file_hash, + file_path=file_path, + ) + + if output.get(PSKeys.TYPE) in (PSKeys.TABLE, PSKeys.RECORD): + self._run_table_extraction( + output=output, + context=context, + structured_output=structured_output, + metrics=metrics, + run_id=run_id, + execution_id=execution_id, + execution_source=execution_source, + platform_api_key=platform_api_key, + tool_id=tool_id, + doc_name=doc_name, + prompt_name=prompt_name, + shim=shim, + ) + return + + if output.get(PSKeys.TYPE) == PSKeys.LINE_ITEM: + raise LegacyExecutorError(message="LINE_ITEM extraction is not supported.") + + usage_kwargs = {"run_id": run_id, "execution_id": execution_id} + try: + llm = llm_cls( + adapter_instance_id=output[PSKeys.LLM], + tool=shim, + usage_kwargs={**usage_kwargs, PSKeys.LLM_USAGE_REASON: PSKeys.EXTRACTION}, + capture_metrics=True, + ) + vector_db = None + if chunk_size > 0: + embedding = embedding_compat_cls( + adapter_instance_id=output[PSKeys.EMBEDDING], tool=shim, - usage_kwargs={ - **usage_kwargs, - PSKeys.LLM_USAGE_REASON: PSKeys.EXTRACTION, - }, - capture_metrics=True, + kwargs={**usage_kwargs}, ) - embedding = None - vector_db = None - if chunk_size > 0: - embedding = embedding_compat_cls( - adapter_instance_id=output[PSKeys.EMBEDDING], - tool=shim, - kwargs={**usage_kwargs}, - ) - vector_db = vector_db_cls( - tool=shim, - adapter_instance_id=output[PSKeys.VECTOR_DB], - embedding=embedding, - ) - shim.stream_log( - f"Initialized LLM and retrieval adapters for: {prompt_name}" + vector_db = vector_db_cls( + tool=shim, + adapter_instance_id=output[PSKeys.VECTOR_DB], + embedding=embedding, ) - except Exception as e: - msg = f"Couldn't fetch adapter. {e}" - logger.error(msg) - status_code = getattr(e, "status_code", None) or 500 - raise LegacyExecutorError(message=msg, code=status_code) from e - - # ---- Retrieval + Answer ---------------------------------------- - context_list: list[str] = [] - try: - answer = "NA" - retrieval_strategy = output.get(PSKeys.RETRIEVAL_STRATEGY) - valid_strategies = {s.value for s in RetrievalStrategy} - - if retrieval_strategy in valid_strategies: - shim.stream_log(f"Retrieving context for: {prompt_name}") - logger.info( - "Performing retrieval: prompt=%s strategy=%s chunk_size=%d", - prompt_name, - retrieval_strategy, - chunk_size, - ) - if chunk_size == 0: - context_list = retrieval_svc.retrieve_complete_context( - execution_source=execution_source, - file_path=file_path, - context_retrieval_metrics=context_retrieval_metrics, - prompt_key=prompt_name, - ) - else: - context_list = retrieval_svc.run_retrieval( - output=output, - doc_id=doc_id, - llm=llm, - vector_db=vector_db, - retrieval_type=retrieval_strategy, - context_retrieval_metrics=context_retrieval_metrics, - ) - metadata[PSKeys.CONTEXT][prompt_name] = context_list - shim.stream_log( - f"Retrieved {len(context_list)} context chunks" - f" for: {prompt_name}" - ) - logger.debug( - "Retrieved %d context chunks for prompt: %s", - len(context_list), - prompt_name, - ) + shim.stream_log(f"Initialized LLM and retrieval adapters for: {prompt_name}") + except Exception as e: + msg = f"Couldn't fetch adapter. {e}" + logger.error(msg) + raise LegacyExecutorError( + message=msg, code=getattr(e, "status_code", None) or 500 + ) from e - # Run prompt with retrieved context - shim.stream_log(f"Running LLM completion for: {prompt_name}") - answer = answer_prompt_svc.construct_and_run_prompt( - tool_settings=tool_settings, - output=output, - llm=llm, - context="\n".join(context_list), - prompt=PSKeys.PROMPTX, - metadata=metadata, + context_list: list[str] = [] + try: + answer = "NA" + retrieval_strategy = output.get(PSKeys.RETRIEVAL_STRATEGY) + valid_strategies = {s.value for s in RetrievalStrategy} + if retrieval_strategy in valid_strategies: + shim.stream_log(f"Retrieving context for: {prompt_name}") + logger.info( + "Performing retrieval: prompt=%s strategy=%s chunk_size=%d", + prompt_name, + retrieval_strategy, + chunk_size, + ) + if chunk_size == 0: + context_list = retrieval_svc.retrieve_complete_context( execution_source=execution_source, file_path=file_path, - process_text=process_text_fn, + context_retrieval_metrics=context_retrieval_metrics, + prompt_key=prompt_name, ) else: - logger.warning( - "Skipping retrieval: invalid strategy=%s for prompt=%s", - retrieval_strategy, - prompt_name, + context_list = retrieval_svc.run_retrieval( + output=output, + doc_id=doc_id, + llm=llm, + vector_db=vector_db, + retrieval_type=retrieval_strategy, + context_retrieval_metrics=context_retrieval_metrics, ) - - # ---- Type-specific post-processing ------------------------- - self._apply_type_conversion( + metadata[PSKeys.CONTEXT][prompt_name] = context_list + shim.stream_log( + f"Retrieved {len(context_list)} context chunks for: {prompt_name}" + ) + logger.debug( + "Retrieved %d context chunks for prompt: %s", + len(context_list), + prompt_name, + ) + shim.stream_log(f"Running LLM completion for: {prompt_name}") + answer = answer_prompt_svc.construct_and_run_prompt( + tool_settings=tool_settings, output=output, - answer=answer, - structured_output=structured_output, llm=llm, - tool_settings=tool_settings, + context="\n".join(context_list), + prompt=PSKeys.PROMPTX, metadata=metadata, execution_source=execution_source, file_path=file_path, - log_events_id=log_events_id, - tool_id=tool_id, - doc_name=doc_name, + process_text=process_text_fn, + ) + else: + logger.warning( + "Skipping retrieval: invalid strategy=%s for prompt=%s", + retrieval_strategy, + prompt_name, ) - shim.stream_log(f"Applied type conversion for: {prompt_name}") - - # ---- Challenge (quality verification) ---------------------- - if tool_settings.get(PSKeys.ENABLE_CHALLENGE): - from executor.executors.plugins import ( - ExecutorPluginLoader, - ) - challenge_cls = ExecutorPluginLoader.get("challenge") - if challenge_cls: - challenge_llm_id = tool_settings.get(PSKeys.CHALLENGE_LLM) - if challenge_llm_id: - shim.stream_log(f"Running challenge for: {prompt_name}") - challenge_llm = llm_cls( - adapter_instance_id=challenge_llm_id, - tool=shim, - usage_kwargs={ - **usage_kwargs, - PSKeys.LLM_USAGE_REASON: PSKeys.CHALLENGE, - }, - capture_metrics=True, - ) - challenger = challenge_cls( - llm=llm, - challenge_llm=challenge_llm, - context="\n".join(context_list), - tool_settings=tool_settings, - output=output, - structured_output=structured_output, - run_id=run_id, - platform_key=platform_api_key, - metadata=metadata, - ) - challenger.run() - shim.stream_log( - f"Challenge verification completed for: {prompt_name}" - ) - logger.info( - "Challenge completed: prompt=%s", - prompt_name, - ) - - # ---- Evaluation (prompt evaluation) ------------------------ - eval_settings = output.get(PSKeys.EVAL_SETTINGS, {}) - if eval_settings.get(PSKeys.EVAL_SETTINGS_EVALUATE): - from executor.executors.plugins import ( - ExecutorPluginLoader, - ) + self._apply_type_conversion( + output=output, + answer=answer, + structured_output=structured_output, + llm=llm, + tool_settings=tool_settings, + metadata=metadata, + execution_source=execution_source, + file_path=file_path, + log_events_id=log_events_id, + tool_id=tool_id, + doc_name=doc_name, + ) + shim.stream_log(f"Applied type conversion for: {prompt_name}") - evaluator_cls = ExecutorPluginLoader.get("evaluation") - if evaluator_cls: - shim.stream_log(f"Running evaluation for: {prompt_name}") - evaluator = evaluator_cls( - query=output.get(PSKeys.COMBINED_PROMPT, ""), - context="\n".join(context_list), - response=structured_output.get(prompt_name), - reference_answer=output.get("reference_answer", ""), - prompt=output, - structured_output=structured_output, - platform_key=platform_api_key, - ) - evaluator.run() - logger.info( - "Evaluation completed: prompt=%s", - prompt_name, - ) - - shim.stream_log(f"Completed prompt: {prompt_name}") - - # Strip trailing newline - val = structured_output.get(prompt_name) - if isinstance(val, str): - structured_output[prompt_name] = val.rstrip("\n") - - finally: - # Collect metrics - metrics.setdefault(prompt_name, {}).update( - { - "context_retrieval": context_retrieval_metrics.get( - prompt_name, {} - ), - f"{llm.get_usage_reason()}_llm": llm.get_metrics(), - } - ) - if vector_db: - vector_db.close() + self._run_challenge_if_enabled( + tool_settings=tool_settings, + output=output, + structured_output=structured_output, + context_list=context_list, + llm=llm, + llm_cls=llm_cls, + usage_kwargs=usage_kwargs, + run_id=run_id, + platform_api_key=platform_api_key, + metadata=metadata, + shim=shim, + prompt_name=prompt_name, + ) + self._run_evaluation_if_enabled( + output=output, + context_list=context_list, + structured_output=structured_output, + platform_api_key=platform_api_key, + shim=shim, + prompt_name=prompt_name, + ) + shim.stream_log(f"Completed prompt: {prompt_name}") - pipeline_shim.stream_log(f"All {len(prompts)} prompts processed successfully") - logger.info( - "All prompts processed: tool_id=%s prompt_count=%d file=%s", - tool_id, - len(prompts), - doc_name, - ) + val = structured_output.get(prompt_name) + if isinstance(val, str): + structured_output[prompt_name] = val.rstrip("\n") + finally: + metrics.setdefault(prompt_name, {}).update( + { + "context_retrieval": context_retrieval_metrics.get(prompt_name, {}), + f"{llm.get_usage_reason()}_llm": llm.get_metrics(), + } + ) + if vector_db: + vector_db.close() - # ---- Sanitize null values ------------------------------------------ - structured_output = self._sanitize_null_values(structured_output) + def _run_table_extraction( + self, + output: dict[str, Any], + context: ExecutionContext, + structured_output: dict[str, Any], + metrics: dict[str, Any], + run_id: str, + execution_id: str, + execution_source: str, + platform_api_key: str, + tool_id: str, + doc_name: str, + prompt_name: str, + shim: Any, + ) -> None: + """Delegate TABLE/RECORD prompt to the table executor plugin.""" + from executor.executors.constants import PromptServiceConstants as PSKeys - return ExecutionResult( - success=True, - data={ - PSKeys.OUTPUT: structured_output, - PSKeys.METADATA: metadata, - PSKeys.METRICS: metrics, + try: + table_executor = ExecutorRegistry.get("table") + except KeyError: + raise LegacyExecutorError( + message=( + "TABLE extraction requires the table executor " + "plugin. Install the table_extractor plugin." + ) + ) + table_ctx = ExecutionContext( + executor_name="table", + operation="table_extract", + run_id=run_id, + execution_source=execution_source, + organization_id=context.organization_id, + request_id=context.request_id, + executor_params={ + "llm_adapter_instance_id": output.get(PSKeys.LLM, ""), + "table_settings": output.get(PSKeys.TABLE_SETTINGS, {}), + "prompt": output.get(PSKeys.PROMPT, ""), + "PLATFORM_SERVICE_API_KEY": platform_api_key, + "execution_id": execution_id, + "tool_id": tool_id, + "file_name": doc_name, }, ) + table_ctx._log_component = self._log_component + table_ctx.log_events_id = self._log_events_id + + shim.stream_log(f"Running table extraction for: {prompt_name}") + table_result = table_executor.execute(table_ctx) + + if table_result.success: + structured_output[prompt_name] = table_result.data.get("output", "") + table_metrics = table_result.data.get("metadata", {}).get("metrics", {}) + metrics.setdefault(prompt_name, {}).update({"table_extraction": table_metrics}) + shim.stream_log(f"Table extraction completed for: {prompt_name}") + logger.info("TABLE extraction completed: prompt=%s", prompt_name) + else: + structured_output[prompt_name] = "" + logger.error( + "TABLE extraction failed for prompt=%s: %s", + prompt_name, + table_result.error, + ) + shim.stream_log(f"Completed prompt: {prompt_name}") @staticmethod def _apply_type_conversion( @@ -1481,69 +1628,48 @@ def _apply_type_conversion( output_type = output[PSKeys.TYPE] if output_type == PSKeys.NUMBER: - if answer.lower() == "na": - structured_output[prompt_name] = None - else: - prompt = ( - f"Extract the number from the following " - f"text:\n{answer}\n\nOutput just the number. " - f"If the number is expressed in millions " - f"or thousands, expand the number to its numeric value " - f"The number should be directly assignable " - f"to a numeric variable. " - f"It should not have any commas, " - f"percentages or other grouping " - f"characters. No explanation is required. " - f"If you cannot extract the number, output 0." - ) - answer = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) - try: - structured_output[prompt_name] = float(answer) - except Exception: - structured_output[prompt_name] = None + structured_output[prompt_name] = LegacyExecutor._convert_number_answer( + answer, llm, answer_prompt_svc + ) elif output_type == PSKeys.EMAIL: - if answer.lower() == "na": - structured_output[prompt_name] = None - else: - prompt = ( - f"Extract the email from the following text:\n{answer}" - f"\n\nOutput just the email. " - f"The email should be directly assignable to a string " - f"variable. No explanation is required. If you cannot " - f'extract the email, output "NA".' - ) - answer = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) - structured_output[prompt_name] = answer + email_prompt = ( + f"Extract the email from the following text:\n{answer}" + f"\n\nOutput just the email. " + f"The email should be directly assignable to a string " + f"variable. No explanation is required. If you cannot " + f'extract the email, output "NA".' + ) + structured_output[prompt_name] = LegacyExecutor._convert_scalar_answer( + answer, llm, answer_prompt_svc, email_prompt + ) elif output_type == PSKeys.DATE: - if answer.lower() == "na": - structured_output[prompt_name] = None - else: - prompt = ( - f"Extract the date from the following text:\n{answer}" - f"\n\nOutput just the date. " - f"The date should be in ISO date time format. " - f"No explanation is required. The date should be " - f"directly assignable to a date variable. " - f"If you cannot convert the string into a date, " - f'output "NA".' - ) - answer = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) - structured_output[prompt_name] = answer + date_prompt = ( + f"Extract the date from the following text:\n{answer}" + f"\n\nOutput just the date. " + f"The date should be in ISO date time format. " + f"No explanation is required. The date should be " + f"directly assignable to a date variable. " + f"If you cannot convert the string into a date, " + f'output "NA".' + ) + structured_output[prompt_name] = LegacyExecutor._convert_scalar_answer( + answer, llm, answer_prompt_svc, date_prompt + ) elif output_type == PSKeys.BOOLEAN: if answer.lower() == "na": structured_output[prompt_name] = None else: - prompt = ( + bool_prompt = ( f"Extract yes/no from the following text:\n{answer}\n\n" f"Output in single word. " f"If the context is trying to convey that the answer " f'is true, then return "yes", else return "no".' ) - answer = answer_prompt_svc.run_completion(llm=llm, prompt=prompt) - structured_output[prompt_name] = answer.lower() == "yes" + raw = answer_prompt_svc.run_completion(llm=llm, prompt=bool_prompt) + structured_output[prompt_name] = raw.lower() == "yes" elif output_type == PSKeys.JSON: answer_prompt_svc.handle_json( diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index 838a806d65..ca443b632e 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -162,25 +162,25 @@ def _override_section( def _should_skip_extraction_for_smart_table( - input_file: str, outputs: list[dict[str, Any]] + outputs: list[dict[str, Any]], ) -> bool: """Check if extraction and indexing should be skipped for smart table. Standalone version of StructureTool._should_skip_extraction_for_smart_table. """ for output in outputs: - if _SK.TABLE_SETTINGS in output: - prompt = output.get(_SK.PROMPT, "") - if prompt and isinstance(prompt, str): - try: - schema_data = json.loads(prompt) - if schema_data and isinstance(schema_data, dict): - return True - except ValueError as e: - logger.warning( - "Failed to parse prompt as JSON for smart table: %s", e - ) - continue + if _SK.TABLE_SETTINGS not in output: + continue + prompt = output.get(_SK.PROMPT, "") + if not prompt or not isinstance(prompt, str): + continue + try: + schema_data = json.loads(prompt) + except ValueError as e: + logger.warning("Failed to parse prompt as JSON for smart table: %s", e) + continue + if isinstance(schema_data, dict) and schema_data: + return True return False @@ -256,7 +256,6 @@ def _execute_structure_tool_impl(params: dict) -> dict: tool_instance_metadata=tool_instance_metadata, dispatcher=dispatcher, shim=shim, - platform_helper=platform_helper, file_execution_id=file_execution_id, organization_id=organization_id, source_file_name=source_file_name, @@ -303,9 +302,7 @@ def _execute_structure_tool_impl(params: dict) -> dict: extracted_input_file = str(execution_run_data_folder / _SK.EXTRACT) # ---- Step 4: Smart table detection ---- - skip_extraction_and_indexing = _should_skip_extraction_for_smart_table( - input_file_path, outputs - ) + skip_extraction_and_indexing = _should_skip_extraction_for_smart_table(outputs) if skip_extraction_and_indexing: logger.info( "Skipping extraction and indexing for Excel table with valid JSON schema" @@ -536,7 +533,6 @@ def _run_agentic_extraction( tool_instance_metadata: dict, dispatcher: ExecutionDispatcher, shim: Any, - platform_helper: Any, file_execution_id: str, organization_id: str, source_file_name: str, diff --git a/workers/run-worker.sh b/workers/run-worker.sh index abd6931534..27d9fc8893 100755 --- a/workers/run-worker.sh +++ b/workers/run-worker.sh @@ -21,6 +21,9 @@ WORKERS_DIR="$SCRIPT_DIR" # Default environment file ENV_FILE="$WORKERS_DIR/.env" +# Worker type constant for the executor worker +readonly EXECUTOR_WORKER_TYPE="executor" + # Available workers declare -A WORKERS=( ["api"]="api-deployment" @@ -37,7 +40,7 @@ declare -A WORKERS=( ["notify"]="notification" ["scheduler"]="scheduler" ["schedule"]="scheduler" - ["executor"]="executor" + ["${EXECUTOR_WORKER_TYPE}"]="${EXECUTOR_WORKER_TYPE}" ["all"]="all" ) @@ -53,7 +56,7 @@ declare -A WORKER_QUEUES=( ["log_consumer"]="celery_log_task_queue" ["notification"]="notifications,notifications_webhook,notifications_email,notifications_sms,notifications_priority" ["scheduler"]="scheduler" - ["executor"]="celery_executor_legacy" + ["${EXECUTOR_WORKER_TYPE}"]="celery_executor_legacy" ) # Worker health ports @@ -65,7 +68,7 @@ declare -A WORKER_HEALTH_PORTS=( ["log_consumer"]="8084" ["notification"]="8085" ["scheduler"]="8087" - ["executor"]="8088" + ["${EXECUTOR_WORKER_TYPE}"]="8088" ) # Function to display usage @@ -410,7 +413,7 @@ run_worker() { "scheduler") export SCHEDULER_HEALTH_PORT="$health_port" ;; - "executor") + "${EXECUTOR_WORKER_TYPE}") export EXECUTOR_HEALTH_PORT="$health_port" ;; *) @@ -486,7 +489,7 @@ run_worker() { "scheduler") cmd_args+=("--concurrency=2") ;; - "executor") + "${EXECUTOR_WORKER_TYPE}") cmd_args+=("--concurrency=2") ;; *) diff --git a/workers/tests/test_answer_prompt.py b/workers/tests/test_answer_prompt.py index 4b6e53f90e..ebe1675d3d 100644 --- a/workers/tests/test_answer_prompt.py +++ b/workers/tests/test_answer_prompt.py @@ -486,7 +486,7 @@ def test_chunked_retrieval_uses_run_retrieval( llm = _mock_llm() deps = _mock_deps(llm) - _, RetrievalService, *_ = deps + _, retrieval_svc, *_ = deps mock_deps.return_value = deps mock_shim_cls.return_value = MagicMock() @@ -496,7 +496,7 @@ def test_chunked_retrieval_uses_run_retrieval( ) result = executor._handle_answer_prompt(ctx) - RetrievalService.run_retrieval.assert_called_once() + retrieval_svc.run_retrieval.assert_called_once() assert result.success is True @patch( @@ -511,7 +511,7 @@ def test_complete_context_for_chunk_zero( llm = _mock_llm() deps = _mock_deps(llm) - _, RetrievalService, *_ = deps + _, retrieval_svc, *_ = deps mock_deps.return_value = deps mock_shim_cls.return_value = MagicMock() @@ -521,7 +521,7 @@ def test_complete_context_for_chunk_zero( ) result = executor._handle_answer_prompt(ctx) - RetrievalService.retrieve_complete_context.assert_called_once() + retrieval_svc.retrieve_complete_context.assert_called_once() assert result.success is True @patch( @@ -673,9 +673,9 @@ def test_vectordb_closed(self, mock_shim_cls, mock_deps): llm = _mock_llm() deps = _mock_deps(llm) mock_deps.return_value = deps - _, _, _, _, _, _, VectorDB = deps + _, _, _, _, _, _, vector_db_cls = deps vdb_instance = MagicMock() - VectorDB.return_value = vdb_instance + vector_db_cls.return_value = vdb_instance mock_shim_cls.return_value = MagicMock() executor = LegacyExecutor() From 6391c6cb11f4732bf74a594178ce8ba411803e14 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 12 Mar 2026 01:32:13 +0530 Subject: [PATCH 56/64] UN-3266 fix: remove unused RetrievalStrategy import from _handle_answer_prompt Co-Authored-By: Claude Sonnet 4.6 --- workers/executor/executors/legacy_executor.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 432c8b5c58..eacbbcdeb9 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -1017,9 +1017,6 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: from executor.executors.constants import ( PromptServiceConstants as PSKeys, ) - from executor.executors.constants import ( - RetrievalStrategy, - ) params: dict[str, Any] = context.executor_params From 0af04847e7dd5cfc4a39ea1f6d514086c08f3cb3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 20:03:12 +0000 Subject: [PATCH 57/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- backend/api_v2/deployment_helper.py | 12 +++++++++--- workers/executor/executors/legacy_executor.py | 5 ++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py index c41f894b7a..4cb41597cd 100644 --- a/backend/api_v2/deployment_helper.py +++ b/backend/api_v2/deployment_helper.py @@ -299,7 +299,9 @@ def execute_workflow( return APIExecutionResponseSerializer(result).data @staticmethod - def _enrich_item_inner_metadata(item: dict, file_exec_id: str, UsageHelper: Any) -> None: + def _enrich_item_inner_metadata( + item: dict, file_exec_id: str, UsageHelper: Any + ) -> None: """Inject per-model usage breakdown into item['result']['metadata'].""" inner_result = item.get("result") if not isinstance(inner_result, dict): @@ -312,7 +314,9 @@ def _enrich_item_inner_metadata(item: dict, file_exec_id: str, UsageHelper: Any) metadata.update(usage_by_model) @staticmethod - def _enrich_item_top_metadata(item: dict, file_exec_id: str, UsageHelper: Any) -> None: + def _enrich_item_top_metadata( + item: dict, file_exec_id: str, UsageHelper: Any + ) -> None: """Inject aggregated usage totals into item['metadata']['usage'].""" item_metadata = item.get("metadata") if not isinstance(item_metadata, dict): @@ -464,7 +468,9 @@ def process_completed_execution( response, organization_id=org_id ) enable_highlight = False - if ConfigurationRegistry.is_config_key_available("ENABLE_HIGHLIGHT_API_DEPLOYMENT"): + if ConfigurationRegistry.is_config_key_available( + "ENABLE_HIGHLIGHT_API_DEPLOYMENT" + ): from configuration.models import Configuration enable_highlight = Configuration.get_value_by_organization( diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index eacbbcdeb9..47e0a19f6e 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -1324,6 +1324,7 @@ def _execute_single_prompt( """Execute one prompt: variable replacement, retrieval, LLM, post-process.""" from executor.executors.constants import PromptServiceConstants as PSKeys from executor.executors.constants import RetrievalStrategy + from unstract.sdk1.utils.indexing import IndexingUtils prompt_name = output[PSKeys.NAME] @@ -1586,7 +1587,9 @@ def _run_table_extraction( if table_result.success: structured_output[prompt_name] = table_result.data.get("output", "") table_metrics = table_result.data.get("metadata", {}).get("metrics", {}) - metrics.setdefault(prompt_name, {}).update({"table_extraction": table_metrics}) + metrics.setdefault(prompt_name, {}).update( + {"table_extraction": table_metrics} + ) shim.stream_log(f"Table extraction completed for: {prompt_name}") logger.info("TABLE extraction completed: prompt=%s", prompt_name) else: From 807e405b14ecf016530ea5f6698df6b17517b6a4 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 12 Mar 2026 01:35:38 +0530 Subject: [PATCH 58/64] UN-3266 fix: rename UsageHelper params to lowercase (N803) Co-Authored-By: Claude Sonnet 4.6 --- backend/api_v2/deployment_helper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py index 4cb41597cd..94727fa4dd 100644 --- a/backend/api_v2/deployment_helper.py +++ b/backend/api_v2/deployment_helper.py @@ -300,7 +300,7 @@ def execute_workflow( @staticmethod def _enrich_item_inner_metadata( - item: dict, file_exec_id: str, UsageHelper: Any + item: dict, file_exec_id: str, usage_helper: Any ) -> None: """Inject per-model usage breakdown into item['result']['metadata'].""" inner_result = item.get("result") @@ -309,19 +309,19 @@ def _enrich_item_inner_metadata( metadata = inner_result.get("metadata") if not isinstance(metadata, dict): return - usage_by_model = UsageHelper.get_usage_by_model(file_exec_id) + usage_by_model = usage_helper.get_usage_by_model(file_exec_id) if usage_by_model: metadata.update(usage_by_model) @staticmethod def _enrich_item_top_metadata( - item: dict, file_exec_id: str, UsageHelper: Any + item: dict, file_exec_id: str, usage_helper: Any ) -> None: """Inject aggregated usage totals into item['metadata']['usage'].""" item_metadata = item.get("metadata") if not isinstance(item_metadata, dict): return - aggregated = UsageHelper.get_aggregated_token_count(file_exec_id) + aggregated = usage_helper.get_aggregated_token_count(file_exec_id) if aggregated: aggregated["file_execution_id"] = file_exec_id item_metadata["usage"] = aggregated From 9bdb3f5a121c06d67ab3e8dba2a0c4e626a5f676 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 12 Mar 2026 01:45:20 +0530 Subject: [PATCH 59/64] UN-3266 fix: resolve remaining SonarCloud issues from check run 66691002192 - Add @staticmethod to _sanitize_null_values (fixes S2325 missing self) - Reduce _execute_single_prompt params from 25 to 11 (S107) by grouping services as deps tuple and extracting exec params from context.executor_params - Add NOSONAR suppression for raise exc in test helper (S112) Co-Authored-By: Claude Sonnet 4.6 --- unstract/sdk1/tests/test_execution.py | 2 +- workers/executor/executors/legacy_executor.py | 65 +++++++++---------- 2 files changed, 33 insertions(+), 34 deletions(-) diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index b8e94335ee..a1e14f2ae7 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -474,7 +474,7 @@ def name(self) -> str: return executor_name def execute(self, context: ExecutionContext) -> ExecutionResult: - raise exc + raise exc # NOSONAR _FailExecutor.__name__ = f"{executor_name.title()}FailExecutor" _FailExecutor.__qualname__ = _FailExecutor.__name__ diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 47e0a19f6e..677bebb01a 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -984,6 +984,7 @@ def _sanitize_dict_values(d: dict[str, Any]) -> None: if isinstance(v, str) and v.lower() == "na": d[k] = None + @staticmethod def _sanitize_null_values( structured_output: dict[str, Any], ) -> dict[str, Any]: @@ -1131,6 +1132,14 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: ) # ---- Process each prompt ------------------------------------------- + _deps = ( + answer_prompt_svc, + retrieval_svc, + variable_replacement_svc, + llm_cls, + embedding_compat_cls, + vector_db_cls, + ) for output in prompts: self._execute_single_prompt( output=output, @@ -1140,24 +1149,9 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: metrics=metrics, variable_names=variable_names, context_retrieval_metrics=context_retrieval_metrics, - answer_prompt_svc=answer_prompt_svc, - retrieval_svc=retrieval_svc, - variable_replacement_svc=variable_replacement_svc, - llm_cls=llm_cls, - embedding_compat_cls=embedding_compat_cls, - vector_db_cls=vector_db_cls, + deps=_deps, tool_settings=tool_settings, process_text_fn=process_text_fn, - run_id=run_id, - execution_id=execution_id, - file_hash=file_hash, - file_path=file_path, - doc_name=doc_name, - log_events_id=log_events_id, - tool_id=tool_id, - custom_data=custom_data, - execution_source=execution_source, - platform_api_key=platform_api_key, ) pipeline_shim.stream_log(f"All {len(prompts)} prompts processed successfully") @@ -1302,31 +1296,36 @@ def _execute_single_prompt( metrics: dict[str, Any], variable_names: list[str], context_retrieval_metrics: dict[str, Any], - answer_prompt_svc: Any, - retrieval_svc: Any, - variable_replacement_svc: Any, - llm_cls: Any, - embedding_compat_cls: Any, - vector_db_cls: Any, + deps: tuple, tool_settings: dict[str, Any], process_text_fn: Any, - run_id: str, - execution_id: str, - file_hash: Any, - file_path: str, - doc_name: str, - log_events_id: str, - tool_id: str, - custom_data: dict[str, Any], - execution_source: str, - platform_api_key: str, ) -> None: """Execute one prompt: variable replacement, retrieval, LLM, post-process.""" from executor.executors.constants import PromptServiceConstants as PSKeys from executor.executors.constants import RetrievalStrategy - from unstract.sdk1.utils.indexing import IndexingUtils + ( + answer_prompt_svc, + retrieval_svc, + variable_replacement_svc, + llm_cls, + embedding_compat_cls, + vector_db_cls, + ) = deps + + params = context.executor_params + run_id = context.run_id + execution_id = params.get(PSKeys.EXECUTION_ID, "") + file_hash = params.get(PSKeys.FILE_HASH) + file_path = params.get(PSKeys.FILE_PATH) + doc_name = str(params.get(PSKeys.FILE_NAME, "")) + log_events_id = params.get(PSKeys.LOG_EVENTS_ID, "") + tool_id = params.get(PSKeys.TOOL_ID, "") + custom_data = params.get(PSKeys.CUSTOM_DATA, {}) + execution_source = params.get(PSKeys.EXECUTION_SOURCE, context.execution_source) + platform_api_key = params.get(PSKeys.PLATFORM_SERVICE_API_KEY, "") + prompt_name = output[PSKeys.NAME] prompt_text = output[PSKeys.PROMPT] chunk_size = output[PSKeys.CHUNK_SIZE] From 18eafe914626c2e36a94b3d3488a9af8a793a3a0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 20:15:55 +0000 Subject: [PATCH 60/64] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- workers/executor/executors/legacy_executor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index 677bebb01a..e2de0b3dee 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -1303,6 +1303,7 @@ def _execute_single_prompt( """Execute one prompt: variable replacement, retrieval, LLM, post-process.""" from executor.executors.constants import PromptServiceConstants as PSKeys from executor.executors.constants import RetrievalStrategy + from unstract.sdk1.utils.indexing import IndexingUtils ( From 7a01a355bef720bb8ee211fa10e2c4aa0b097e00 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 12 Mar 2026 01:50:21 +0530 Subject: [PATCH 61/64] UN-3266 fix: remove unused locals in _handle_answer_prompt (F841) execution_id, file_hash, log_events_id, custom_data are now extracted inside _execute_single_prompt from context.executor_params. Co-Authored-By: Claude Sonnet 4.6 --- workers/executor/executors/legacy_executor.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index e2de0b3dee..dd3a502d06 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -1026,12 +1026,8 @@ def _handle_answer_prompt(self, context: ExecutionContext) -> ExecutionResult: prompts = params.get(PSKeys.OUTPUTS, []) tool_id: str = params.get(PSKeys.TOOL_ID, "") run_id: str = context.run_id - execution_id: str = params.get(PSKeys.EXECUTION_ID, "") - file_hash = params.get(PSKeys.FILE_HASH) file_path = params.get(PSKeys.FILE_PATH) doc_name = str(params.get(PSKeys.FILE_NAME, "")) - log_events_id: str = params.get(PSKeys.LOG_EVENTS_ID, "") - custom_data: dict[str, Any] = params.get(PSKeys.CUSTOM_DATA, {}) execution_source = params.get(PSKeys.EXECUTION_SOURCE, context.execution_source) platform_api_key: str = params.get(PSKeys.PLATFORM_SERVICE_API_KEY, "") From e3ca0c6b5a98e7029312a4cd88293a8fc34da205 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 12 Mar 2026 14:21:24 +0530 Subject: [PATCH 62/64] fix: resolve Biome linting errors in frontend source files Auto-fixed 48 lint errors across 56 files: import ordering, block statements, unused variable prefixing, and formatting issues. Co-Authored-By: Claude Opus 4.6 --- frontend/src/App.jsx | 4 ++- .../src/components/agency/agency/Agency.jsx | 6 ++-- .../ConfigureConnectorModal.jsx | 10 ++++-- .../markdown-renderer/MarkdownRenderer.jsx | 4 ++- .../components/common/PromptStudioModal.jsx | 4 ++- .../add-llm-profile/AddLlmProfile.jsx | 2 +- .../combined-output/CombinedOutput.jsx | 8 +++-- .../CustomDataSettings.jsx | 4 ++- .../document-parser/DocumentParser.jsx | 4 ++- .../components/custom-tools/header/Header.jsx | 6 ++-- .../custom-tools/import-tool/ImportTool.jsx | 2 +- .../list-of-tools/ListOfTools.jsx | 4 +-- .../manage-llm-profiles/ManageLlmProfiles.jsx | 4 +-- .../custom-tools/notes-card/NotesCard.jsx | 3 +- .../output-analyzer/OutputAnalyzer.jsx | 4 ++- .../output-analyzer/OutputAnalyzerCard.jsx | 8 +++-- .../prompt-card/DisplayPromptResult.jsx | 12 +++++-- .../prompt-card/OutputForIndex.jsx | 8 +++-- .../custom-tools/prompt-card/PromptCard.jsx | 9 ++++-- .../prompt-card/PromptCardItems.jsx | 12 +++++-- .../custom-tools/prompt-card/PromptOutput.jsx | 5 +-- .../custom-tools/prompt-card/PromptRun.jsx | 10 ++++-- .../prompts-reorder/DraggablePrompt.jsx | 8 +++-- .../prompts-reorder/PromptsReorder.jsx | 8 +++-- .../RetrievalStrategyModal.jsx | 24 +++++++++----- .../custom-tools/tool-ide/ToolIde.jsx | 2 +- .../custom-tools/tools-main/ToolsMain.jsx | 2 +- .../tools-main/ToolsMainActionBtns.jsx | 2 +- .../CreateApiDeploymentFromPromptStudio.jsx | 6 ++-- .../components/helpers/auth/RequireAuth.js | 4 +-- .../components/helpers/auth/RequireGuest.js | 4 +-- .../helpers/socket-messages/SocketMessages.js | 12 +++++-- .../input-output/add-source/AddSource.jsx | 4 ++- .../input-output/configure-ds/ConfigureDs.jsx | 14 ++++++--- .../data-source-card/DataSourceCard.jsx | 2 +- .../list-of-sources/ListOfSources.jsx | 4 ++- .../input-output/manage-files/ManageFiles.jsx | 16 +++++++--- .../DisplayLogsAndNotifications.jsx | 4 ++- .../metrics-dashboard/RecentActivity.jsx | 4 ++- .../file-history-modal/FileHistoryModal.jsx | 8 +++-- frontend/src/components/set-org/SetOrg.jsx | 2 +- .../settings/default-triad/DefaultTriad.jsx | 2 +- .../settings/invite/InviteEditUser.jsx | 2 +- .../settings/platform/PlatformSettings.jsx | 2 +- .../src/components/settings/users/Users.jsx | 2 +- .../tool-settings/ToolSettings.jsx | 2 +- frontend/src/hooks/usePromptOutput.js | 3 +- frontend/src/hooks/usePromptRun.js | 12 +++++-- frontend/src/hooks/usePromptStudioSocket.js | 31 ++++++++++++------- frontend/src/hooks/useRequestUrl.js | 4 ++- .../CustomObjectFieldTemplate.jsx | 4 ++- frontend/src/store/alert-store.js | 4 ++- frontend/src/store/prompt-run-queue-store.js | 4 ++- frontend/src/store/prompt-studio-store.js | 4 ++- .../src/store/retrieval-strategies-store.js | 4 ++- frontend/src/store/workflow-store.js | 2 +- 56 files changed, 234 insertions(+), 111 deletions(-) diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index f534442a8e..2a21fd639c 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -49,7 +49,9 @@ function App() { ); useEffect(() => { - if (!alertDetails?.content) return; + if (!alertDetails?.content) { + return; + } notificationAPI.open({ message: alertDetails?.title, diff --git a/frontend/src/components/agency/agency/Agency.jsx b/frontend/src/components/agency/agency/Agency.jsx index 966b2e4fd0..5fd74c8f36 100644 --- a/frontend/src/components/agency/agency/Agency.jsx +++ b/frontend/src/components/agency/agency/Agency.jsx @@ -368,7 +368,7 @@ function Agency() { if (!signal?.aborted) { setDeploymentInfo(deploymentInfo); } - } catch (err) { + } catch (_err) { // Don't show alert for this as it's not critical // Also check if error is due to abort if (signal?.aborted) { @@ -433,7 +433,7 @@ function Agency() { info: `Clicked on 'Deploy as ${deployType}' button`, workflow_name: projectName, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; @@ -869,7 +869,7 @@ function Agency() { info: "Clicked on 'Run Workflow' button (Normal Execution)", }); } - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } const workflowId = details?.id; diff --git a/frontend/src/components/agency/configure-connector-modal/ConfigureConnectorModal.jsx b/frontend/src/components/agency/configure-connector-modal/ConfigureConnectorModal.jsx index c55f3cdf4e..86593e00ba 100644 --- a/frontend/src/components/agency/configure-connector-modal/ConfigureConnectorModal.jsx +++ b/frontend/src/components/agency/configure-connector-modal/ConfigureConnectorModal.jsx @@ -199,7 +199,7 @@ function ConfigureConnectorModal({ connector_name: selectedConnector.connector.connector_name, }, ); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } } @@ -232,7 +232,9 @@ function ConfigureConnectorModal({ }; const handleAddFolder = () => { - if (!selectedFolderPath) return; + if (!selectedFolderPath) { + return; + } // HACK: For GDrive connectors, strip the "root/" prefix to avoid duplication // since backend will add it back during execution. This helps avoid a migration @@ -482,7 +484,9 @@ function ConfigureConnectorModal({ // Helper function to render connector label const renderConnectorLabel = (connDetails, availableConnectors) => { - if (!connDetails?.id) return undefined; + if (!connDetails?.id) { + return undefined; + } const selectedConnector = availableConnectors.find( (conn) => conn.value === connDetails.id, diff --git a/frontend/src/components/agency/markdown-renderer/MarkdownRenderer.jsx b/frontend/src/components/agency/markdown-renderer/MarkdownRenderer.jsx index 7a6bbee0bb..20655d3d75 100644 --- a/frontend/src/components/agency/markdown-renderer/MarkdownRenderer.jsx +++ b/frontend/src/components/agency/markdown-renderer/MarkdownRenderer.jsx @@ -4,7 +4,9 @@ import ReactMarkdown from "react-markdown"; import remarkGfm from "remark-gfm"; const MarkdownRenderer = memo(({ markdownText }) => { - if (!markdownText) return null; + if (!markdownText) { + return null; + } return ( {markdownText} diff --git a/frontend/src/components/common/PromptStudioModal.jsx b/frontend/src/components/common/PromptStudioModal.jsx index 606ac0b7bc..503399ec0d 100644 --- a/frontend/src/components/common/PromptStudioModal.jsx +++ b/frontend/src/components/common/PromptStudioModal.jsx @@ -10,7 +10,9 @@ export function PromptStudioModal({ onClose, showModal }) { const { sessionDetails } = useSessionStore(); const handleClose = () => { - if (onClose) onClose(); + if (onClose) { + onClose(); + } }; const handleCreateClick = () => { diff --git a/frontend/src/components/custom-tools/add-llm-profile/AddLlmProfile.jsx b/frontend/src/components/custom-tools/add-llm-profile/AddLlmProfile.jsx index 4cfe0c2aba..a020540b26 100644 --- a/frontend/src/components/custom-tools/add-llm-profile/AddLlmProfile.jsx +++ b/frontend/src/components/custom-tools/add-llm-profile/AddLlmProfile.jsx @@ -330,7 +330,7 @@ function AddLlmProfile({ setPostHogCustomEvent("intent_success_ps_new_llm_profile", { info: "Clicked on 'Add' button", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } } diff --git a/frontend/src/components/custom-tools/combined-output/CombinedOutput.jsx b/frontend/src/components/custom-tools/combined-output/CombinedOutput.jsx index 2f4ea38825..3036e7b9d6 100644 --- a/frontend/src/components/custom-tools/combined-output/CombinedOutput.jsx +++ b/frontend/src/components/custom-tools/combined-output/CombinedOutput.jsx @@ -74,7 +74,9 @@ function CombinedOutput({ docId, setFilledFields, selectedPrompts }) { const handleException = useExceptionHandler(); useEffect(() => { - if (isSimplePromptStudio) return; + if (isSimplePromptStudio) { + return; + } const fetchAdapterInfo = async () => { let url = `/api/v1/unstract/${sessionDetails?.orgId}/adapter/?adapter_type=LLM`; @@ -101,7 +103,9 @@ function CombinedOutput({ docId, setFilledFields, selectedPrompts }) { }, [singlePassExtractMode]); useEffect(() => { - if (!docId || isSinglePassExtractLoading) return; + if (!docId || isSinglePassExtractLoading) { + return; + } const fetchCombinedOutput = async () => { setIsOutputLoading(true); diff --git a/frontend/src/components/custom-tools/custom-data-settings/CustomDataSettings.jsx b/frontend/src/components/custom-tools/custom-data-settings/CustomDataSettings.jsx index b0c9842a32..5142dd2b1e 100644 --- a/frontend/src/components/custom-tools/custom-data-settings/CustomDataSettings.jsx +++ b/frontend/src/components/custom-tools/custom-data-settings/CustomDataSettings.jsx @@ -22,7 +22,9 @@ const CUSTOM_DATA_VARIABLE_REGEX = /\{\{custom_data\.([a-zA-Z0-9_.]+)\}\}/g; // Helper function to extract all custom_data variables from text const extractCustomDataVariables = (text) => { const variables = []; - if (!text) return variables; + if (!text) { + return variables; + } const matches = text.matchAll(CUSTOM_DATA_VARIABLE_REGEX); for (const match of matches) { diff --git a/frontend/src/components/custom-tools/document-parser/DocumentParser.jsx b/frontend/src/components/custom-tools/document-parser/DocumentParser.jsx index 3e633cba81..3d5c891e13 100644 --- a/frontend/src/components/custom-tools/document-parser/DocumentParser.jsx +++ b/frontend/src/components/custom-tools/document-parser/DocumentParser.jsx @@ -211,7 +211,9 @@ function DocumentParser({ const getPromptOutputs = (promptId) => { const keys = Object.keys(promptOutputs || {}); - if (!keys?.length) return {}; + if (!keys?.length) { + return {}; + } const outputs = {}; keys.forEach((key) => { diff --git a/frontend/src/components/custom-tools/header/Header.jsx b/frontend/src/components/custom-tools/header/Header.jsx index 84718ee876..221b7f0fe5 100644 --- a/frontend/src/components/custom-tools/header/Header.jsx +++ b/frontend/src/components/custom-tools/header/Header.jsx @@ -133,7 +133,7 @@ function Header({ info: `Clicked on the 'Export' button`, tool_name: details?.tool_name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } @@ -199,7 +199,7 @@ function Header({ tool_id: details?.tool_id, tool_name: details?.tool_name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } @@ -260,7 +260,7 @@ function Header({ tool_id: details?.tool_id, tool_name: details?.tool_name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } diff --git a/frontend/src/components/custom-tools/import-tool/ImportTool.jsx b/frontend/src/components/custom-tools/import-tool/ImportTool.jsx index 7773c00ea6..e0d972cd99 100644 --- a/frontend/src/components/custom-tools/import-tool/ImportTool.jsx +++ b/frontend/src/components/custom-tools/import-tool/ImportTool.jsx @@ -48,7 +48,7 @@ function ImportTool({ open, setOpen, onImport, loading }) { setProjectData(projectData); setShowAdapterSelection(true); setParseLoading(false); - } catch (error) { + } catch (_error) { message.error("Invalid JSON file"); setParseLoading(false); } diff --git a/frontend/src/components/custom-tools/list-of-tools/ListOfTools.jsx b/frontend/src/components/custom-tools/list-of-tools/ListOfTools.jsx index 6978d4e2bb..425e148c99 100644 --- a/frontend/src/components/custom-tools/list-of-tools/ListOfTools.jsx +++ b/frontend/src/components/custom-tools/list-of-tools/ListOfTools.jsx @@ -208,7 +208,7 @@ function ListOfTools() { setPostHogCustomEvent("intent_new_ps_project", { info: "Clicked on '+ New Project' button", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; @@ -219,7 +219,7 @@ function ListOfTools() { info: "Importing project from projects list", file_name: file.name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } diff --git a/frontend/src/components/custom-tools/manage-llm-profiles/ManageLlmProfiles.jsx b/frontend/src/components/custom-tools/manage-llm-profiles/ManageLlmProfiles.jsx index d3744584f7..853f8d7803 100644 --- a/frontend/src/components/custom-tools/manage-llm-profiles/ManageLlmProfiles.jsx +++ b/frontend/src/components/custom-tools/manage-llm-profiles/ManageLlmProfiles.jsx @@ -77,7 +77,7 @@ function ManageLlmProfiles() { setPostHogCustomEvent("ps_profile_changed_per_prompt", { info: "Selected default LLM profile", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } @@ -178,7 +178,7 @@ function ManageLlmProfiles() { setPostHogCustomEvent("intent_ps_new_llm_profile", { info: "Clicked on 'Add New LLM Profile' button", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git a/frontend/src/components/custom-tools/notes-card/NotesCard.jsx b/frontend/src/components/custom-tools/notes-card/NotesCard.jsx index 38a03f0a9c..e2ddaa4992 100644 --- a/frontend/src/components/custom-tools/notes-card/NotesCard.jsx +++ b/frontend/src/components/custom-tools/notes-card/NotesCard.jsx @@ -38,8 +38,9 @@ function NotesCard({ if ( isPromptDetailsStateUpdated || !Object.keys(promptDetails || {})?.length - ) + ) { return; + } setPromptDetailsState(promptDetails); setIsPromptDetailsStateUpdated(true); }, [promptDetails]); diff --git a/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzer.jsx b/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzer.jsx index 334391d2c3..7922a1a0fe 100644 --- a/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzer.jsx +++ b/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzer.jsx @@ -48,7 +48,9 @@ function OutputAnalyzer() { }, []); const currentDoc = useMemo(() => { - if (currentDocIndex === -1) return null; + if (currentDocIndex === -1) { + return null; + } return listOfDocs[currentDocIndex]; }, [listOfDocs, currentDocIndex]); diff --git a/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx b/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx index 727701139f..814a317b35 100644 --- a/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx +++ b/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx @@ -37,7 +37,9 @@ function OutputAnalyzerCard({ doc, selectedPrompts, totalFields }) { // Memoize the file URL endpoint to prevent unnecessary recalculations const fileUrlEndpoint = useMemo(() => { - if (!doc) return null; + if (!doc) { + return null; + } if (isPublicSource) { return publicDocumentApi?.(id, doc.document_id, null); @@ -72,7 +74,9 @@ function OutputAnalyzerCard({ doc, selectedPrompts, totalFields }) { // Calculate fill rate const fillRate = useMemo(() => { - if (totalFields === 0) return "0"; + if (totalFields === 0) { + return "0"; + } return ((filledFields / totalFields) * 100).toFixed(2); }, [filledFields, totalFields]); diff --git a/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx b/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx index 17006246b3..11fb9cf1fb 100644 --- a/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx +++ b/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx @@ -95,7 +95,9 @@ function DisplayPromptResult({ // Extract confidence from 5th element of highlight data coordinate arrays const extractConfidenceFromHighlightData = (data) => { - if (!data) return null; + if (!data) { + return null; + } const confidenceValues = []; @@ -147,11 +149,15 @@ function DisplayPromptResult({ details?.enable_highlight && details?.enable_word_confidence; const getNestedValue = (obj, path) => { - if (!obj || !path) return undefined; + if (!obj || !path) { + return undefined; + } const normalized = path.replace(/\[(\d+)\]/g, ".$1"); const parts = normalized.split(".").filter((p) => p !== ""); return parts.reduce((acc, part) => { - if (acc === undefined || acc === null) return undefined; + if (acc === undefined || acc === null) { + return undefined; + } const maybeIndex = /^\d+$/.test(part) ? Number(part) : part; return acc[maybeIndex]; }, obj); diff --git a/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx b/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx index a1d59fa114..0a3dbc891f 100644 --- a/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx +++ b/frontend/src/components/custom-tools/prompt-card/OutputForIndex.jsx @@ -83,14 +83,18 @@ function OutputForIndex({ chunkData, setIsIndexOpen, isIndexOpen }) { }; const renderHighlightedLine = (line, lineIndex, chunkIndex) => { - if (!searchTerm) return line; + if (!searchTerm) { + return line; + } const matchesInLine = highlightedChunks.filter( (chunk) => chunk.lineIndex === lineIndex && chunk.chunkIndex === chunkIndex, ); - if (!matchesInLine?.length) return line; + if (!matchesInLine?.length) { + return line; + } const parts = []; let lastIndex = 0; diff --git a/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx b/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx index 8c481d4c9c..68485f306d 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx @@ -61,8 +61,9 @@ const PromptCard = memo( if ( isPromptDetailsStateUpdated || !Object.keys(promptDetails || {})?.length - ) + ) { return; + } setPromptDetailsState(promptDetails); setIsPromptDetailsStateUpdated(true); }, [promptDetails]); @@ -203,7 +204,9 @@ const PromptCard = memo( }; const flattenHighlightData = (data) => { - if (!data || typeof data !== "object") return data; + if (!data || typeof data !== "object") { + return data; + } const flattened = []; Object.values(data).forEach((value) => { @@ -256,7 +259,7 @@ const PromptCard = memo( setPostHogCustomEvent("ps_prompt_run", { info: "Click on 'Run Prompt' button (Multi Pass)", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } diff --git a/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx b/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx index 18cb947acb..90f4936a3a 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptCardItems.jsx @@ -113,7 +113,9 @@ function PromptCardItems({ if (adapter) { result.conf[key.label] = adapter?.model || adapter?.adapter_id?.split("|")[0]; - if (adapter?.adapter_type === "LLM") result.icon = adapter?.icon; + if (adapter?.adapter_type === "LLM") { + result.icon = adapter?.icon; + } result.conf["Profile Name"] = profile?.profile_name; } }); @@ -163,8 +165,12 @@ function PromptCardItems({ isDefault: profile?.profile_id === selectedLlmProfileId, })) .sort((a, b) => { - if (a?.isDefault) return -1; // Default profile comes first - if (b?.isDefault) return 1; + if (a?.isDefault) { + return -1; // Default profile comes first + } + if (b?.isDefault) { + return 1; + } return 0; }), ); diff --git a/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx b/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx index debc1df762..901fc8b71c 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx @@ -95,9 +95,9 @@ function PromptOutput({ ); const handleTable = (profileId, promptOutputData) => { - if (tableSettings?.document_type !== "rent_rolls") + if (tableSettings?.document_type !== "rent_rolls") { return ; - else + } else { return ( <> ); + } }; const getColSpan = () => (componentWidth < 1200 ? 24 : 6); diff --git a/frontend/src/components/custom-tools/prompt-card/PromptRun.jsx b/frontend/src/components/custom-tools/prompt-card/PromptRun.jsx index 48a8369140..2f2c5659c3 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptRun.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptRun.jsx @@ -1,9 +1,9 @@ import Cookies from "js-cookie"; import { useEffect } from "react"; -import { usePromptRunQueueStore } from "../../../store/prompt-run-queue-store"; import usePromptRun from "../../../hooks/usePromptRun"; import usePromptStudioSocket from "../../../hooks/usePromptStudioSocket"; import { useCustomToolStore } from "../../../store/custom-tool-store"; +import { usePromptRunQueueStore } from "../../../store/prompt-run-queue-store"; import { usePromptRunStatusStore } from "../../../store/prompt-run-status-store"; const MAX_ACTIVE_APIS = 5; @@ -39,7 +39,9 @@ function PromptRun() { // Setup the beforeunload event handler to store queue in cookies const handleBeforeUnload = () => { - if (!PROMPT_RUN_STATE_PERSISTENCE) return; + if (!PROMPT_RUN_STATE_PERSISTENCE) { + return; + } const { queue } = usePromptRunQueueStore.getState(); // Get the latest state dynamically if (queue?.length) { Cookies.set("promptRunQueue", JSON.stringify(queue), { @@ -56,7 +58,9 @@ function PromptRun() { }, [syncPromptRunApisAndStatus]); useEffect(() => { - if (!queue?.length || activeApis >= MAX_ACTIVE_APIS) return; + if (!queue?.length || activeApis >= MAX_ACTIVE_APIS) { + return; + } const canRunApis = MAX_ACTIVE_APIS - activeApis; const apisToRun = queue.slice(0, canRunApis); diff --git a/frontend/src/components/custom-tools/prompts-reorder/DraggablePrompt.jsx b/frontend/src/components/custom-tools/prompts-reorder/DraggablePrompt.jsx index c845217c08..e0a8ddea0d 100644 --- a/frontend/src/components/custom-tools/prompts-reorder/DraggablePrompt.jsx +++ b/frontend/src/components/custom-tools/prompts-reorder/DraggablePrompt.jsx @@ -14,12 +14,16 @@ function DraggablePrompt({ prompt, index, movePrompt, onDrop, cancelDrag }) { const [{ handlerId }, drop] = useDrop({ accept: ItemTypes.PROMPT, hover: (item, monitor) => { - if (!ref.current) return; + if (!ref.current) { + return; + } const dragIndex = item.index; const hoverIndex = index; - if (dragIndex === hoverIndex) return; + if (dragIndex === hoverIndex) { + return; + } // Move the item visually during drag movePrompt(dragIndex, hoverIndex); diff --git a/frontend/src/components/custom-tools/prompts-reorder/PromptsReorder.jsx b/frontend/src/components/custom-tools/prompts-reorder/PromptsReorder.jsx index d2e2d24a73..5947453ce2 100644 --- a/frontend/src/components/custom-tools/prompts-reorder/PromptsReorder.jsx +++ b/frontend/src/components/custom-tools/prompts-reorder/PromptsReorder.jsx @@ -39,7 +39,9 @@ function PromptsReorder({ isOpen, updateReorderedStatus }) { const movePrompt = useCallback( (fromIndex, toIndex) => { - if (fromIndex === toIndex) return; + if (fromIndex === toIndex) { + return; + } // Store the previous state if not already stored if (!previousListOfPrompts.current?.length) { @@ -92,7 +94,9 @@ function PromptsReorder({ isOpen, updateReorderedStatus }) { const onDrop = useCallback( async (fromIndex, toIndex) => { - if (fromIndex === toIndex) return; + if (fromIndex === toIndex) { + return; + } updateReorderedStatus(true); diff --git a/frontend/src/components/custom-tools/retrieval-strategy-modal/RetrievalStrategyModal.jsx b/frontend/src/components/custom-tools/retrieval-strategy-modal/RetrievalStrategyModal.jsx index 3652b1ece1..1549b5c13c 100644 --- a/frontend/src/components/custom-tools/retrieval-strategy-modal/RetrievalStrategyModal.jsx +++ b/frontend/src/components/custom-tools/retrieval-strategy-modal/RetrievalStrategyModal.jsx @@ -87,26 +87,34 @@ const RetrievalStrategyModal = ({ }; const getTokenUsageClassName = (usage) => { - if (usage.includes("Low")) + if (usage.includes("Low")) { return "retrieval-strategy-modal__token-usage-low"; - if (usage.includes("Medium")) + } + if (usage.includes("Medium")) { return "retrieval-strategy-modal__token-usage-medium"; - if (usage.includes("Very High")) + } + if (usage.includes("Very High")) { return "retrieval-strategy-modal__token-usage-high"; - if (usage.includes("High")) + } + if (usage.includes("High")) { return "retrieval-strategy-modal__token-usage-high"; + } return ""; }; const getCostImpactClassName = (impact) => { - if (impact.includes("Low")) + if (impact.includes("Low")) { return "retrieval-strategy-modal__cost-impact-low"; - if (impact.includes("Medium")) + } + if (impact.includes("Medium")) { return "retrieval-strategy-modal__cost-impact-medium"; - if (impact.includes("Very High")) + } + if (impact.includes("Very High")) { return "retrieval-strategy-modal__cost-impact-high"; - if (impact.includes("High")) + } + if (impact.includes("High")) { return "retrieval-strategy-modal__cost-impact-high"; + } return ""; }; diff --git a/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx b/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx index bea648637a..db17199928 100644 --- a/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx +++ b/frontend/src/components/custom-tools/tool-ide/ToolIde.jsx @@ -221,7 +221,7 @@ function ToolIde() { info: "Exported from reminder bar", tool_name: details?.tool_name, }); - } catch (err) { + } catch (_err) { // Ignore posthog errors } } catch (err) { diff --git a/frontend/src/components/custom-tools/tools-main/ToolsMain.jsx b/frontend/src/components/custom-tools/tools-main/ToolsMain.jsx index c9bab1b24f..5d724f6f85 100644 --- a/frontend/src/components/custom-tools/tools-main/ToolsMain.jsx +++ b/frontend/src/components/custom-tools/tools-main/ToolsMain.jsx @@ -121,7 +121,7 @@ function ToolsMain() { setPostHogCustomEvent("ps_prompt_added", { info: `Clicked on + ${type} button`, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } diff --git a/frontend/src/components/custom-tools/tools-main/ToolsMainActionBtns.jsx b/frontend/src/components/custom-tools/tools-main/ToolsMainActionBtns.jsx index dd9cc1020a..c7053ac921 100644 --- a/frontend/src/components/custom-tools/tools-main/ToolsMainActionBtns.jsx +++ b/frontend/src/components/custom-tools/tools-main/ToolsMainActionBtns.jsx @@ -77,7 +77,7 @@ function ToolsMainActionBtns() { setPostHogCustomEvent("ps_output_analyser_seen", { info: "Clicked on 'Output Analyzer' button", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom PostHog event, ignore it and continue } }, [navigate, setPostHogCustomEvent]); diff --git a/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx b/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx index 7ff2018cf6..8fee98b5e7 100644 --- a/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx +++ b/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx @@ -95,7 +95,9 @@ const CreateApiDeploymentFromPromptStudio = ({ }, [open, toolDetails, form]); const fetchToolFunctionName = async () => { - if (!toolDetails?.tool_id) return; + if (!toolDetails?.tool_id) { + return; + } try { // Fetch tool list to find the function name for this tool_id @@ -318,7 +320,7 @@ const CreateApiDeploymentFromPromptStudio = ({ tool_name: toolDetails?.tool_name, deployment_name: deploymentDetails.api_name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } diff --git a/frontend/src/components/helpers/auth/RequireAuth.js b/frontend/src/components/helpers/auth/RequireAuth.js index 22244f147f..a856c2e03d 100644 --- a/frontend/src/components/helpers/auth/RequireAuth.js +++ b/frontend/src/components/helpers/auth/RequireAuth.js @@ -42,7 +42,7 @@ const RequireAuth = () => { selectedProductStore.useSelectedProductStore( (state) => state?.selectedProduct, ) === "llm-whisperer"; - } catch (error) { + } catch (_error) { // Do nothing } try { @@ -50,7 +50,7 @@ const RequireAuth = () => { selectedProductStore.useSelectedProductStore( (state) => state?.selectedProduct, ) === "verticals"; - } catch (error) { + } catch (_error) { // Do nothing } diff --git a/frontend/src/components/helpers/auth/RequireGuest.js b/frontend/src/components/helpers/auth/RequireGuest.js index 7668d5a006..f0cec7ef7c 100644 --- a/frontend/src/components/helpers/auth/RequireGuest.js +++ b/frontend/src/components/helpers/auth/RequireGuest.js @@ -28,7 +28,7 @@ const RequireGuest = () => { selectedProductStore.useSelectedProductStore( (state) => state?.selectedProduct, ) === "llm-whisperer"; - } catch (error) { + } catch (_error) { // Do nothing } try { @@ -36,7 +36,7 @@ const RequireGuest = () => { selectedProductStore.useSelectedProductStore( (state) => state?.selectedProduct, ) === "verticals"; - } catch (error) { + } catch (_error) { // Do nothing } diff --git a/frontend/src/components/helpers/socket-messages/SocketMessages.js b/frontend/src/components/helpers/socket-messages/SocketMessages.js index 36cb9b9d60..93d648fedd 100644 --- a/frontend/src/components/helpers/socket-messages/SocketMessages.js +++ b/frontend/src/components/helpers/socket-messages/SocketMessages.js @@ -47,7 +47,9 @@ function SocketMessages() { const logMessagesThrottledUpdate = useMemo( () => throttle((logsBatch) => { - if (!logsBatch.length) return; + if (!logsBatch.length) { + return; + } pushLogMessages(logsBatch); logBufferRef.current = []; }, THROTTLE_DELAY), @@ -111,7 +113,9 @@ function SocketMessages() { // Subscribe/unsubscribe to the socket channel useEffect(() => { - if (!logId) return; + if (!logId) { + return; + } const channel = `logs:${logId}`; socket.on(channel, onMessage); @@ -122,7 +126,9 @@ function SocketMessages() { // Process staged messages sequentially useEffect(() => { - if (pointer > stagedMessages?.length - 1) return; + if (pointer > stagedMessages?.length - 1) { + return; + } const stagedMsg = stagedMessages[pointer]; const timer = setTimeout(() => { diff --git a/frontend/src/components/input-output/add-source/AddSource.jsx b/frontend/src/components/input-output/add-source/AddSource.jsx index 8f0f695c86..9cdd757b8b 100644 --- a/frontend/src/components/input-output/add-source/AddSource.jsx +++ b/frontend/src/components/input-output/add-source/AddSource.jsx @@ -87,7 +87,9 @@ function AddSource({ ]); useEffect(() => { - if (!isLLMWPaidSchema || !transformLlmWhispererFormData) return; + if (!isLLMWPaidSchema || !transformLlmWhispererFormData) { + return; + } const modifiedFormData = transformLlmWhispererFormData(formData); diff --git a/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx b/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx index d6953d859e..0c36664cb2 100644 --- a/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx +++ b/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx @@ -60,7 +60,9 @@ function ConfigureDs({ // Determine if OAuth authentication method is selected const isOAuthMethodSelected = () => { - if (!oAuthProvider?.length) return false; + if (!oAuthProvider?.length) { + return false; + } // Check if auth_type is set to a non-OAuth value const data = formData || {}; // If auth_type exists and is not "oauth", then OAuth is not selected @@ -116,7 +118,9 @@ function ConfigureDs({ }, [formData]); useEffect(() => { - if (!metadata) return; + if (!metadata) { + return; + } setFormData(metadata); }, [selectedSourceId, metadata, setFormData]); @@ -203,7 +207,7 @@ function ConfigureDs({ setPostHogCustomEvent(posthogTcEventText[type], { info: `Test connection was triggered: ${selectedSourceName}`, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } } @@ -286,7 +290,7 @@ function ConfigureDs({ connector_name: selectedSourceName, }); } - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } } else { @@ -306,7 +310,7 @@ function ConfigureDs({ info: "Clicked on 'Submit' button", adpater_name: selectedSourceName, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } } diff --git a/frontend/src/components/input-output/data-source-card/DataSourceCard.jsx b/frontend/src/components/input-output/data-source-card/DataSourceCard.jsx index 899d5eb187..7452ec694b 100644 --- a/frontend/src/components/input-output/data-source-card/DataSourceCard.jsx +++ b/frontend/src/components/input-output/data-source-card/DataSourceCard.jsx @@ -19,7 +19,7 @@ function DataSourceCard({ srcDetails, setSelectedSourceId, type }) { info: "Clicked on the adapters card", adapter_name: srcDetails?.name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git a/frontend/src/components/input-output/list-of-sources/ListOfSources.jsx b/frontend/src/components/input-output/list-of-sources/ListOfSources.jsx index 888e716698..c4bee8252d 100644 --- a/frontend/src/components/input-output/list-of-sources/ListOfSources.jsx +++ b/frontend/src/components/input-output/list-of-sources/ListOfSources.jsx @@ -47,7 +47,9 @@ function ListOfSources({ }, 300); const renderModeFilters = () => { - if (!isConnector || connectorMode) return null; + if (!isConnector || connectorMode) { + return null; + } return ( { setFiles([]); setError(""); - if (!selectedConnector) return; + if (!selectedConnector) { + return; + } setLoadingData(true); let cancelled = false; inpService .getFileList(selectedConnector) .then((res) => { - if (cancelled) return; + if (cancelled) { + return; + } setFiles(res.data); setError(""); }) .catch((err) => { - if (cancelled) return; + if (cancelled) { + return; + } const errorDetails = handleException(err, "Error loading files"); setError(errorDetails.content); }) .finally(() => { - if (cancelled) return; + if (cancelled) { + return; + } setLoadingData(false); }); return () => { diff --git a/frontend/src/components/logs-and-notifications/DisplayLogsAndNotifications.jsx b/frontend/src/components/logs-and-notifications/DisplayLogsAndNotifications.jsx index ffdd75fa63..81fe1e85c0 100644 --- a/frontend/src/components/logs-and-notifications/DisplayLogsAndNotifications.jsx +++ b/frontend/src/components/logs-and-notifications/DisplayLogsAndNotifications.jsx @@ -108,7 +108,9 @@ export function DisplayLogsAndNotifications() { const onMouseMove = useCallback( (e) => { - if (!draggingRef.current) return; + if (!draggingRef.current) { + return; + } const diff = startYRef.current - e.clientY; const newHeight = startHeightRef.current + diff; const parentHeight = getParentHeight(); diff --git a/frontend/src/components/metrics-dashboard/RecentActivity.jsx b/frontend/src/components/metrics-dashboard/RecentActivity.jsx index f8bbb3864b..f6f1ad5104 100644 --- a/frontend/src/components/metrics-dashboard/RecentActivity.jsx +++ b/frontend/src/components/metrics-dashboard/RecentActivity.jsx @@ -79,7 +79,9 @@ function RecentActivity({ data, loading }) { const orgName = sessionDetails?.orgName; const handleActivityClick = (item) => { - if (!item.execution_id || !orgName) return; + if (!item.execution_id || !orgName) { + return; + } const typeConfig = TYPE_CONFIG[item.type] || TYPE_CONFIG.workflow; navigate(`/${orgName}/logs/${typeConfig.logType}/${item.execution_id}`, { state: { from: "dashboard" }, diff --git a/frontend/src/components/pipelines-or-deployments/file-history-modal/FileHistoryModal.jsx b/frontend/src/components/pipelines-or-deployments/file-history-modal/FileHistoryModal.jsx index 54bdf355aa..6dfbde0d21 100644 --- a/frontend/src/components/pipelines-or-deployments/file-history-modal/FileHistoryModal.jsx +++ b/frontend/src/components/pipelines-or-deployments/file-history-modal/FileHistoryModal.jsx @@ -245,7 +245,9 @@ const FileHistoryModal = ({ open, setOpen, workflowId, workflowName }) => { // Delete selected file histories (bulk delete by IDs) const handleDeleteSelected = async () => { - if (selectedRowKeys.length === 0) return; + if (selectedRowKeys.length === 0) { + return; + } if (selectedRowKeys.length > MAX_BULK_DELETE) { setAlertDetails({ @@ -439,7 +441,9 @@ const FileHistoryModal = ({ open, setOpen, workflowId, workflowName }) => { width: "12%", responsive: ["md"], render: (date) => { - if (!date) return "N/A"; + if (!date) { + return "N/A"; + } return new Date(date).toLocaleString(); }, }, diff --git a/frontend/src/components/set-org/SetOrg.jsx b/frontend/src/components/set-org/SetOrg.jsx index e612d94788..bb2c00943d 100644 --- a/frontend/src/components/set-org/SetOrg.jsx +++ b/frontend/src/components/set-org/SetOrg.jsx @@ -33,7 +33,7 @@ function SetOrg() { if (state === null || signedInOrgId) { navigate("/"); } - } catch (error) { + } catch (_error) { navigate("/"); } finally { setLoading(false); diff --git a/frontend/src/components/settings/default-triad/DefaultTriad.jsx b/frontend/src/components/settings/default-triad/DefaultTriad.jsx index 2bd1b77a08..31cb045df6 100644 --- a/frontend/src/components/settings/default-triad/DefaultTriad.jsx +++ b/frontend/src/components/settings/default-triad/DefaultTriad.jsx @@ -128,7 +128,7 @@ function DefaultTriad() { info: "Selected default triad", adapter_name: adapterType, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git a/frontend/src/components/settings/invite/InviteEditUser.jsx b/frontend/src/components/settings/invite/InviteEditUser.jsx index d10c341e17..334426e47a 100644 --- a/frontend/src/components/settings/invite/InviteEditUser.jsx +++ b/frontend/src/components/settings/invite/InviteEditUser.jsx @@ -135,7 +135,7 @@ function InviteEditUser() { ? "Clicked on 'Invite' button" : "Clicked on 'Update' button"; setPostHogCustomEvent("intent_success_add_user", { info }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git a/frontend/src/components/settings/platform/PlatformSettings.jsx b/frontend/src/components/settings/platform/PlatformSettings.jsx index c7010fa9db..23ad3109c7 100644 --- a/frontend/src/components/settings/platform/PlatformSettings.jsx +++ b/frontend/src/components/settings/platform/PlatformSettings.jsx @@ -126,7 +126,7 @@ function PlatformSettings() { info: "API Key has been generated", }); } - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } diff --git a/frontend/src/components/settings/users/Users.jsx b/frontend/src/components/settings/users/Users.jsx index 17d971ebe9..55efbd5873 100644 --- a/frontend/src/components/settings/users/Users.jsx +++ b/frontend/src/components/settings/users/Users.jsx @@ -176,7 +176,7 @@ function Users() { setPostHogCustomEvent("intent_add_user", { info: "Clicked on '+ Invite User' button", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git a/frontend/src/components/tool-settings/tool-settings/ToolSettings.jsx b/frontend/src/components/tool-settings/tool-settings/ToolSettings.jsx index 9bf7c0f2c0..cd084c944b 100644 --- a/frontend/src/components/tool-settings/tool-settings/ToolSettings.jsx +++ b/frontend/src/components/tool-settings/tool-settings/ToolSettings.jsx @@ -211,7 +211,7 @@ function ToolSettings({ type }) { setPostHogCustomEvent(posthogEventText[type], { info: `Clicked on '+ ${btnText[type]}' button`, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; diff --git a/frontend/src/hooks/usePromptOutput.js b/frontend/src/hooks/usePromptOutput.js index dd5d02e40e..7da4ffe73c 100644 --- a/frontend/src/hooks/usePromptOutput.js +++ b/frontend/src/hooks/usePromptOutput.js @@ -126,8 +126,9 @@ const usePromptOutput = () => { wordConfidenceData: item?.word_confidence_data, }; - if (item?.is_single_pass_extract && isTokenUsageForSinglePassAdded) + if (item?.is_single_pass_extract && isTokenUsageForSinglePassAdded) { return; + } if (item?.is_single_pass_extract) { const tokenUsageId = generatePromptOutputKeyForSinglePass( diff --git a/frontend/src/hooks/usePromptRun.js b/frontend/src/hooks/usePromptRun.js index 85d2a16857..e1127de8c6 100644 --- a/frontend/src/hooks/usePromptRun.js +++ b/frontend/src/hooks/usePromptRun.js @@ -79,7 +79,9 @@ const usePromptRun = () => { }; const runPrompt = (listOfApis) => { - if (!listOfApis?.length) return; + if (!listOfApis?.length) { + return; + } listOfApis.forEach(runPromptApi); }; @@ -190,14 +192,18 @@ const usePromptRun = () => { }; const params = paramsMap[promptRunType]; - if (!params) return; + if (!params) { + return; + } const paramValues = { promptId, profileId, docId }; const missingParams = params.requiredParams.filter( (param) => !paramValues[param], ); - if (missingParams.length > 0) return; + if (missingParams.length > 0) { + return; + } ({ apiRequestsToQueue, promptRunApiStatus } = prepareApiRequests( params.prompts, diff --git a/frontend/src/hooks/usePromptStudioSocket.js b/frontend/src/hooks/usePromptStudioSocket.js index c5ffa3c765..6b12c55b85 100644 --- a/frontend/src/hooks/usePromptStudioSocket.js +++ b/frontend/src/hooks/usePromptStudioSocket.js @@ -1,7 +1,6 @@ -import { useContext, useEffect, useCallback } from "react"; - -import { SocketContext } from "../helpers/SocketContext"; +import { useCallback, useContext, useEffect } from "react"; import { generateApiRunStatusId } from "../helpers/GetStaticData"; +import { SocketContext } from "../helpers/SocketContext"; import { useAlertStore } from "../store/alert-store"; import { useCustomToolStore } from "../store/custom-tool-store"; import { usePromptRunStatusStore } from "../store/prompt-run-status-store"; @@ -28,7 +27,9 @@ const usePromptStudioSocket = () => { const clearResultStatuses = useCallback( (data) => { - if (!Array.isArray(data)) return; + if (!Array.isArray(data)) { + return; + } data.forEach((item) => { const promptId = item?.prompt_id; const docId = item?.document_manager; @@ -39,7 +40,7 @@ const usePromptStudioSocket = () => { } }); }, - [removePromptStatus] + [removePromptStatus], ); const handleCompleted = useCallback( @@ -63,7 +64,9 @@ const usePromptStudioSocket = () => { }); } else if (operation === "index_document") { const docId = result?.document_id; - if (docId) deleteIndexDoc(docId); + if (docId) { + deleteIndexDoc(docId); + } setAlertDetails({ type: "success", content: result?.message || "Document indexed successfully.", @@ -76,7 +79,7 @@ const usePromptStudioSocket = () => { updateCustomTool, setAlertDetails, deleteIndexDoc, - ] + ], ); const handleFailed = useCallback( @@ -89,7 +92,9 @@ const usePromptStudioSocket = () => { updateCustomTool({ isSinglePassExtractLoading: false }); } else if (operation === "index_document") { const docId = extra?.document_id; - if (docId) deleteIndexDoc(docId); + if (docId) { + deleteIndexDoc(docId); + } } // Clear spinner for prompt operations so buttons re-enable @@ -120,7 +125,7 @@ const usePromptStudioSocket = () => { deleteIndexDoc, removePromptStatus, clearPromptStatusById, - ] + ], ); const onResult = useCallback( @@ -136,15 +141,17 @@ const usePromptStudioSocket = () => { } } catch (err) { setAlertDetails( - handleException(err, "Failed to process prompt studio result") + handleException(err, "Failed to process prompt studio result"), ); } }, - [handleCompleted, handleFailed, setAlertDetails, handleException] + [handleCompleted, handleFailed, setAlertDetails, handleException], ); useEffect(() => { - if (!socket) return; + if (!socket) { + return; + } socket.on(PROMPT_STUDIO_RESULT_EVENT, onResult); return () => { diff --git a/frontend/src/hooks/useRequestUrl.js b/frontend/src/hooks/useRequestUrl.js index 4fd39164f3..c96d32ed97 100644 --- a/frontend/src/hooks/useRequestUrl.js +++ b/frontend/src/hooks/useRequestUrl.js @@ -4,7 +4,9 @@ const useRequestUrl = () => { const { sessionDetails } = useSessionStore(); const getUrl = (url) => { - if (!url) return null; + if (!url) { + return null; + } const baseUrl = `/api/v1/unstract/${sessionDetails?.orgId}/`; return baseUrl + url.replace(/^\//, ""); diff --git a/frontend/src/layouts/rjsf-form-layout/CustomObjectFieldTemplate.jsx b/frontend/src/layouts/rjsf-form-layout/CustomObjectFieldTemplate.jsx index 150fef56f2..87f07bbc93 100644 --- a/frontend/src/layouts/rjsf-form-layout/CustomObjectFieldTemplate.jsx +++ b/frontend/src/layouts/rjsf-form-layout/CustomObjectFieldTemplate.jsx @@ -39,7 +39,9 @@ const CustomObjectFieldTemplate = (props) => { {fieldOrder.map((fieldName) => { const property = propertyMap[fieldName]; - if (!property) return null; + if (!property) { + return null; + } // Skip conditional fields when they shouldn't be shown if ( diff --git a/frontend/src/store/alert-store.js b/frontend/src/store/alert-store.js index b77c6600ad..24549e09ce 100644 --- a/frontend/src/store/alert-store.js +++ b/frontend/src/store/alert-store.js @@ -19,7 +19,9 @@ const STORE_VARIABLES = { const useAlertStore = create((setState) => ({ ...STORE_VARIABLES, setAlertDetails: (details) => { - if (!details) return STORE_VARIABLES; + if (!details) { + return STORE_VARIABLES; + } const isErrorType = details?.type === "error"; const isSuccessType = details?.type === "success"; diff --git a/frontend/src/store/prompt-run-queue-store.js b/frontend/src/store/prompt-run-queue-store.js index 35e888e1dc..267a8f2b26 100644 --- a/frontend/src/store/prompt-run-queue-store.js +++ b/frontend/src/store/prompt-run-queue-store.js @@ -24,7 +24,9 @@ const usePromptRunQueueStore = create((setState, getState) => ({ const existingState = { ...getState() }; const newActiveApis = existingState?.activeApis - numOfApis; - if (newActiveApis < 0) return; + if (newActiveApis < 0) { + return; + } setState({ ...existingState, ...{ activeApis: newActiveApis } }); }, diff --git a/frontend/src/store/prompt-studio-store.js b/frontend/src/store/prompt-studio-store.js index 2195586fa6..b5fe7acd0c 100644 --- a/frontend/src/store/prompt-studio-store.js +++ b/frontend/src/store/prompt-studio-store.js @@ -11,7 +11,9 @@ const usePromptStudioStore = create((set, get) => { ...STORE_VARIABLES, fetchCount: async (getPromptStudioCount) => { // Prevent duplicate calls if already loading or if we already have a count - if (get().isLoading || (get().count > 0 && !get().error)) return; + if (get().isLoading || (get().count > 0 && !get().error)) { + return; + } set({ isLoading: true }); try { diff --git a/frontend/src/store/retrieval-strategies-store.js b/frontend/src/store/retrieval-strategies-store.js index 46645b751d..357608ece8 100644 --- a/frontend/src/store/retrieval-strategies-store.js +++ b/frontend/src/store/retrieval-strategies-store.js @@ -31,7 +31,9 @@ const useRetrievalStrategiesStore = create((set, get) => ({ // Check if strategies need to be fetched (cache for 1 hour) shouldFetch: () => { const { strategies, lastFetched } = get(); - if (!strategies) return true; + if (!strategies) { + return true; + } const oneHour = 60 * 60 * 1000; // 1 hour in milliseconds const now = Date.now(); diff --git a/frontend/src/store/workflow-store.js b/frontend/src/store/workflow-store.js index 0627b0b7d7..c68b1b97df 100644 --- a/frontend/src/store/workflow-store.js +++ b/frontend/src/store/workflow-store.js @@ -60,7 +60,7 @@ const useWorkflowStore = create((setState, getState) => ({ setState(() => { return { ...getState(), ...{ existingState } }; }); - } catch (err) { + } catch (_err) { return; } }, From db3d8c27f7f98db45d4c504a8eb6769d46bfb879 Mon Sep 17 00:00:00 2001 From: harini-venkataraman Date: Thu, 12 Mar 2026 17:39:16 +0530 Subject: [PATCH 63/64] fix: replace dynamic import of SharePermission with static import in Workflows Resolves vite build warning about SharePermission.jsx being both dynamically and statically imported across the codebase. Co-Authored-By: Claude Opus 4.6 --- frontend/src/components/workflows/workflow/Workflows.jsx | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/frontend/src/components/workflows/workflow/Workflows.jsx b/frontend/src/components/workflows/workflow/Workflows.jsx index 73da9ec981..9b9bcede50 100644 --- a/frontend/src/components/workflows/workflow/Workflows.jsx +++ b/frontend/src/components/workflows/workflow/Workflows.jsx @@ -12,6 +12,7 @@ import { useWorkflowStore } from "../../../store/workflow-store"; import { CustomButton } from "../../widgets/custom-button/CustomButton.jsx"; import { EmptyState } from "../../widgets/empty-state/EmptyState.jsx"; import { LazyLoader } from "../../widgets/lazy-loader/LazyLoader.jsx"; +import { SharePermission } from "../../widgets/share-permission/SharePermission.jsx"; import { SpinnerLoader } from "../../widgets/spinner-loader/SpinnerLoader.jsx"; import "./Workflows.css"; import { useExceptionHandler } from "../../../hooks/useExceptionHandler.jsx"; @@ -384,11 +385,7 @@ function Workflows() { /> )} {shareOpen && selectedWorkflow && ( - - import("../../widgets/share-permission/SharePermission.jsx") - } - componentName={"SharePermission"} + Date: Thu, 12 Mar 2026 18:04:55 +0530 Subject: [PATCH 64/64] fix: resolve SonarCloud warnings in frontend components - Remove unnecessary try-catch around PostHog event calls - Flip negated condition in PromptOutput.handleTable for clarity Co-Authored-By: Claude Opus 4.6 --- .../custom-tools/prompt-card/PromptCard.jsx | 10 ++--- .../custom-tools/prompt-card/PromptOutput.jsx | 5 +-- .../CreateApiDeploymentFromPromptStudio.jsx | 16 +++----- .../input-output/configure-ds/ConfigureDs.jsx | 38 +++++++------------ 4 files changed, 24 insertions(+), 45 deletions(-) diff --git a/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx b/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx index 68485f306d..1687d693bc 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptCard.jsx @@ -255,13 +255,9 @@ const PromptCard = memo( // Generate the result for the currently selected document const handleRun = (promptRunType, promptId, profileId, documentId) => { - try { - setPostHogCustomEvent("ps_prompt_run", { - info: "Click on 'Run Prompt' button (Multi Pass)", - }); - } catch (_err) { - // If an error occurs while setting custom posthog event, ignore it and continue - } + setPostHogCustomEvent("ps_prompt_run", { + info: "Click on 'Run Prompt' button (Multi Pass)", + }); const validateInputs = () => { if (!selectedDoc) { diff --git a/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx b/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx index 901fc8b71c..88df329744 100644 --- a/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx +++ b/frontend/src/components/custom-tools/prompt-card/PromptOutput.jsx @@ -95,9 +95,7 @@ function PromptOutput({ ); const handleTable = (profileId, promptOutputData) => { - if (tableSettings?.document_type !== "rent_rolls") { - return ; - } else { + if (tableSettings?.document_type === "rent_rolls") { return ( <> ); } + return ; }; const getColSpan = () => (componentWidth < 1200 ? 24 : 6); diff --git a/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx b/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx index 8fee98b5e7..78b2a0fbbc 100644 --- a/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx +++ b/frontend/src/components/deployments/create-api-deployment-from-prompt-studio/CreateApiDeploymentFromPromptStudio.jsx @@ -313,16 +313,12 @@ const CreateApiDeploymentFromPromptStudio = ({ return; } - try { - setPostHogCustomEvent("intent_create_api_deployment_from_prompt_studio", { - info: "Creating API deployment from prompt studio", - tool_id: toolDetails?.tool_id, - tool_name: toolDetails?.tool_name, - deployment_name: deploymentDetails.api_name, - }); - } catch (_err) { - // If an error occurs while setting custom posthog event, ignore it and continue - } + setPostHogCustomEvent("intent_create_api_deployment_from_prompt_studio", { + info: "Creating API deployment from prompt studio", + tool_id: toolDetails?.tool_id, + tool_name: toolDetails?.tool_name, + deployment_name: deploymentDetails.api_name, + }); setIsLoading(true); setBackendErrors(null); // Clear any previous errors diff --git a/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx b/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx index 0c36664cb2..4dc3319572 100644 --- a/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx +++ b/frontend/src/components/input-output/configure-ds/ConfigureDs.jsx @@ -203,13 +203,9 @@ function ConfigureDs({ }; url = getUrl("test_adapters/"); - try { - setPostHogCustomEvent(posthogTcEventText[type], { - info: `Test connection was triggered: ${selectedSourceName}`, - }); - } catch (_err) { - // If an error occurs while setting custom posthog event, ignore it and continue - } + setPostHogCustomEvent(posthogTcEventText[type], { + info: `Test connection was triggered: ${selectedSourceName}`, + }); } if (oAuthProvider?.length > 0 && isOAuthMethodSelected()) { @@ -282,16 +278,12 @@ function ConfigureDs({ url = getUrl("connector/"); - try { - const eventKey = `${type.toUpperCase()}`; - if (posthogConnectorAddedEventText[eventKey]) { - setPostHogCustomEvent(posthogConnectorAddedEventText[eventKey], { - info: `Clicked on 'Submit' button`, - connector_name: selectedSourceName, - }); - } - } catch (_err) { - // If an error occurs while setting custom posthog event, ignore it and continue + const eventKey = `${type.toUpperCase()}`; + if (posthogConnectorAddedEventText[eventKey]) { + setPostHogCustomEvent(posthogConnectorAddedEventText[eventKey], { + info: `Clicked on 'Submit' button`, + connector_name: selectedSourceName, + }); } } else { const adapterMetadata = { ...formData }; @@ -305,14 +297,10 @@ function ConfigureDs({ }; url = getUrl("adapter/"); - try { - setPostHogCustomEvent(posthogSubmitEventText[type], { - info: "Clicked on 'Submit' button", - adpater_name: selectedSourceName, - }); - } catch (_err) { - // If an error occurs while setting custom posthog event, ignore it and continue - } + setPostHogCustomEvent(posthogSubmitEventText[type], { + info: "Clicked on 'Submit' button", + adpater_name: selectedSourceName, + }); } let method = "POST";