Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,33 @@ LLMs are powerful, but their output is as good as the input you provide. LLMWhis

Refer to the client documentation for more information: [LLMWhisperer Client Documentation](https://docs.unstract.com/llmwhisperer/llm_whisperer/python_client/llm_whisperer_python_client_intro/)

## A note on versions
## Client

There are two versions of the client library available in this package:
This package provides **LLMWhispererClientV2**, the client for LLMWhisperer API v2. It is required for all users on API version 2.0.0 and above.

**LLMWhispererClient**: This is the legacy version of the client library and is recommended for supporting older apps only. This version will be deprecated in the future.
Documentation is available [here](https://docs.unstract.com/llmwhisperer/).

**LLMWhispererClientV2**: This is the latest version of the client library and is recommended for all new users. It is mandatory for all users who are using LLMWhisperer API version 2.0.0 and above (All customers who have signed up after 5th November 2024).
## Running Tests

Documentation for both versions is available [here](https://docs.unstract.com/llmwhisperer/).
Install test dependencies and run all tests:

```bash
uv run --group test pytest
```

To run only unit tests (skipping integration tests):

```bash
uv run --group test pytest tests/unit tests/utils_test.py
```

To run only integration tests:

```bash
uv run --group test pytest tests/integration
```

Integration tests require a valid API key. Copy `sample.env` to `.env` and fill in your credentials before running them.

## Questions and Feedback

Expand Down
1 change: 0 additions & 1 deletion sample.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
LLMWHISPERER_BASE_URL=https://llmwhisperer-api.unstract.com/v1
LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2
LLMWHISPERER_LOG_LEVEL=DEBUG
LLMWHISPERER_API_KEY=
42 changes: 42 additions & 0 deletions src/unstract/llmwhisperer/client_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,48 @@ def get_highlight_data(self, whisper_hash: str, lines: str, extract_all_lines: b
raise LLMWhispererClientException(err)
return json.loads(response.text)

def whisper_detail(self, whisper_hash: str) -> Any:
    """Fetch the metadata recorded for a text extraction job.

    Issues a GET request to the '/whisper-detail' endpoint of the LLMWhisperer API,
    passing ``whisper_hash`` as a query parameter, and returns the decoded JSON body.
    Refer to https://docs.unstract.com/llmwhisperer/llm_whisperer/apis/llm_whisperer_text_extraction_detail_api

    Args:
        whisper_hash (str): The identifier returned when starting the extraction process.

    Returns:
        Dict[Any, Any]: The decoded JSON response. Per the API documentation it holds
            completed_at, mode, processed_pages, processing_started_at,
            processing_time_in_seconds, requested_pages, tag, total_pages,
            upload_file_size_in_kb, and whisper_hash.

    Raises:
        LLMWhispererClientException: If the response status is not 200. The exception
            carries the status code together with either the decoded JSON error body,
            a note that the body was empty, or a preview of a non-JSON body.
    """
    self.logger.debug("whisper_detail called")
    endpoint = f"{self.base_url}/whisper-detail"
    self.logger.debug("url: %s", endpoint)
    self.logger.debug("whisper_hash: %s", whisper_hash)

    request = requests.Request(
        "GET",
        endpoint,
        headers=self.headers,
        params={"whisper_hash": whisper_hash},
    )
    response = self._send_request(request.prepare())

    # Happy path first: a 200 response is returned as parsed JSON.
    if response.status_code == 200:
        return json.loads(response.text)

    body = response.text
    if not (body or "").strip():
        raise LLMWhispererClientException("API error: empty response body", response.status_code)

    try:
        error_payload = json.loads(body)
    except json.JSONDecodeError as exc:
        # Cap the echoed body at 500 characters so the exception message stays readable.
        preview = body if len(body) <= 500 else body[:500] + "..."
        raise LLMWhispererClientException(
            f"API error: non-JSON response - {preview}", response.status_code
        ) from exc
    raise LLMWhispererClientException(error_payload, response.status_code)

def whisper(
self,
file_path: str = "",
Expand Down
45 changes: 45 additions & 0 deletions tests/integration/client_v2_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,51 @@ def test_webhook(client_v2: LLMWhispererClientV2, url: str, token: str, webhook_
assert e.error_message()["status_code"] == 404


def test_whisper_detail(client_v2: LLMWhispererClientV2, data_dir: str) -> None:
    """Run an extraction on a sample PDF and check that whisper_detail reports its metadata."""
    whisper_result = client_v2.whisper(
        mode="native_text",
        output_mode="text",
        file_path=os.path.join(data_dir, "credit_card.pdf"),
        wait_for_completion=True,
    )
    job_hash = whisper_result["whisper_hash"]

    detail = client_v2.whisper_detail(job_hash)

    assert isinstance(detail, dict)
    assert detail["whisper_hash"] == job_hash

    expected_keys = [
        "completed_at",
        "mode",
        "processed_pages",
        "processing_started_at",
        "processing_time_in_seconds",
        "requested_pages",
        "tag",
        "total_pages",
        "upload_file_size_in_kb",
        "whisper_hash",
    ]
    # Every documented key must be present; extra keys in the response are tolerated.
    missing_keys = set(expected_keys) - set(detail.keys())
    assert not missing_keys, f"whisper_detail is missing expected keys: {missing_keys}"

    assert detail["mode"] == "native_text"
    assert detail["processed_pages"] > 0
    assert detail["total_pages"] > 0


def test_whisper_detail_not_found(client_v2: LLMWhispererClientV2) -> None:
    """An unknown whisper_hash must surface as a client exception with status 400."""
    with pytest.raises(LLMWhispererClientException) as raised:
        client_v2.whisper_detail("nonexistent_hash_12345")

    exc = raised.value
    error = exc.error_message()
    assert exc.status_code == 400
    assert "message" in error


def assert_error_message(whisper_result: dict) -> None:
assert isinstance(whisper_result, dict)
assert whisper_result["status"] == "error"
Expand Down
40 changes: 40 additions & 0 deletions tests/unit/client_v2_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,46 @@ def test_get_webhook_details(mocker: MockerFixture, client_v2: LLMWhispererClien
assert response["webhook_details"]["url"] == WEBHOOK_URL


def test_whisper_detail_success(mocker: MockerFixture, client_v2: LLMWhispererClientV2) -> None:
    """A 200 response from the API is decoded and returned by whisper_detail."""
    fake_response = MagicMock(
        status_code=200,
        text=(
            '{"whisper_hash": "abc123", "mode": "high_quality", "processed_pages": 3,'
            ' "requested_pages": 3, "total_pages": 5, "upload_file_size_in_kb": 120.5,'
            ' "processing_time_in_seconds": 4.2, "completed_at": "2025-01-01T00:00:00Z",'
            ' "processing_started_at": "2025-01-01T00:00:00Z", "tag": "default"}'
        ),
    )
    # Intercept the HTTP layer so no network traffic is generated.
    send_mock = mocker.patch("requests.Session.send", return_value=fake_response)

    detail = client_v2.whisper_detail("abc123")

    assert detail["whisper_hash"] == "abc123"
    assert detail["mode"] == "high_quality"
    assert detail["processed_pages"] == 3
    assert detail["total_pages"] == 5
    assert detail["upload_file_size_in_kb"] == 120.5
    send_mock.assert_called_once()


def test_whisper_detail_not_found(mocker: MockerFixture, client_v2: LLMWhispererClientV2) -> None:
    """A 400 response with a JSON error body is raised as a client exception."""
    fake_response = MagicMock(status_code=400, text='{"message": "Record not found"}')
    # Intercept the HTTP layer so no network traffic is generated.
    send_mock = mocker.patch("requests.Session.send", return_value=fake_response)

    with pytest.raises(LLMWhispererClientException) as raised:
        client_v2.whisper_detail("nonexistent_hash")

    exc = raised.value
    error = exc.error_message()
    assert error["message"] == "Record not found"
    assert exc.status_code == 400
    send_mock.assert_called_once()


def test_whisper_json_string_response_error(mocker: MockerFixture, client_v2: LLMWhispererClientV2) -> None:
"""Test whisper method handles JSON string responses correctly for error
cases."""
Expand Down
Loading