-
Notifications
You must be signed in to change notification settings - Fork 717
FEAT: Add partner integration tests for azure-ai-evaluation red team … #1533
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
hannahwestra25
merged 8 commits into
microsoft:main
from
slister1001:partner-integration-tests
Apr 8, 2026
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
8e13830
FEAT: Add partner integration tests for azure-ai-evaluation red team …
slister1001 0a0edfb
fix: address review findings in partner integration tests
slister1001 1830719
fix: ruff-format split f-string in test_converter_contract.py
slister1001 895406c
Address PR review comments: PromptChatTarget -> PromptTarget, fix imp…
slister1001 5d411dd
Address PR review + full PyRIT coverage for azure-ai-evaluation
slister1001 0b8fc2b
Address round 2 review feedback
slister1001 f8e2bb9
Address round 3 review feedback
slister1001 281b21b
Fix ruff N817: remove CamelCase-as-acronym aliases
slister1001 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. |
19 changes: 19 additions & 0 deletions
19
tests/partner_integration/azure_ai_evaluation/test_auth_contract.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. | ||
|
|
||
| """Contract tests for authentication utilities used by azure-ai-evaluation. | ||
|
|
||
| The azure-ai-evaluation red team module uses: | ||
| - get_azure_openai_auth: Called in _utils/strategy_utils.py to authenticate | ||
| OpenAIChatTarget for tense/translation converter strategies. | ||
| """ | ||
|
|
||
| from pyrit.auth import get_azure_openai_auth | ||
|
|
||
|
|
||
| class TestAuthContract: | ||
| """Validate authentication utility availability.""" | ||
|
|
||
| def test_get_azure_openai_auth_is_callable(self): | ||
| """strategy_utils.py calls get_azure_openai_auth() for OpenAI target auth.""" | ||
| assert callable(get_azure_openai_auth) |
93 changes: 93 additions & 0 deletions
93
tests/partner_integration/azure_ai_evaluation/test_converter_contract.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. | ||
|
|
||
| """Contract tests for PromptConverter interface and specific converters used by azure-ai-evaluation. | ||
|
|
||
| The azure-ai-evaluation red team module: | ||
| - Extends PromptConverter via _DefaultConverter | ||
| - Imports 20+ specific converters in _agent/_agent_utils.py and _utils/strategy_utils.py | ||
| - Uses ConverterResult as the return type | ||
| """ | ||
|
|
||
| import pytest | ||
|
|
||
| from pyrit.prompt_converter import ConverterResult, PromptConverter | ||
|
|
||
|
|
||
| class TestPromptConverterContract: | ||
| """Validate PromptConverter base class interface stability.""" | ||
|
|
||
| def test_prompt_converter_has_convert_async(self): | ||
| """_DefaultConverter overrides convert_async.""" | ||
| assert hasattr(PromptConverter, "convert_async") | ||
|
|
||
| def test_prompt_converter_subclassable(self): | ||
| """_DefaultConverter subclasses PromptConverter with convert_async.""" | ||
|
|
||
| class TestConverter(PromptConverter): | ||
| SUPPORTED_INPUT_TYPES = ("text",) | ||
| SUPPORTED_OUTPUT_TYPES = ("text",) | ||
|
|
||
| async def convert_async(self, *, prompt, input_type="text"): | ||
| return ConverterResult(output_text=prompt, output_type="text") | ||
|
|
||
| converter = TestConverter() | ||
| assert isinstance(converter, PromptConverter) | ||
|
|
||
|
|
||
| class TestSpecificConvertersImportable: | ||
| """Validate that all converters imported by azure-ai-evaluation are available. | ||
|
|
||
| These converters are imported in: | ||
| - _agent/_agent_utils.py (20+ converters) | ||
| - _utils/strategy_utils.py (converter instantiation) | ||
| """ | ||
|
|
||
| @pytest.mark.parametrize( | ||
| "converter_name", | ||
| [ | ||
| "AnsiAttackConverter", | ||
| "AsciiArtConverter", | ||
| "AtbashConverter", | ||
| "Base64Converter", | ||
| "BinaryConverter", | ||
| "CaesarConverter", | ||
| "CharacterSpaceConverter", | ||
| # NOTE: _agent/_agent_utils.py imports "CharSwapGenerator" but PyRIT | ||
| # exports "CharSwapConverter". This is a naming discrepancy in the SDK; | ||
| # the canonical PyRIT name is CharSwapConverter. | ||
| "CharSwapConverter", | ||
| "DiacriticConverter", | ||
| "FlipConverter", | ||
| "LeetspeakConverter", | ||
| "MathPromptConverter", | ||
| "MorseConverter", | ||
| "ROT13Converter", | ||
| "StringJoinConverter", | ||
| "SuffixAppendConverter", | ||
| "TenseConverter", | ||
| "UnicodeConfusableConverter", | ||
| "UnicodeSubstitutionConverter", | ||
| "UrlConverter", | ||
| ], | ||
| ) | ||
| def test_converter_importable(self, converter_name): | ||
| """Each converter used by azure-ai-evaluation must be importable from pyrit.prompt_converter.""" | ||
| import pyrit.prompt_converter as pc | ||
|
|
||
| converter_class = getattr(pc, converter_name, None) | ||
| assert converter_class is not None, ( | ||
| f"{converter_name} not found in pyrit.prompt_converter — azure-ai-evaluation depends on this converter" | ||
| ) | ||
|
|
||
| def test_ascii_smuggler_converter_importable(self): | ||
| """AsciiSmugglerConverter is imported in _agent/_agent_utils.py.""" | ||
| from pyrit.prompt_converter import AsciiSmugglerConverter | ||
|
|
||
| assert AsciiSmugglerConverter is not None | ||
|
|
||
| def test_llm_generic_text_converter_importable(self): | ||
| """LLMGenericTextConverter is used for tense/translation strategies.""" | ||
| from pyrit.prompt_converter import LLMGenericTextConverter | ||
|
|
||
| assert LLMGenericTextConverter is not None |
65 changes: 65 additions & 0 deletions
65
tests/partner_integration/azure_ai_evaluation/test_exceptions_contract.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. | ||
|
|
||
| """Contract tests for PyRIT exception types and retry decorators used by azure-ai-evaluation. | ||
|
|
||
| The azure-ai-evaluation red team module uses these in: | ||
| - _callback_chat_target.py: EmptyResponseException, RateLimitException, pyrit_target_retry | ||
| - _rai_service_target.py: remove_markdown_json | ||
| """ | ||
|
|
||
| from pyrit.exceptions import ( | ||
| EmptyResponseException, | ||
| RateLimitException, | ||
| pyrit_target_retry, | ||
| remove_markdown_json, | ||
| ) | ||
|
|
||
|
|
||
| class TestExceptionTypesContract: | ||
| """Validate exception types exist and are proper Exception subclasses.""" | ||
|
|
||
| def test_empty_response_exception_is_exception(self): | ||
| """_CallbackChatTarget catches EmptyResponseException.""" | ||
| assert issubclass(EmptyResponseException, Exception) | ||
|
|
||
| def test_rate_limit_exception_is_exception(self): | ||
| """_CallbackChatTarget catches RateLimitException.""" | ||
| assert issubclass(RateLimitException, Exception) | ||
|
|
||
| def test_empty_response_exception_instantiable(self): | ||
| """Verify EmptyResponseException can be instantiated without arguments.""" | ||
| exc = EmptyResponseException() | ||
| assert isinstance(exc, Exception) | ||
|
|
||
| def test_rate_limit_exception_instantiable(self): | ||
| """Verify RateLimitException can be instantiated without arguments.""" | ||
| exc = RateLimitException() | ||
| assert isinstance(exc, Exception) | ||
|
|
||
|
|
||
| class TestRetryDecoratorContract: | ||
| """Validate retry decorator availability.""" | ||
|
|
||
| def test_pyrit_target_retry_is_callable(self): | ||
| """_CallbackChatTarget uses @pyrit_target_retry decorator.""" | ||
| assert callable(pyrit_target_retry) | ||
|
|
||
|
|
||
| class TestUtilityFunctionsContract: | ||
| """Validate utility functions used by azure-ai-evaluation.""" | ||
|
|
||
| def test_remove_markdown_json_is_callable(self): | ||
| """_rai_service_target.py uses remove_markdown_json.""" | ||
| assert callable(remove_markdown_json) | ||
|
|
||
| def test_remove_markdown_json_handles_plain_text(self): | ||
| """Verify remove_markdown_json returns a str for input without markdown fences.""" | ||
| result = remove_markdown_json("plain text") | ||
| assert isinstance(result, str) | ||
|
|
||
| def test_remove_markdown_json_strips_markdown_fences(self): | ||
| """Verify remove_markdown_json strips ```json fences.""" | ||
| input_text = '```json\n{"key": "value"}\n```' | ||
| result = remove_markdown_json(input_text) | ||
| assert "```" not in result |
76 changes: 76 additions & 0 deletions
76
tests/partner_integration/azure_ai_evaluation/test_foundry_scenario_contract.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. | ||
|
|
||
| """Contract tests for Foundry scenario APIs used by azure-ai-evaluation. | ||
|
|
||
| The azure-ai-evaluation red team module uses the scenario framework for attack execution: | ||
| - FoundryExecutionManager creates FoundryScenario instances per risk category | ||
| - StrategyMapper maps AttackStrategy enum → FoundryStrategy | ||
| - DatasetConfigurationBuilder produces DatasetConfiguration from RAI objectives | ||
| - ScenarioOrchestrator processes ScenarioResult and AttackResult | ||
| - RAIServiceScorer uses AttackScoringConfig for scoring configuration | ||
| """ | ||
|
|
||
| from pyrit.executor.attack import AttackScoringConfig | ||
| from pyrit.scenario import ScenarioStrategy | ||
| from pyrit.scenario.foundry import FoundryStrategy | ||
|
|
||
|
|
||
| class TestRedTeamStrategyContract: | ||
| """Validate FoundryStrategy availability and structure.""" | ||
|
|
||
| def test_foundry_strategy_is_scenario_strategy(self): | ||
| """FoundryStrategy should extend ScenarioStrategy.""" | ||
| assert issubclass(FoundryStrategy, ScenarioStrategy) | ||
|
|
||
|
|
||
hannahwestra25 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| class TestRedTeamScenarioContract: | ||
| """Validate FoundryScenario importability.""" | ||
|
|
||
| def test_foundry_scenario_importable(self): | ||
| """ScenarioOrchestrator creates FoundryScenario instances.""" | ||
| from pyrit.scenario.foundry import FoundryScenario # noqa: F811 | ||
|
|
||
| assert FoundryScenario is not None | ||
|
|
||
|
|
||
| class TestDatasetConfigurationContract: | ||
| """Validate DatasetConfiguration importability.""" | ||
|
|
||
| def test_dataset_configuration_importable(self): | ||
| """DatasetConfigurationBuilder produces DatasetConfiguration.""" | ||
| from pyrit.scenario import DatasetConfiguration # noqa: F811 | ||
|
|
||
| assert DatasetConfiguration is not None | ||
|
|
||
|
|
||
| class TestAttackScoringConfigContract: | ||
| """Validate AttackScoringConfig availability.""" | ||
|
|
||
| def test_attack_scoring_config_has_expected_fields(self): | ||
| """A default-constructed AttackScoringConfig should expose objective_scorer and refusal_scorer attributes.""" | ||
| config = AttackScoringConfig() | ||
| assert hasattr(config, "objective_scorer") | ||
| assert hasattr(config, "refusal_scorer") | ||
|
|
||
|
|
||
| class TestScenarioResultContract: | ||
| """Validate ScenarioResult, AttackResult, and AttackOutcome importability.""" | ||
|
|
||
| def test_scenario_result_importable(self): | ||
| """ScenarioOrchestrator reads ScenarioResult.""" | ||
| from pyrit.models.scenario_result import ScenarioResult # noqa: F811 | ||
|
|
||
| assert ScenarioResult is not None | ||
|
|
||
| def test_attack_result_importable(self): | ||
| """FoundryResultProcessor processes AttackResult.""" | ||
| from pyrit.models import AttackResult | ||
|
|
||
| assert AttackResult is not None | ||
|
|
||
| def test_attack_outcome_importable(self): | ||
| """FoundryResultProcessor checks AttackOutcome values.""" | ||
| from pyrit.models import AttackOutcome | ||
|
|
||
| assert AttackOutcome is not None | ||
113 changes: 113 additions & 0 deletions
113
tests/partner_integration/azure_ai_evaluation/test_import_smoke.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. | ||
|
|
||
| """Import smoke tests for azure-ai-evaluation red team module integration. | ||
|
|
||
| These tests verify that the azure-ai-evaluation red team module can be imported | ||
| and that its PyRIT subclasses correctly extend PyRIT base classes. | ||
|
|
||
| Tests are SKIPPED if azure-ai-evaluation[redteam] is not installed. | ||
| """ | ||
|
|
||
| import pytest | ||
|
|
||
| from pyrit.prompt_target import PromptTarget | ||
| from pyrit.score.true_false.true_false_scorer import TrueFalseScorer | ||
|
|
||
|
|
||
| def _azure_ai_evaluation_available() -> bool: | ||
| """Check if azure-ai-evaluation[redteam] is installed.""" | ||
| try: | ||
| from azure.ai.evaluation.red_team import RedTeam # noqa: F401 | ||
|
|
||
| return True | ||
| except ImportError: | ||
| return False | ||
|
|
||
|
|
||
| requires_azure_ai_evaluation = pytest.mark.skipif( | ||
| not _azure_ai_evaluation_available(), | ||
| reason="azure-ai-evaluation[redteam] is not installed", | ||
| ) | ||
|
|
||
|
|
||
| @requires_azure_ai_evaluation | ||
| class TestRedTeamModuleImports: | ||
| """Verify azure-ai-evaluation red_team module imports succeed with current PyRIT.""" | ||
|
|
||
| def test_redteam_public_api_imports(self): | ||
| """Verify all public classes from azure.ai.evaluation.red_team are importable.""" | ||
| from azure.ai.evaluation.red_team import ( | ||
| AttackStrategy, | ||
| RedTeam, | ||
| RedTeamResult, | ||
| RiskCategory, | ||
| SupportedLanguages, | ||
| ) | ||
|
|
||
| assert RedTeam is not None | ||
| assert AttackStrategy is not None | ||
| assert RiskCategory is not None | ||
| assert RedTeamResult is not None | ||
| assert SupportedLanguages is not None | ||
|
|
||
|
|
||
| class TestPromptChatTargetTransitionalCompat: | ||
| """Verify PromptChatTarget still exists and extends PromptTarget. | ||
|
|
||
| The SDK currently imports PromptChatTarget in 6+ production files | ||
| (_callback_chat_target.py, _orchestrator_manager.py, _scenario_orchestrator.py, | ||
| _execution_manager.py, strategy_utils.py, _rai_service_target.py). PyRIT is | ||
| migrating from PromptChatTarget to PromptTarget, but during the transition | ||
| both must exist with correct inheritance. | ||
| """ | ||
|
|
||
| def test_prompt_chat_target_exists(self): | ||
| """PromptChatTarget must remain importable during the transition.""" | ||
| from pyrit.prompt_target import PromptChatTarget | ||
|
|
||
| assert PromptChatTarget is not None | ||
|
|
||
| def test_prompt_chat_target_extends_prompt_target(self): | ||
| """PromptChatTarget must be a subclass of PromptTarget.""" | ||
| from pyrit.prompt_target import PromptChatTarget | ||
|
|
||
| assert issubclass(PromptChatTarget, PromptTarget) | ||
|
|
||
|
|
||
| @requires_azure_ai_evaluation | ||
| class TestCallbackChatTargetInheritance: | ||
| """Verify _CallbackChatTarget correctly extends PromptTarget. | ||
|
|
||
| NOTE: These tests intentionally import private (_-prefixed) modules from | ||
| azure-ai-evaluation. This is correct for contract testing — we need to verify | ||
| the actual subclass relationships that PyRIT API changes could break. | ||
|
|
||
| Explicit inheritance checks are REQUIRED here because: | ||
| 1. PyRIT orchestrators and scenarios detect subclasses via issubclass() at | ||
| runtime to determine capabilities (multi-turn, system prompt support, etc.) | ||
| 2. If the inheritance chain breaks, attacks silently fall back to single-turn | ||
| mode or skip system prompt injection — causing false negatives. | ||
| 3. These checks catch breaking changes that import-only tests would miss. | ||
| """ | ||
|
|
||
| def test_callback_chat_target_extends_prompt_target(self): | ||
| """_CallbackChatTarget must be a subclass of pyrit.prompt_target.PromptTarget.""" | ||
| from azure.ai.evaluation.red_team._callback_chat_target import _CallbackChatTarget | ||
|
|
||
| assert issubclass(_CallbackChatTarget, PromptTarget) | ||
|
|
||
|
|
||
| @requires_azure_ai_evaluation | ||
| class TestRAIScorerInheritance: | ||
| """Verify RAIServiceScorer correctly extends TrueFalseScorer. | ||
|
|
||
| Explicit inheritance check — see TestCallbackChatTargetInheritance docstring | ||
| for why issubclass() contract tests are necessary. | ||
| """ | ||
|
|
||
| def test_rai_scorer_extends_true_false_scorer(self): | ||
| """RAIServiceScorer must be a subclass of pyrit.score.true_false.TrueFalseScorer.""" | ||
| from azure.ai.evaluation.red_team._foundry._rai_scorer import RAIServiceScorer # private: intentional | ||
|
|
||
| assert issubclass(RAIServiceScorer, TrueFalseScorer) |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.