4 changes: 4 additions & 0 deletions pyrit/datasets/seed_datasets/remote/__init__.py
@@ -34,6 +34,9 @@
from pyrit.datasets.seed_datasets.remote.harmbench_multimodal_dataset import (
_HarmBenchMultimodalDataset,
) # noqa: F401
from pyrit.datasets.seed_datasets.remote.jailbreakv_dataset import (
_JailbreakVDataset,
) # noqa: F401
from pyrit.datasets.seed_datasets.remote.jbb_behaviors_dataset import (
_JBBBehaviorsDataset,
) # noqa: F401
@@ -91,6 +94,7 @@
"_ForbiddenQuestionsDataset",
"_HarmBenchDataset",
"_HarmBenchMultimodalDataset",
"_JailbreakVDataset",
"_JBBBehaviorsDataset",
"_LibrAIDoNotAnswerDataset",
"_MedSafetyBenchDataset",
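With the export above in place, the loader becomes importable from the package namespace (a sketch for reviewers; the leading underscore marks it as internal API):

from pyrit.datasets.seed_datasets.remote import _JailbreakVDataset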
159 changes: 159 additions & 0 deletions pyrit/datasets/seed_datasets/remote/jailbreakv_dataset.py
@@ -0,0 +1,159 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
from typing import Literal, Optional

from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
_RemoteDatasetLoader,
)
from pyrit.models import SeedDataset, SeedPrompt

logger = logging.getLogger(__name__)


class _JailbreakVDataset(_RemoteDatasetLoader):
"""
Loader for the JailBreakV-28K dataset.

This dataset contains 28,000+ jailbreak prompts across 16 safety policy categories,
designed to evaluate the robustness of multimodal large language models against
jailbreak attacks.

Reference: https://arxiv.org/abs/2404.03027
HuggingFace: https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k
License: MIT

Content Warning: This dataset contains prompts aimed at provoking harmful responses
and may contain offensive content. Users should check with their legal department
before using these prompts against production LLMs.
"""

POLICY_CATEGORIES = [
"Animal Abuse",
"Bias",
"Economic Harm",
"Fraud",
"Government Decision",
"Hate Speech",
"Health Consultation",
"Illegal Activity",
"Malware",
"Physical Harm",
"Political Sensitivity",
"Privacy Violation",
"Tailored Unlicensed Advice",
"Unethical Behavior",
"Violence",
]

def __init__(
self,
*,
source: str = "JailbreakV-28K/JailBreakV-28k",
config: Literal["JailBreakV_28K", "RedTeam_2K"] = "JailBreakV_28K",
split: Optional[str] = None,
):
"""
Initialize the JailBreakV-28K dataset loader.

Args:
source: HuggingFace dataset identifier.
Defaults to "JailbreakV-28K/JailBreakV-28k".
config: Dataset configuration to load.
"JailBreakV_28K" for jailbreak prompts (default),
"RedTeam_2K" for red team queries.
split: Dataset split to load.
For JailBreakV_28K config: "mini_JailBreakV_28K" (280 rows) or
"JailBreakV_28K" (28,300 rows). Defaults to "mini_JailBreakV_28K".
For RedTeam_2K config: "RedTeam_2K" (2,000 rows). Defaults to "RedTeam_2K".
"""
self.source = source
self.config = config

if split is not None:
self.split = split
elif config == "JailBreakV_28K":
self.split = "mini_JailBreakV_28K"
else:
self.split = "RedTeam_2K"

@property
def dataset_name(self) -> str:
"""Return the dataset name."""
return "jailbreakv_28k"

async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
"""
Fetch JailBreakV-28K dataset and return as SeedDataset.

Args:
cache: Whether to cache the fetched dataset. Defaults to True.

Returns:
SeedDataset: A SeedDataset containing jailbreak prompts with harm_categories
derived from the dataset's "policy" column.

Raises:
Exception: If the dataset cannot be loaded or processed, or is empty
after processing (the underlying error is chained as the cause).
"""
try:
logger.info(f"Loading JailBreakV-28K dataset (config={self.config}, split={self.split})")

data = await self._fetch_from_huggingface(
dataset_name=self.source,
config=self.config,
split=self.split,
cache=cache,
)

seed_prompts = []

for item in data:
if self.config == "JailBreakV_28K":
# Guard against missing or null fields in the raw rows.
prompt_text = (item.get("jailbreak_query") or "").strip()
else:
prompt_text = (item.get("question") or "").strip()

if not prompt_text:
logger.warning("[JailBreakV-28K] Skipping item with empty prompt field")
continue

policy = item.get("policy", "")
harm_categories = [policy] if policy else []

metadata = {}
if self.config == "JailBreakV_28K":
redteam_query = item.get("redteam_query", "")
if redteam_query:
metadata["redteam_query"] = redteam_query
fmt = item.get("format", "")
if fmt:
metadata["format"] = fmt
source_from = item.get("from", "")
if source_from:
metadata["from"] = source_from

seed_prompt = SeedPrompt(
value=prompt_text,
data_type="text",
dataset_name=self.dataset_name,
harm_categories=harm_categories,
source="https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k",
authors=["JailbreakV-28K Team"],
metadata=metadata,
)

seed_prompts.append(seed_prompt)

if not seed_prompts:
raise ValueError("SeedDataset cannot be empty.")

logger.info(f"Successfully loaded {len(seed_prompts)} prompts from JailBreakV-28K dataset")

return SeedDataset(seeds=seed_prompts, dataset_name=self.dataset_name)

except Exception as e:
logger.error(f"Failed to load JailBreakV-28K dataset: {str(e)}")
raise Exception(f"Error loading JailBreakV-28K dataset: {str(e)}") from e
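For reviewers, a minimal usage sketch of the new loader (an illustration, not part of the diff; it assumes an async entry point, and `_JailbreakVDataset` is a private class normally reached through the dataset-fetch layer rather than imported directly):

import asyncio

from pyrit.datasets.seed_datasets.remote import _JailbreakVDataset


async def main() -> None:
    # Defaults: config="JailBreakV_28K", split="mini_JailBreakV_28K" (280 rows).
    loader = _JailbreakVDataset()
    dataset = await loader.fetch_dataset(cache=True)
    print(f"{dataset.dataset_name}: {len(dataset.seeds)} prompts")

    # The RedTeam_2K config reads the "question" column instead of "jailbreak_query".
    redteam_loader = _JailbreakVDataset(config="RedTeam_2K")
    redteam = await redteam_loader.fetch_dataset(cache=True)
    print(redteam.seeds[0].harm_categories)  # e.g. ["Hate Speech"]


asyncio.run(main())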
143 changes: 143 additions & 0 deletions tests/unit/datasets/test_jailbreakv_dataset.py
@@ -0,0 +1,143 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from unittest.mock import AsyncMock, patch

import pytest

from pyrit.datasets.seed_datasets.remote.jailbreakv_dataset import (
_JailbreakVDataset,
)
from pyrit.models import SeedDataset


class TestJailbreakVDataset:
"""Unit tests for _JailbreakVDataset."""

def test_dataset_name(self):
dataset = _JailbreakVDataset()
assert dataset.dataset_name == "jailbreakv_28k"

def test_init_defaults(self):
dataset = _JailbreakVDataset()
assert dataset.source == "JailbreakV-28K/JailBreakV-28k"
assert dataset.config == "JailBreakV_28K"
assert dataset.split == "mini_JailBreakV_28K"

def test_init_redteam_config_default_split(self):
dataset = _JailbreakVDataset(config="RedTeam_2K")
assert dataset.config == "RedTeam_2K"
assert dataset.split == "RedTeam_2K"

def test_init_custom_split(self):
dataset = _JailbreakVDataset(split="JailBreakV_28K")
assert dataset.split == "JailBreakV_28K"

@pytest.mark.asyncio
async def test_fetch_dataset_jailbreakv_config(self):
mock_data = [
{
"jailbreak_query": "Test jailbreak prompt 1",
"redteam_query": "Test redteam query 1",
"policy": "Violence",
"format": "text",
"from": "source1",
},
{
"jailbreak_query": "Test jailbreak prompt 2",
"redteam_query": "Test redteam query 2",
"policy": "Fraud",
"format": "text",
"from": "source2",
},
]

dataset_loader = _JailbreakVDataset()

with patch.object(dataset_loader, "_fetch_from_huggingface", new_callable=AsyncMock, return_value=mock_data):
dataset = await dataset_loader.fetch_dataset(cache=False)

assert isinstance(dataset, SeedDataset)
assert dataset.dataset_name == "jailbreakv_28k"
assert len(dataset.seeds) == 2
assert dataset.seeds[0].value == "Test jailbreak prompt 1"
assert dataset.seeds[0].harm_categories == ["Violence"]
assert dataset.seeds[0].metadata["redteam_query"] == "Test redteam query 1"
assert dataset.seeds[0].metadata["format"] == "text"
assert dataset.seeds[0].metadata["from"] == "source1"
assert dataset.seeds[1].harm_categories == ["Fraud"]

@pytest.mark.asyncio
async def test_fetch_dataset_redteam_config(self):
mock_data = [
{
"question": "Test red team question",
"policy": "Hate Speech",
},
]

dataset_loader = _JailbreakVDataset(config="RedTeam_2K")

with patch.object(dataset_loader, "_fetch_from_huggingface", new_callable=AsyncMock, return_value=mock_data):
dataset = await dataset_loader.fetch_dataset(cache=False)

assert isinstance(dataset, SeedDataset)
assert len(dataset.seeds) == 1
assert dataset.seeds[0].value == "Test red team question"
assert dataset.seeds[0].harm_categories == ["Hate Speech"]

@pytest.mark.asyncio
async def test_fetch_dataset_skips_empty_prompts(self):
mock_data = [
{"jailbreak_query": "", "policy": "Violence"},
{"jailbreak_query": " ", "policy": "Fraud"},
{"jailbreak_query": "Valid prompt", "policy": "Malware"},
]

dataset_loader = _JailbreakVDataset()

with patch.object(dataset_loader, "_fetch_from_huggingface", new_callable=AsyncMock, return_value=mock_data):
dataset = await dataset_loader.fetch_dataset(cache=False)

assert len(dataset.seeds) == 1
assert dataset.seeds[0].value == "Valid prompt"

@pytest.mark.asyncio
async def test_fetch_dataset_empty_raises_error(self):
mock_data = [
{"jailbreak_query": "", "policy": "Violence"},
]

dataset_loader = _JailbreakVDataset()

with patch.object(dataset_loader, "_fetch_from_huggingface", new_callable=AsyncMock, return_value=mock_data):
with pytest.raises(Exception, match="Error loading JailBreakV-28K dataset"):
await dataset_loader.fetch_dataset(cache=False)

@pytest.mark.asyncio
async def test_fetch_dataset_empty_policy(self):
mock_data = [
{"jailbreak_query": "Test prompt", "policy": ""},
]

dataset_loader = _JailbreakVDataset()

with patch.object(dataset_loader, "_fetch_from_huggingface", new_callable=AsyncMock, return_value=mock_data):
dataset = await dataset_loader.fetch_dataset(cache=False)

assert len(dataset.seeds) == 1
assert dataset.seeds[0].harm_categories == []

@pytest.mark.asyncio
async def test_fetch_dataset_optional_metadata_missing(self):
mock_data = [
{"jailbreak_query": "Test prompt", "policy": "Violence"},
]

dataset_loader = _JailbreakVDataset()

with patch.object(dataset_loader, "_fetch_from_huggingface", new_callable=AsyncMock, return_value=mock_data):
dataset = await dataset_loader.fetch_dataset(cache=False)

assert len(dataset.seeds) == 1
assert dataset.seeds[0].metadata == {}
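Note for reviewers: the async tests rely on the @pytest.mark.asyncio marker, so running them requires pytest-asyncio; a typical invocation would be pytest tests/unit/datasets/test_jailbreakv_dataset.py -q.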