diff --git a/env.example b/env.example index 33c699259c..11ba7baf96 100644 --- a/env.example +++ b/env.example @@ -187,6 +187,12 @@ TOGETHER_API_KEY=... TWILIO_ACCOUNT_SID=... TWILIO_AUTH_TOKEN=... +# Vonage +VONAGE_API_KEY=... +VONAGE_API_SECRET=... +VONAGE_SESSION_ID=1_MX43... +VONAGE_AUDIO_WS_URI=wss://... + # WhatsApp WHATSAPP_TOKEN=... WHATSAPP_WEBHOOK_VERIFICATION_TOKEN=... diff --git a/examples/foundational/49-vonage-audio-connector-openai.py b/examples/foundational/49-vonage-audio-connector-openai.py new file mode 100644 index 0000000000..f5341090ca --- /dev/null +++ b/examples/foundational/49-vonage-audio-connector-openai.py @@ -0,0 +1,271 @@ +# SPDX-License-Identifier: BSD-2-Clause +""" +Vonage Audio Connector with OpenAI. + +The example: +- Runs a Pipecat voice assistant using OpenAI STT/LLM/TTS. +- Exposes a WebSocket server using VonageAudioConnectorTransport. +- Once the server is ready, it calls the Vonage Video API "Audio Connector" + to connect an existing routed session to this WebSocket endpoint. + +Requirements: + - OpenAI API Key + - Vonage API Key + - Vonage API Secret + - Vonage Session Id + - Websocket Server WS URI (ngrok) + + Environment variables (.env file): + OPENAI_API_KEY + VONAGE_API_KEY + VONAGE_API_SECRET + VONAGE_SESSION_ID + VONAGE_AUDIO_WS_URI (e.g. wss://<your-public-host>/ws) + +Note: + Start a Vonage Video API session (routed) in your app, and make sure + VONAGE_SESSION_ID matches that session. + +The example focuses on: +- Wiring Vonage Audio Connector → Pipecat pipeline. +- Using OpenAI for STT + LLM + TTS. 
+""" + +from __future__ import annotations + +import argparse +import asyncio +import os +import sys +from typing import Optional + +from dotenv import load_dotenv +from loguru import logger +from opentok import Client as OpenTokClient # Vonage Video SDK + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext +from pipecat.serializers.vonage import VonageFrameSerializer +from pipecat.services.openai import OpenAILLMService, OpenAISTTService, OpenAITTSService +from pipecat.transports.network.websocket_server import WebsocketServerParams +from pipecat.transports.vonage.audio_connector import VonageAudioConnectorTransport + +logger.remove(0) +logger.add(sys.stderr, level="INFO") + + +SYSTEM_INSTRUCTION = ( + "You are a friendly voice assistant. " + "The user and you will talk through a phone or browser call. " + "Keep responses short (1–2 sentences) and easy to speak aloud." +) + + +async def connect_audio_connector( + *, + api_key: str, + api_secret: str, + session_id: str, + ws_uri: str, + audio_rate: int, + api_base: str, +) -> None: + """ + Call the Vonage Audio Connector "connect" API using the OpenTok SDK: + + POST /v2/project/{apiKey}/connect + { + \"sessionId\": \"...\", + \"token\": \"...\", + \"websocket\": { \"uri\": \"wss://...\", ... } + } + """ + logger.info( + "Connecting Vonage Audio Connector to WebSocket: " + f"session_id={session_id}, ws_uri={ws_uri}, audioRate={audio_rate}" + ) + + # The OpenTok SDK is synchronous, so run it in a thread. 
+ def _call_connect() -> object: + try: + ot = OpenTokClient(api_key, api_secret, api_url=api_base) + except TypeError: + # Older SDKs may not accept api_url + ot = OpenTokClient(api_key, api_secret) + + token = ot.generate_token(session_id) + + ws_opts = { + "uri": ws_uri, + "audioRate": audio_rate, + "bidirectional": True, + } + + resp = ot.connect_audio_to_websocket(session_id, token, ws_opts) + logger.info(f"Audio Connector connect() response (repr): {resp!r}") + return resp + + loop = asyncio.get_running_loop() + try: + await loop.run_in_executor(None, _call_connect) + except Exception as exc: + logger.error(f"Failed to connect Vonage Audio Connector: {exc}") + raise + + +async def main() -> None: + load_dotenv() + + parser = argparse.ArgumentParser( + description="Vonage Audio Connector + OpenAI foundational example" + ) + parser.add_argument( + "--host", default=os.getenv("VONAGE_WS_HOST", "0.0.0.0"), help="WebSocket bind host" + ) + parser.add_argument( + "--port", + type=int, + default=int(os.getenv("VONAGE_WS_PORT", "8005")), + help="WebSocket bind port", + ) + args = parser.parse_args() + + host = args.host + port = args.port + + # --- OpenAI services ----------------------------------------------------- + openai_api_key = os.getenv("OPENAI_API_KEY") + if not openai_api_key: + logger.error("OPENAI_API_KEY is not set. 
Please set it in your .env.") + sys.exit(1) + + stt = OpenAISTTService( + api_key=openai_api_key, + model="gpt-4o-transcribe", + prompt="You will hear a human speaking conversational English.", + ) + + tts = OpenAITTSService( + api_key=openai_api_key, + voice="alloy", # any supported OpenAI voice + instructions="Ignore literal '\\n' characters when speaking.", + ) + + llm = OpenAILLMService(api_key=openai_api_key) + + messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}] + context = OpenAILLMContext(messages) + context_aggregator = llm.create_context_aggregator(context) + + # --- Vonage / Audio Connector config ------------------------------------ + vonage_api_key = os.getenv("VONAGE_API_KEY") + vonage_api_secret = os.getenv("VONAGE_API_SECRET") + vonage_session_id = os.getenv("VONAGE_SESSION_ID") + + if not (vonage_api_key and vonage_api_secret and vonage_session_id): + logger.error( + "VONAGE_API_KEY, VONAGE_API_SECRET, and VONAGE_SESSION_ID " + "must be set in .env for this example." + ) + sys.exit(1) + + api_base = os.getenv("OPENTOK_API_URL", "https://api.opentok.com") + + # Where the Audio Connector will connect: + ws_uri = os.getenv("VONAGE_AUDIO_WS_URI") + if not ws_uri: + # Expose a public wss:// URL (e.g. ngrok or your own domain). + logger.error( + "VONAGE_AUDIO_WS_URI is not set; " + "please set this environment variable to a public wss:// URL (e.g. ngrok)." 
+ ) + sys.exit(1) + + audio_rate = int(os.getenv("VONAGE_AUDIO_RATE", "16000")) + + # --- Serializer & transport --------------------------------------------- + serializer = VonageFrameSerializer( + VonageFrameSerializer.InputParams( + auto_hang_up=False, + send_clear_audio_event=True, + ) + ) + + transport = VonageAudioConnectorTransport( + host=host, + port=port, + params=WebsocketServerParams( + serializer=serializer, + audio_in_enabled=True, + audio_out_enabled=True, + add_wav_header=True, + vad_analyzer=SileroVADAnalyzer(), + session_timeout=60 * 5, + ), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + context_aggregator.user(), + llm, + tts, + transport.output(), + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + audio_out_sample_rate=24_000, + enable_metrics=False, + enable_usage_metrics=False, + ), + ) + + # --- Event handlers ------------------------------------------------------ + + @transport.event_handler("on_client_connected") + async def on_client_connected(_transport, _client): + logger.info("Vonage Audio Connector WebSocket client connected.") + # Optional: send a small intro prompt to prime the LLM + messages.append( + {"role": "system", "content": "Please briefly introduce yourself to the caller."} + ) + await task.queue_frames([context_aggregator.user().get_context_frame()]) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(_transport, _client): + logger.info("Vonage Audio Connector WebSocket client disconnected.") + await task.cancel() + + @transport.event_handler("on_websocket_ready") + async def on_websocket_ready(_client): + """ + Called when the WebSocket server is ready to accept incoming connections. + + We use this to trigger the Audio Connector "connect" call from the same file, + so this foundational example remains single-file and self-contained. 
+ """ + logger.info("WebSocket server ready – calling Audio Connector connect()") + await connect_audio_connector( + api_key=vonage_api_key, + api_secret=vonage_api_secret, + session_id=vonage_session_id, + ws_uri=ws_uri, + audio_rate=audio_rate, + api_base=api_base, + ) + + # --- Run ----------------------------------------------------------------- + runner = PipelineRunner() + logger.info(f"Starting Vonage Audio Connector example on ws://{host}:{port}") + await runner.run(task) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/pyproject.toml b/pyproject.toml index f3e7350c49..2a62e797d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,7 @@ tavus=[] together = [] tracing = [ "opentelemetry-sdk>=1.33.0", "opentelemetry-api>=1.33.0", "opentelemetry-instrumentation>=0.54b0" ] ultravox = [ "transformers>=4.48.0", "vllm>=0.9.0" ] +vonage-audio-connector = [ "pipecat-ai[websockets-base]", "opentok>=3.0.0" ] webrtc = [ "aiortc>=1.13.0,<2", "opencv-python>=4.11.0.86,<5" ] websocket = [ "pipecat-ai[websockets-base]", "fastapi>=0.115.6,<0.122.0" ] websockets-base = [ "websockets>=13.1,<16.0" ] diff --git a/src/pipecat/serializers/vonage.py b/src/pipecat/serializers/vonage.py new file mode 100644 index 0000000000..3ffee99ced --- /dev/null +++ b/src/pipecat/serializers/vonage.py @@ -0,0 +1,315 @@ +# SPDX-License-Identifier: BSD-2-Clause +"""Vonage WebSocket serializer (WAV+pydub resample, fixed-size chunking). + +Note: DTMF is intentionally not implemented because Vonage Audio Connector +does not expose DTMF events over the WebSocket protocol. 
+""" + +from __future__ import annotations + +import io +import json +import wave +from typing import List, Optional, Union + +from loguru import logger +from pydantic import BaseModel +from pydub import AudioSegment + +from pipecat.audio.utils import create_stream_resampler +from pipecat.frames.frames import ( + CancelFrame, + EndFrame, + Frame, + InputAudioRawFrame, + OutputAudioRawFrame, + StartFrame, + StartInterruptionFrame, +) +from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType + +# ---- Audio/timing constants -------------------------------------------------- + +AUDIO_TARGET_RATE_HZ: int = 16_000 # 16 kHz target +AUDIO_CHANNELS_MONO: int = 1 # mono +PCM16_SAMPLE_WIDTH_BYTES: int = 2 # 16-bit PCM +CHUNK_DURATION_MS: int = 20 # telephony frame +SECONDS_PER_MS: float = 1.0 / 1_000.0 +CHUNK_PERIOD_SECONDS: float = CHUNK_DURATION_MS * SECONDS_PER_MS +SLEEP_INTERVAL_PER_CHUNK: float = 0.01 + +BYTES_PER_SAMPLE_MONO: int = AUDIO_CHANNELS_MONO * PCM16_SAMPLE_WIDTH_BYTES +BYTES_PER_CHUNK: int = int(AUDIO_TARGET_RATE_HZ * CHUNK_PERIOD_SECONDS) * BYTES_PER_SAMPLE_MONO + + +class VonageFrameSerializer(FrameSerializer): + """Produces 16 kHz mono PCM chunks; resamples using WAV+pydub path.""" + + class InputParams(BaseModel): + """Configuration options for the Vonage frame serializer. + + Controls whether to send a clear-audio event and whether + to auto-hang-up on End/Cancel frames. + + Hang-up configuration: + + - api_base_url: Base URL for the OpenTok API. + Default: "https://api.opentok.com" + - project_id: OpenTok project / API key (used in the URL path). + - session_id: OpenTok session ID. + - connection_id: Connection ID of the Audio Connector WebSocket connection. + May be set at construction time *or later* via + VonageFrameSerializer.set_connection_id(). + - jwt: JWT for OpenTok, used in X-OPENTOK-AUTH header. 
+ """ + + auto_hang_up: bool = True + send_clear_audio_event: bool = True + + api_base_url: str = "https://api.opentok.com" + project_id: Optional[str] = None + session_id: Optional[str] = None + connection_id: Optional[str] = None + jwt: Optional[str] = None + + def __init__(self, params: Optional[InputParams] = None) -> None: + """Initialize the VonageFrameSerializer. + + Args: + params: Optional configuration parameters for serialization. + """ + self._params: VonageFrameSerializer.InputParams = ( + params or VonageFrameSerializer.InputParams() + ) + self._sample_rate_hz: int = AUDIO_TARGET_RATE_HZ + self._in_resampler = create_stream_resampler() + + # Transport reads this for pacing (one sleep per chunk). + self.sleep_interval: float = SLEEP_INTERVAL_PER_CHUNK + + # Serializer-side audio format assumptions for pydub path: + self._channels: int = AUDIO_CHANNELS_MONO + self._sample_width_bytes: int = PCM16_SAMPLE_WIDTH_BYTES + + # Ensure we only attempt hang-up once + self._hangup_attempted: bool = False + + # Warn early if auto_hang_up is enabled but core config is incomplete. + # NOTE: connection_id is intentionally NOT required here, because in + # the Vonage Audio Connector flow it may only be known after + # connect_audio_to_websocket() runs. It can be set later with + # set_connection_id(). + if self._params.auto_hang_up: + missing = [ + name + for name, value in ( + ("project_id", self._params.project_id), + ("session_id", self._params.session_id), + ("jwt", self._params.jwt), + ) + if not value + ] + if missing: + logger.warning( + "VonageFrameSerializer: auto_hang_up is enabled but the following " + f"fields are not configured: {', '.join(missing)}. " + "Hang-up requests will be skipped until these are provided." 
+ ) + + # ---- public properties / setters ---------------------------------------- + + @property + def connection_id(self) -> Optional[str]: + """Current OpenTok connection ID.""" + return self._params.connection_id + + def set_connection_id(self, connection_id: str) -> None: + """Set or update the OpenTok connection ID. + + This is useful in flows where the Audio Connector connectionId is + only known after calling /connect in a separate component or script. + """ + self._params.connection_id = connection_id + # loguru formats messages with str.format-style braces, not %-style. + logger.debug( + "VonageFrameSerializer: connection_id updated to {!r}", + connection_id, + ) + + @property + def type(self) -> FrameSerializerType: + """Return the serializer type (binary frames).""" + return FrameSerializerType.BINARY + + async def setup(self, frame: StartFrame) -> None: + """Prepare the serializer for a new session. + + Sets the sample rate and sleep interval for chunk pacing. + """ + self._sample_rate_hz = AUDIO_TARGET_RATE_HZ + self.sleep_interval = SLEEP_INTERVAL_PER_CHUNK + + # --- helpers -------------------------------------------------------------- + + @staticmethod + def _resample_audio_with_pydub( + data: bytes, + src_rate_hz: int, + num_channels: int, + sample_width_bytes: int, + target_rate_hz: int, + ) -> bytes: + """Resample via WAV header + pydub. + + NOTE: This assumes `data` contains a WAV header. If your pipeline disables + WAV headers, switch to a raw-PCM resampler instead. 
+ """ + with wave.open(io.BytesIO(data), "rb") as wf: + num_frames = wf.getnframes() + pcm_data = wf.readframes(num_frames) + + segment = AudioSegment.from_raw( + io.BytesIO(pcm_data), + sample_width=sample_width_bytes, + frame_rate=src_rate_hz, + channels=num_channels, + ) + resampled = ( + segment.set_channels(num_channels) + .set_sample_width(sample_width_bytes) + .set_frame_rate(target_rate_hz) + ) + return resampled.raw_data + + @staticmethod + def _split_into_chunks(audio16: bytes) -> List[bytes]: + return [audio16[i : i + BYTES_PER_CHUNK] for i in range(0, len(audio16), BYTES_PER_CHUNK)] + + async def _hang_up_call(self) -> None: + """Hang up the call using OpenTok 'force disconnect' REST API.""" + params = self._params + + missing = [ + name + for name, value in ( + ("project_id", params.project_id), + ("session_id", params.session_id), + ("connection_id", params.connection_id), + ("jwt", params.jwt), + ) + if not value + ] + if missing: + logger.warning( + "VonageFrameSerializer: requested hang-up, but missing required " + f"OpenTok fields: {', '.join(missing)}. Skipping hang-up." + ) + return + + base_url = params.api_base_url.rstrip("/") + endpoint = ( + f"{base_url}/v2/project/{params.project_id}" + f"/session/{params.session_id}/connection/{params.connection_id}" + ) + + headers = { + "X-OPENTOK-AUTH": params.jwt, + } + + logger.info( + "VonageFrameSerializer: calling force disconnect " + f"endpoint={endpoint}, jwt_present={bool(headers.get('X-OPENTOK-AUTH'))}, " + f"connection_id={params.connection_id}" + ) + + try: + import aiohttp + + async with aiohttp.ClientSession() as session: + async with session.delete(endpoint, headers=headers) as resp: + text = await resp.text() + if 200 <= resp.status < 300: + logger.info( + "VonageFrameSerializer: successfully requested force disconnect " + f"for connection {params.connection_id} (status={resp.status})." 
+ ) + elif resp.status == 404: + logger.debug( + "VonageFrameSerializer: connection already disconnected or not found " + f"(connection_id={params.connection_id}, status=404)." + ) + else: + logger.error( + "VonageFrameSerializer: force disconnect request failed " + f"(status={resp.status}): {text}" + ) + except Exception as exc: + logger.exception( + f"VonageFrameSerializer: error while calling OpenTok force disconnect: {exc}" + ) + + # --- API ------------------------------------------------------------------ + + async def serialize(self, frame: Frame) -> Optional[Union[str, bytes, list[bytes]]]: + """Convert a Frame into one or more serialized payloads. + + Args: + frame: The frame to serialize. + + Returns: + The serialized data as a string, bytes, or list of bytes. + """ + # --- Hang-up handling on End/Cancel ---------------------------------- + if isinstance(frame, (EndFrame, CancelFrame)): + if self._params.auto_hang_up and not self._hangup_attempted: + self._hangup_attempted = True + logger.debug("VonageFrameSerializer: End/Cancel observed, triggering hang-up.") + await self._hang_up_call() + else: + logger.debug( + "VonageFrameSerializer: End/Cancel observed; " + "auto_hang_up disabled or already attempted." + ) + # No payload needs to be sent to the WebSocket for End/Cancel. 
+ return None + + # --- Interruption handling ------------------------------------------ + if isinstance(frame, StartInterruptionFrame) and self._params.send_clear_audio_event: + return json.dumps({"event": "clearAudio"}) + + # --- Outbound audio -------------------------------------------------- + if isinstance(frame, OutputAudioRawFrame): + audio16 = self._resample_audio_with_pydub( + data=frame.audio, + src_rate_hz=frame.sample_rate, + num_channels=self._channels, + sample_width_bytes=self._sample_width_bytes, + target_rate_hz=self._sample_rate_hz, + ) + return self._split_into_chunks(audio16) + + logger.debug(f"VonageFrameSerializer: ignoring frame type {type(frame).__name__}.") + return None + + async def deserialize(self, data: Union[str, bytes]) -> Optional[Frame]: + """Convert serialized input data into a Frame. + + Args: + data: The raw audio or frame payload. + + Returns: + The corresponding Frame instance, or None if parsing fails. + """ + # Binary = audio frame from Audio Connector (16-bit PCM, 16 kHz) + if isinstance(data, (bytes, bytearray)): + audio = await self._in_resampler.resample( + bytes(data), self._sample_rate_hz, self._sample_rate_hz + ) + return InputAudioRawFrame( + audio=audio, + num_channels=AUDIO_CHANNELS_MONO, + sample_rate=self._sample_rate_hz, + ) + + # Text messages (websocket:connected / websocket:media:update / websocket:disconnected) + logger.info("VonageFrameSerializer: ignoring non-binary inbound data.") + return None diff --git a/src/pipecat/transports/vonage/__init__.py b/src/pipecat/transports/vonage/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/pipecat/transports/vonage/audio_connector.py b/src/pipecat/transports/vonage/audio_connector.py new file mode 100644 index 0000000000..e261d31fce --- /dev/null +++ b/src/pipecat/transports/vonage/audio_connector.py @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: BSD-2-Clause +"""Vonage WebSocket transport (chunk iterator + sleep-per-chunk pacing).""" + 
+from __future__ import annotations + +import asyncio +import io +import wave +from typing import Optional + +from loguru import logger + +from pipecat.frames.frames import Frame, OutputAudioRawFrame +from pipecat.transports.base_transport import BaseTransport +from pipecat.transports.network.websocket_server import ( + WebsocketServerOutputTransport, + WebsocketServerParams, + WebsocketServerTransport, +) + +# ---- Constants --------------------------------------------------------------- + +DEFAULT_WS_HOST: str = "localhost" +DEFAULT_WS_PORT: int = 8765 +PCM16_SAMPLE_WIDTH_BYTES: int = 2 # 16-bit PCM + + +class VonageAudioConnectorTransport(WebsocketServerTransport): + """WebSocket server transport that paces by sleeping once per audio chunk.""" + + def __init__( + self, + params: WebsocketServerParams, + host: str = DEFAULT_WS_HOST, + port: int = DEFAULT_WS_PORT, + input_name: Optional[str] = None, + output_name: Optional[str] = None, + ) -> None: + """Initialize the Vonage WebSocket server transport. + + Args: + params: WebSocket server parameters including serializer and audio options. + host: Host address for the WebSocket server. + port: Port number for the WebSocket server. + input_name: Optional name for the input transport. + output_name: Optional name for the output transport. + """ + super().__init__(params, host, port, input_name, output_name) + self._params = params + + def output(self) -> WebsocketServerOutputTransport: + """Return the output transport used to send data to clients.""" + if not self._output: + self._output = VonageAudioConnectorOutputTransport(self, self._params) + return self._output + + +class VonageAudioConnectorOutputTransport(WebsocketServerOutputTransport): + """Output transport that sends each serializer-produced chunk and sleeps between sends.""" + + def __init__(self, transport: BaseTransport, params: WebsocketServerParams, **kwargs) -> None: + """Initialize the Vonage WebSocket output transport. 
+ + Args: + transport: The base transport instance to wrap. + params: WebSocket server parameters. + **kwargs: Additional keyword arguments for the base class. + """ + super().__init__(transport, params, **kwargs) + + async def write_audio_frame(self, frame: OutputAudioRawFrame) -> None: + """Write an audio frame to the WebSocket client with pacing.""" + if not self._websocket: + # Keep pipeline timing consistent if the client isn't connected yet. + await self._write_audio_sleep() + return + + normalized = OutputAudioRawFrame( + audio=frame.audio, + sample_rate=self.sample_rate, + num_channels=self._params.audio_out_channels, + ) + + if self._params.add_wav_header: + with io.BytesIO() as buffer: + with wave.open(buffer, "wb") as wf: + wf.setsampwidth(PCM16_SAMPLE_WIDTH_BYTES) + wf.setnchannels(normalized.num_channels) + wf.setframerate(normalized.sample_rate) + wf.writeframes(normalized.audio) + normalized = OutputAudioRawFrame( + audio=buffer.getvalue(), + sample_rate=normalized.sample_rate, + num_channels=normalized.num_channels, + ) + + await self._write_frame(normalized) + + async def _write_frame(self, frame: Frame) -> None: + """Serialize and send a frame to the WebSocket client.""" + if not self._params.serializer: + return + + try: + payload = await self._params.serializer.serialize(frame) + # A str/bytes payload (e.g. the clearAudio JSON event) is ONE message; + # wrap it so the chunk loop below does not iterate it element by element. + if isinstance(payload, (str, bytes)): + payload = [payload] + if payload and self._websocket: + # For audio, serializer returns a list[bytes] of chunks. + # Pace by sleeping once per chunk using serializer's interval. 
+ for chunk in payload: + await self._websocket.send(chunk) + await asyncio.sleep(self._params.serializer.sleep_interval) + except Exception as exc: + logger.error(f"{self} exception sending data: {exc.__class__.__name__} ({exc})") diff --git a/uv.lock b/uv.lock index 3db38f2a90..74f3a73909 100644 --- a/uv.lock +++ b/uv.lock @@ -4110,6 +4110,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/b5/cf25da2218910f0d6cdf7f876a06bed118c4969eacaf60a887cbaef44f44/opentelemetry_semantic_conventions_ai-0.4.13-py3-none-any.whl", hash = "sha256:883a30a6bb5deaec0d646912b5f9f6dcbb9f6f72557b73d0f2560bf25d13e2d5", size = 6080, upload-time = "2025-08-22T10:14:16.477Z" }, ] +[[package]] +name = "opentok" +version = "3.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyjwt", extra = ["crypto"] }, + { name = "pytz" }, + { name = "requests" }, + { name = "rsa" }, + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/00/950720f6fe19f76ce86a070d6f43076458f4c9d982d50f16edb8dae37a8b/opentok-3.13.0.tar.gz", hash = "sha256:169e27662508a6b79f44bc60c870392e6b02b6dab618c2a319b16c4efbe80d9d", size = 62758, upload-time = "2025-09-19T18:21:44.715Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/ab/2b76b3f03ce8c4a338a495cf97352e35037c5c8467a6af4e4cfae805efc9/opentok-3.13.0-py2.py3-none-any.whl", hash = "sha256:f6f70e6b56b144bb93fa45fcbb68acc63a9c95b5d588450a20cbd9ca05c300a6", size = 41028, upload-time = "2025-09-19T18:21:43.477Z" }, +] + [[package]] name = "orjson" version = "3.11.3" @@ -4621,6 +4637,10 @@ ultravox = [ { name = "transformers" }, { name = "vllm" }, ] +vonage-audio-connector = [ + { name = "opentok" }, + { name = "websockets" }, +] webrtc = [ { name = "aiortc" }, { name = "opencv-python" }, @@ -4714,6 +4734,7 @@ requires-dist = [ { name = "opentelemetry-api", marker = "extra == 'tracing'", specifier = ">=1.33.0" }, { name = "opentelemetry-instrumentation", marker = "extra == 
'tracing'", specifier = ">=0.54b0" }, { name = "opentelemetry-sdk", marker = "extra == 'tracing'", specifier = ">=1.33.0" }, + { name = "opentok", marker = "extra == 'vonage-audio-connector'", specifier = ">=3.0.0" }, { name = "ormsgpack", marker = "extra == 'fish'", specifier = "~=1.7.0" }, { name = "pillow", specifier = ">=11.1.0,<12" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'assemblyai'" }, @@ -4732,6 +4753,7 @@ requires-dist = [ { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'rime'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'sarvam'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'soniox'" }, + { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'vonage-audio-connector'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'websocket'" }, { name = "pipecat-ai-krisp", marker = "extra == 'krisp'", specifier = "~=0.4.0" }, { name = "pipecat-ai-small-webrtc-prebuilt", marker = "extra == 'runner'", specifier = ">=1.0.0" }, @@ -4765,7 +4787,7 @@ requires-dist = [ { name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1" }, { name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" }, ] -provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "krisp", "langchain", "livekit", "lmnt", "local", "local-smart-turn", "local-smart-turn-v3", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "nim", "noisereduce", "openai", "openpipe", "openrouter", "perplexity", "playht", "qwen", "remote-smart-turn", "rime", "riva", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "soniox", "soundfile", 
"speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"] +provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "krisp", "langchain", "livekit", "lmnt", "local", "local-smart-turn", "local-smart-turn-v3", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "nim", "noisereduce", "openai", "openpipe", "openrouter", "perplexity", "playht", "qwen", "remote-smart-turn", "rime", "riva", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "vonage-audio-connector", "webrtc", "websocket", "websockets-base", "whisper"] [package.metadata.requires-dev] dev = [ @@ -5319,6 +5341,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, ] +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + [[package]] name = "pylibsrtp" version = "0.12.0"