realtime.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094
  1. # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
  2. from __future__ import annotations
  3. import json
  4. import logging
  5. from types import TracebackType
  6. from typing import TYPE_CHECKING, Any, Iterator, cast
  7. from typing_extensions import AsyncIterator
  8. import httpx
  9. from pydantic import BaseModel
  10. from .sessions import (
  11. Sessions,
  12. AsyncSessions,
  13. SessionsWithRawResponse,
  14. AsyncSessionsWithRawResponse,
  15. SessionsWithStreamingResponse,
  16. AsyncSessionsWithStreamingResponse,
  17. )
  18. from ...._types import NOT_GIVEN, Query, Headers, NotGiven
  19. from ...._utils import (
  20. is_azure_client,
  21. maybe_transform,
  22. strip_not_given,
  23. async_maybe_transform,
  24. is_async_azure_client,
  25. )
  26. from ...._compat import cached_property
  27. from ...._models import construct_type_unchecked
  28. from ...._resource import SyncAPIResource, AsyncAPIResource
  29. from ...._exceptions import OpenAIError
  30. from ...._base_client import _merge_mappings
  31. from ....types.beta.realtime import (
  32. session_update_event_param,
  33. response_create_event_param,
  34. transcription_session_update_param,
  35. )
  36. from .transcription_sessions import (
  37. TranscriptionSessions,
  38. AsyncTranscriptionSessions,
  39. TranscriptionSessionsWithRawResponse,
  40. AsyncTranscriptionSessionsWithRawResponse,
  41. TranscriptionSessionsWithStreamingResponse,
  42. AsyncTranscriptionSessionsWithStreamingResponse,
  43. )
  44. from ....types.websocket_connection_options import WebsocketConnectionOptions
  45. from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent
  46. from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent
  47. from ....types.beta.realtime.conversation_item_param import ConversationItemParam
  48. from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam
  49. if TYPE_CHECKING:
  50. from websockets.sync.client import ClientConnection as WebsocketConnection
  51. from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection
  52. from ...._client import OpenAI, AsyncOpenAI
  53. __all__ = ["Realtime", "AsyncRealtime"]
  54. log: logging.Logger = logging.getLogger(__name__)
  55. class Realtime(SyncAPIResource):
  56. @cached_property
  57. def sessions(self) -> Sessions:
  58. return Sessions(self._client)
  59. @cached_property
  60. def transcription_sessions(self) -> TranscriptionSessions:
  61. return TranscriptionSessions(self._client)
  62. @cached_property
  63. def with_raw_response(self) -> RealtimeWithRawResponse:
  64. """
  65. This property can be used as a prefix for any HTTP method call to return
  66. the raw response object instead of the parsed content.
  67. For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
  68. """
  69. return RealtimeWithRawResponse(self)
  70. @cached_property
  71. def with_streaming_response(self) -> RealtimeWithStreamingResponse:
  72. """
  73. An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  74. For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  75. """
  76. return RealtimeWithStreamingResponse(self)
  77. def connect(
  78. self,
  79. *,
  80. model: str,
  81. extra_query: Query = {},
  82. extra_headers: Headers = {},
  83. websocket_connection_options: WebsocketConnectionOptions = {},
  84. ) -> RealtimeConnectionManager:
  85. """
  86. The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
  87. Some notable benefits of the API include:
  88. - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
  89. - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
  90. - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
  91. The Realtime API is a stateful, event-based API that communicates over a WebSocket.
  92. """
  93. return RealtimeConnectionManager(
  94. client=self._client,
  95. extra_query=extra_query,
  96. extra_headers=extra_headers,
  97. websocket_connection_options=websocket_connection_options,
  98. model=model,
  99. )
  100. class AsyncRealtime(AsyncAPIResource):
  101. @cached_property
  102. def sessions(self) -> AsyncSessions:
  103. return AsyncSessions(self._client)
  104. @cached_property
  105. def transcription_sessions(self) -> AsyncTranscriptionSessions:
  106. return AsyncTranscriptionSessions(self._client)
  107. @cached_property
  108. def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
  109. """
  110. This property can be used as a prefix for any HTTP method call to return
  111. the raw response object instead of the parsed content.
  112. For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
  113. """
  114. return AsyncRealtimeWithRawResponse(self)
  115. @cached_property
  116. def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
  117. """
  118. An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  119. For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  120. """
  121. return AsyncRealtimeWithStreamingResponse(self)
  122. def connect(
  123. self,
  124. *,
  125. model: str,
  126. extra_query: Query = {},
  127. extra_headers: Headers = {},
  128. websocket_connection_options: WebsocketConnectionOptions = {},
  129. ) -> AsyncRealtimeConnectionManager:
  130. """
  131. The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
  132. Some notable benefits of the API include:
  133. - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
  134. - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
  135. - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
  136. The Realtime API is a stateful, event-based API that communicates over a WebSocket.
  137. """
  138. return AsyncRealtimeConnectionManager(
  139. client=self._client,
  140. extra_query=extra_query,
  141. extra_headers=extra_headers,
  142. websocket_connection_options=websocket_connection_options,
  143. model=model,
  144. )
  145. class RealtimeWithRawResponse:
  146. def __init__(self, realtime: Realtime) -> None:
  147. self._realtime = realtime
  148. @cached_property
  149. def sessions(self) -> SessionsWithRawResponse:
  150. return SessionsWithRawResponse(self._realtime.sessions)
  151. @cached_property
  152. def transcription_sessions(self) -> TranscriptionSessionsWithRawResponse:
  153. return TranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)
  154. class AsyncRealtimeWithRawResponse:
  155. def __init__(self, realtime: AsyncRealtime) -> None:
  156. self._realtime = realtime
  157. @cached_property
  158. def sessions(self) -> AsyncSessionsWithRawResponse:
  159. return AsyncSessionsWithRawResponse(self._realtime.sessions)
  160. @cached_property
  161. def transcription_sessions(self) -> AsyncTranscriptionSessionsWithRawResponse:
  162. return AsyncTranscriptionSessionsWithRawResponse(self._realtime.transcription_sessions)
  163. class RealtimeWithStreamingResponse:
  164. def __init__(self, realtime: Realtime) -> None:
  165. self._realtime = realtime
  166. @cached_property
  167. def sessions(self) -> SessionsWithStreamingResponse:
  168. return SessionsWithStreamingResponse(self._realtime.sessions)
  169. @cached_property
  170. def transcription_sessions(self) -> TranscriptionSessionsWithStreamingResponse:
  171. return TranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)
  172. class AsyncRealtimeWithStreamingResponse:
  173. def __init__(self, realtime: AsyncRealtime) -> None:
  174. self._realtime = realtime
  175. @cached_property
  176. def sessions(self) -> AsyncSessionsWithStreamingResponse:
  177. return AsyncSessionsWithStreamingResponse(self._realtime.sessions)
  178. @cached_property
  179. def transcription_sessions(self) -> AsyncTranscriptionSessionsWithStreamingResponse:
  180. return AsyncTranscriptionSessionsWithStreamingResponse(self._realtime.transcription_sessions)
  181. class AsyncRealtimeConnection:
  182. """Represents a live websocket connection to the Realtime API"""
  183. session: AsyncRealtimeSessionResource
  184. response: AsyncRealtimeResponseResource
  185. input_audio_buffer: AsyncRealtimeInputAudioBufferResource
  186. conversation: AsyncRealtimeConversationResource
  187. output_audio_buffer: AsyncRealtimeOutputAudioBufferResource
  188. transcription_session: AsyncRealtimeTranscriptionSessionResource
  189. _connection: AsyncWebsocketConnection
  190. def __init__(self, connection: AsyncWebsocketConnection) -> None:
  191. self._connection = connection
  192. self.session = AsyncRealtimeSessionResource(self)
  193. self.response = AsyncRealtimeResponseResource(self)
  194. self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
  195. self.conversation = AsyncRealtimeConversationResource(self)
  196. self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self)
  197. self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self)
  198. async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
  199. """
  200. An infinite-iterator that will continue to yield events until
  201. the connection is closed.
  202. """
  203. from websockets.exceptions import ConnectionClosedOK
  204. try:
  205. while True:
  206. yield await self.recv()
  207. except ConnectionClosedOK:
  208. return
  209. async def recv(self) -> RealtimeServerEvent:
  210. """
  211. Receive the next message from the connection and parses it into a `RealtimeServerEvent` object.
  212. Canceling this method is safe. There's no risk of losing data.
  213. """
  214. return self.parse_event(await self.recv_bytes())
  215. async def recv_bytes(self) -> bytes:
  216. """Receive the next message from the connection as raw bytes.
  217. Canceling this method is safe. There's no risk of losing data.
  218. If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
  219. then you can call `.parse_event(data)`.
  220. """
  221. message = await self._connection.recv(decode=False)
  222. log.debug(f"Received websocket message: %s", message)
  223. return message
  224. async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
  225. data = (
  226. event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
  227. if isinstance(event, BaseModel)
  228. else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
  229. )
  230. await self._connection.send(data)
  231. async def close(self, *, code: int = 1000, reason: str = "") -> None:
  232. await self._connection.close(code=code, reason=reason)
  233. def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
  234. """
  235. Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
  236. This is helpful if you're using `.recv_bytes()`.
  237. """
  238. return cast(
  239. RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
  240. )
  241. class AsyncRealtimeConnectionManager:
  242. """
  243. Context manager over a `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()`
  244. This context manager ensures that the connection will be closed when it exits.
  245. ---
  246. Note that if your application doesn't work well with the context manager approach then you
  247. can call the `.enter()` method directly to initiate a connection.
  248. **Warning**: You must remember to close the connection with `.close()`.
  249. ```py
  250. connection = await client.beta.realtime.connect(...).enter()
  251. # ...
  252. await connection.close()
  253. ```
  254. """
  255. def __init__(
  256. self,
  257. *,
  258. client: AsyncOpenAI,
  259. model: str,
  260. extra_query: Query,
  261. extra_headers: Headers,
  262. websocket_connection_options: WebsocketConnectionOptions,
  263. ) -> None:
  264. self.__client = client
  265. self.__model = model
  266. self.__connection: AsyncRealtimeConnection | None = None
  267. self.__extra_query = extra_query
  268. self.__extra_headers = extra_headers
  269. self.__websocket_connection_options = websocket_connection_options
  270. async def __aenter__(self) -> AsyncRealtimeConnection:
  271. """
  272. 👋 If your application doesn't work well with the context manager approach then you
  273. can call this method directly to initiate a connection.
  274. **Warning**: You must remember to close the connection with `.close()`.
  275. ```py
  276. connection = await client.beta.realtime.connect(...).enter()
  277. # ...
  278. await connection.close()
  279. ```
  280. """
  281. try:
  282. from websockets.asyncio.client import connect
  283. except ImportError as exc:
  284. raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
  285. extra_query = self.__extra_query
  286. await self.__client._refresh_api_key()
  287. auth_headers = self.__client.auth_headers
  288. if is_async_azure_client(self.__client):
  289. url, auth_headers = await self.__client._configure_realtime(self.__model, extra_query)
  290. else:
  291. url = self._prepare_url().copy_with(
  292. params={
  293. **self.__client.base_url.params,
  294. "model": self.__model,
  295. **extra_query,
  296. },
  297. )
  298. log.debug("Connecting to %s", url)
  299. if self.__websocket_connection_options:
  300. log.debug("Connection options: %s", self.__websocket_connection_options)
  301. self.__connection = AsyncRealtimeConnection(
  302. await connect(
  303. str(url),
  304. user_agent_header=self.__client.user_agent,
  305. additional_headers=_merge_mappings(
  306. {
  307. **auth_headers,
  308. "OpenAI-Beta": "realtime=v1",
  309. },
  310. self.__extra_headers,
  311. ),
  312. **self.__websocket_connection_options,
  313. )
  314. )
  315. return self.__connection
  316. enter = __aenter__
  317. def _prepare_url(self) -> httpx.URL:
  318. if self.__client.websocket_base_url is not None:
  319. base_url = httpx.URL(self.__client.websocket_base_url)
  320. else:
  321. base_url = self.__client._base_url.copy_with(scheme="wss")
  322. merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
  323. return base_url.copy_with(raw_path=merge_raw_path)
  324. async def __aexit__(
  325. self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
  326. ) -> None:
  327. if self.__connection is not None:
  328. await self.__connection.close()
  329. class RealtimeConnection:
  330. """Represents a live websocket connection to the Realtime API"""
  331. session: RealtimeSessionResource
  332. response: RealtimeResponseResource
  333. input_audio_buffer: RealtimeInputAudioBufferResource
  334. conversation: RealtimeConversationResource
  335. output_audio_buffer: RealtimeOutputAudioBufferResource
  336. transcription_session: RealtimeTranscriptionSessionResource
  337. _connection: WebsocketConnection
  338. def __init__(self, connection: WebsocketConnection) -> None:
  339. self._connection = connection
  340. self.session = RealtimeSessionResource(self)
  341. self.response = RealtimeResponseResource(self)
  342. self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
  343. self.conversation = RealtimeConversationResource(self)
  344. self.output_audio_buffer = RealtimeOutputAudioBufferResource(self)
  345. self.transcription_session = RealtimeTranscriptionSessionResource(self)
  346. def __iter__(self) -> Iterator[RealtimeServerEvent]:
  347. """
  348. An infinite-iterator that will continue to yield events until
  349. the connection is closed.
  350. """
  351. from websockets.exceptions import ConnectionClosedOK
  352. try:
  353. while True:
  354. yield self.recv()
  355. except ConnectionClosedOK:
  356. return
  357. def recv(self) -> RealtimeServerEvent:
  358. """
  359. Receive the next message from the connection and parses it into a `RealtimeServerEvent` object.
  360. Canceling this method is safe. There's no risk of losing data.
  361. """
  362. return self.parse_event(self.recv_bytes())
  363. def recv_bytes(self) -> bytes:
  364. """Receive the next message from the connection as raw bytes.
  365. Canceling this method is safe. There's no risk of losing data.
  366. If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
  367. then you can call `.parse_event(data)`.
  368. """
  369. message = self._connection.recv(decode=False)
  370. log.debug(f"Received websocket message: %s", message)
  371. return message
  372. def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
  373. data = (
  374. event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
  375. if isinstance(event, BaseModel)
  376. else json.dumps(maybe_transform(event, RealtimeClientEventParam))
  377. )
  378. self._connection.send(data)
  379. def close(self, *, code: int = 1000, reason: str = "") -> None:
  380. self._connection.close(code=code, reason=reason)
  381. def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
  382. """
  383. Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
  384. This is helpful if you're using `.recv_bytes()`.
  385. """
  386. return cast(
  387. RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
  388. )
  389. class RealtimeConnectionManager:
  390. """
  391. Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()`
  392. This context manager ensures that the connection will be closed when it exits.
  393. ---
  394. Note that if your application doesn't work well with the context manager approach then you
  395. can call the `.enter()` method directly to initiate a connection.
  396. **Warning**: You must remember to close the connection with `.close()`.
  397. ```py
  398. connection = client.beta.realtime.connect(...).enter()
  399. # ...
  400. connection.close()
  401. ```
  402. """
  403. def __init__(
  404. self,
  405. *,
  406. client: OpenAI,
  407. model: str,
  408. extra_query: Query,
  409. extra_headers: Headers,
  410. websocket_connection_options: WebsocketConnectionOptions,
  411. ) -> None:
  412. self.__client = client
  413. self.__model = model
  414. self.__connection: RealtimeConnection | None = None
  415. self.__extra_query = extra_query
  416. self.__extra_headers = extra_headers
  417. self.__websocket_connection_options = websocket_connection_options
  418. def __enter__(self) -> RealtimeConnection:
  419. """
  420. 👋 If your application doesn't work well with the context manager approach then you
  421. can call this method directly to initiate a connection.
  422. **Warning**: You must remember to close the connection with `.close()`.
  423. ```py
  424. connection = client.beta.realtime.connect(...).enter()
  425. # ...
  426. connection.close()
  427. ```
  428. """
  429. try:
  430. from websockets.sync.client import connect
  431. except ImportError as exc:
  432. raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
  433. extra_query = self.__extra_query
  434. self.__client._refresh_api_key()
  435. auth_headers = self.__client.auth_headers
  436. if is_azure_client(self.__client):
  437. url, auth_headers = self.__client._configure_realtime(self.__model, extra_query)
  438. else:
  439. url = self._prepare_url().copy_with(
  440. params={
  441. **self.__client.base_url.params,
  442. "model": self.__model,
  443. **extra_query,
  444. },
  445. )
  446. log.debug("Connecting to %s", url)
  447. if self.__websocket_connection_options:
  448. log.debug("Connection options: %s", self.__websocket_connection_options)
  449. self.__connection = RealtimeConnection(
  450. connect(
  451. str(url),
  452. user_agent_header=self.__client.user_agent,
  453. additional_headers=_merge_mappings(
  454. {
  455. **auth_headers,
  456. "OpenAI-Beta": "realtime=v1",
  457. },
  458. self.__extra_headers,
  459. ),
  460. **self.__websocket_connection_options,
  461. )
  462. )
  463. return self.__connection
  464. enter = __enter__
  465. def _prepare_url(self) -> httpx.URL:
  466. if self.__client.websocket_base_url is not None:
  467. base_url = httpx.URL(self.__client.websocket_base_url)
  468. else:
  469. base_url = self.__client._base_url.copy_with(scheme="wss")
  470. merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
  471. return base_url.copy_with(raw_path=merge_raw_path)
  472. def __exit__(
  473. self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
  474. ) -> None:
  475. if self.__connection is not None:
  476. self.__connection.close()
  477. class BaseRealtimeConnectionResource:
  478. def __init__(self, connection: RealtimeConnection) -> None:
  479. self._connection = connection
  480. class RealtimeSessionResource(BaseRealtimeConnectionResource):
  481. def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None:
  482. """
  483. Send this event to update the session’s default configuration.
  484. The client may send this event at any time to update any field,
  485. except for `voice`. However, note that once a session has been
  486. initialized with a particular `model`, it can’t be changed to
  487. another model using `session.update`.
  488. When the server receives a `session.update`, it will respond
  489. with a `session.updated` event showing the full, effective configuration.
  490. Only the fields that are present are updated. To clear a field like
  491. `instructions`, pass an empty string.
  492. """
  493. self._connection.send(
  494. cast(
  495. RealtimeClientEventParam,
  496. strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
  497. )
  498. )
  499. class RealtimeResponseResource(BaseRealtimeConnectionResource):
  500. def create(
  501. self,
  502. *,
  503. event_id: str | NotGiven = NOT_GIVEN,
  504. response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
  505. ) -> None:
  506. """
  507. This event instructs the server to create a Response, which means triggering
  508. model inference. When in Server VAD mode, the server will create Responses
  509. automatically.
  510. A Response will include at least one Item, and may have two, in which case
  511. the second will be a function call. These Items will be appended to the
  512. conversation history.
  513. The server will respond with a `response.created` event, events for Items
  514. and content created, and finally a `response.done` event to indicate the
  515. Response is complete.
  516. The `response.create` event includes inference configuration like
  517. `instructions`, and `temperature`. These fields will override the Session's
  518. configuration for this Response only.
  519. """
  520. self._connection.send(
  521. cast(
  522. RealtimeClientEventParam,
  523. strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
  524. )
  525. )
  526. def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
  527. """Send this event to cancel an in-progress response.
  528. The server will respond
  529. with a `response.done` event with a status of `response.status=cancelled`. If
  530. there is no response to cancel, the server will respond with an error.
  531. """
  532. self._connection.send(
  533. cast(
  534. RealtimeClientEventParam,
  535. strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
  536. )
  537. )
  538. class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
  539. def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
  540. """Send this event to clear the audio bytes in the buffer.
  541. The server will
  542. respond with an `input_audio_buffer.cleared` event.
  543. """
  544. self._connection.send(
  545. cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
  546. )
  547. def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
  548. """
  549. Send this event to commit the user input audio buffer, which will create a
  550. new user message item in the conversation. This event will produce an error
  551. if the input audio buffer is empty. When in Server VAD mode, the client does
  552. not need to send this event, the server will commit the audio buffer
  553. automatically.
  554. Committing the input audio buffer will trigger input audio transcription
  555. (if enabled in session configuration), but it will not create a response
  556. from the model. The server will respond with an `input_audio_buffer.committed`
  557. event.
  558. """
  559. self._connection.send(
  560. cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
  561. )
  562. def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
  563. """Send this event to append audio bytes to the input audio buffer.
  564. The audio
  565. buffer is temporary storage you can write to and later commit. In Server VAD
  566. mode, the audio buffer is used to detect speech and the server will decide
  567. when to commit. When Server VAD is disabled, you must commit the audio buffer
  568. manually.
  569. The client may choose how much audio to place in each event up to a maximum
  570. of 15 MiB, for example streaming smaller chunks from the client may allow the
  571. VAD to be more responsive. Unlike made other client events, the server will
  572. not send a confirmation response to this event.
  573. """
  574. self._connection.send(
  575. cast(
  576. RealtimeClientEventParam,
  577. strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
  578. )
  579. )
  580. class RealtimeConversationResource(BaseRealtimeConnectionResource):
  581. @cached_property
  582. def item(self) -> RealtimeConversationItemResource:
  583. return RealtimeConversationItemResource(self._connection)
  584. class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
  585. def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
  586. """Send this event when you want to remove any item from the conversation
  587. history.
  588. The server will respond with a `conversation.item.deleted` event,
  589. unless the item does not exist in the conversation history, in which case the
  590. server will respond with an error.
  591. """
  592. self._connection.send(
  593. cast(
  594. RealtimeClientEventParam,
  595. strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
  596. )
  597. )
  598. def create(
  599. self,
  600. *,
  601. item: ConversationItemParam,
  602. event_id: str | NotGiven = NOT_GIVEN,
  603. previous_item_id: str | NotGiven = NOT_GIVEN,
  604. ) -> None:
  605. """
  606. Add a new Item to the Conversation's context, including messages, function
  607. calls, and function call responses. This event can be used both to populate a
  608. "history" of the conversation and to add new items mid-stream, but has the
  609. current limitation that it cannot populate assistant audio messages.
  610. If successful, the server will respond with a `conversation.item.created`
  611. event, otherwise an `error` event will be sent.
  612. """
  613. self._connection.send(
  614. cast(
  615. RealtimeClientEventParam,
  616. strip_not_given(
  617. {
  618. "type": "conversation.item.create",
  619. "item": item,
  620. "event_id": event_id,
  621. "previous_item_id": previous_item_id,
  622. }
  623. ),
  624. )
  625. )
  626. def truncate(
  627. self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
  628. ) -> None:
  629. """Send this event to truncate a previous assistant message’s audio.
  630. The server
  631. will produce audio faster than realtime, so this event is useful when the user
  632. interrupts to truncate audio that has already been sent to the client but not
  633. yet played. This will synchronize the server's understanding of the audio with
  634. the client's playback.
  635. Truncating audio will delete the server-side text transcript to ensure there
  636. is not text in the context that hasn't been heard by the user.
  637. If successful, the server will respond with a `conversation.item.truncated`
  638. event.
  639. """
  640. self._connection.send(
  641. cast(
  642. RealtimeClientEventParam,
  643. strip_not_given(
  644. {
  645. "type": "conversation.item.truncate",
  646. "audio_end_ms": audio_end_ms,
  647. "content_index": content_index,
  648. "item_id": item_id,
  649. "event_id": event_id,
  650. }
  651. ),
  652. )
  653. )
  654. def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
  655. """
  656. Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
  657. The server will respond with a `conversation.item.retrieved` event,
  658. unless the item does not exist in the conversation history, in which case the
  659. server will respond with an error.
  660. """
  661. self._connection.send(
  662. cast(
  663. RealtimeClientEventParam,
  664. strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
  665. )
  666. )
  667. class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
  668. def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
  669. """**WebRTC Only:** Emit to cut off the current audio response.
  670. This will trigger the server to
  671. stop generating audio and emit a `output_audio_buffer.cleared` event. This
  672. event should be preceded by a `response.cancel` client event to stop the
  673. generation of the current response.
  674. [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
  675. """
  676. self._connection.send(
  677. cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
  678. )
  679. class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource):
  680. def update(
  681. self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
  682. ) -> None:
  683. """Send this event to update a transcription session."""
  684. self._connection.send(
  685. cast(
  686. RealtimeClientEventParam,
  687. strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
  688. )
  689. )
  690. class BaseAsyncRealtimeConnectionResource:
  691. def __init__(self, connection: AsyncRealtimeConnection) -> None:
  692. self._connection = connection
  693. class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
  694. async def update(
  695. self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN
  696. ) -> None:
  697. """
  698. Send this event to update the session’s default configuration.
  699. The client may send this event at any time to update any field,
  700. except for `voice`. However, note that once a session has been
  701. initialized with a particular `model`, it can’t be changed to
  702. another model using `session.update`.
  703. When the server receives a `session.update`, it will respond
  704. with a `session.updated` event showing the full, effective configuration.
  705. Only the fields that are present are updated. To clear a field like
  706. `instructions`, pass an empty string.
  707. """
  708. await self._connection.send(
  709. cast(
  710. RealtimeClientEventParam,
  711. strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
  712. )
  713. )
  714. class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
  715. async def create(
  716. self,
  717. *,
  718. event_id: str | NotGiven = NOT_GIVEN,
  719. response: response_create_event_param.Response | NotGiven = NOT_GIVEN,
  720. ) -> None:
  721. """
  722. This event instructs the server to create a Response, which means triggering
  723. model inference. When in Server VAD mode, the server will create Responses
  724. automatically.
  725. A Response will include at least one Item, and may have two, in which case
  726. the second will be a function call. These Items will be appended to the
  727. conversation history.
  728. The server will respond with a `response.created` event, events for Items
  729. and content created, and finally a `response.done` event to indicate the
  730. Response is complete.
  731. The `response.create` event includes inference configuration like
  732. `instructions`, and `temperature`. These fields will override the Session's
  733. configuration for this Response only.
  734. """
  735. await self._connection.send(
  736. cast(
  737. RealtimeClientEventParam,
  738. strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
  739. )
  740. )
  741. async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None:
  742. """Send this event to cancel an in-progress response.
  743. The server will respond
  744. with a `response.done` event with a status of `response.status=cancelled`. If
  745. there is no response to cancel, the server will respond with an error.
  746. """
  747. await self._connection.send(
  748. cast(
  749. RealtimeClientEventParam,
  750. strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
  751. )
  752. )
  753. class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
  754. async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
  755. """Send this event to clear the audio bytes in the buffer.
  756. The server will
  757. respond with an `input_audio_buffer.cleared` event.
  758. """
  759. await self._connection.send(
  760. cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
  761. )
  762. async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
  763. """
  764. Send this event to commit the user input audio buffer, which will create a
  765. new user message item in the conversation. This event will produce an error
  766. if the input audio buffer is empty. When in Server VAD mode, the client does
  767. not need to send this event, the server will commit the audio buffer
  768. automatically.
  769. Committing the input audio buffer will trigger input audio transcription
  770. (if enabled in session configuration), but it will not create a response
  771. from the model. The server will respond with an `input_audio_buffer.committed`
  772. event.
  773. """
  774. await self._connection.send(
  775. cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
  776. )
  777. async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
  778. """Send this event to append audio bytes to the input audio buffer.
  779. The audio
  780. buffer is temporary storage you can write to and later commit. In Server VAD
  781. mode, the audio buffer is used to detect speech and the server will decide
  782. when to commit. When Server VAD is disabled, you must commit the audio buffer
  783. manually.
  784. The client may choose how much audio to place in each event up to a maximum
  785. of 15 MiB, for example streaming smaller chunks from the client may allow the
  786. VAD to be more responsive. Unlike made other client events, the server will
  787. not send a confirmation response to this event.
  788. """
  789. await self._connection.send(
  790. cast(
  791. RealtimeClientEventParam,
  792. strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
  793. )
  794. )
  795. class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
  796. @cached_property
  797. def item(self) -> AsyncRealtimeConversationItemResource:
  798. return AsyncRealtimeConversationItemResource(self._connection)
  799. class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
  800. async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
  801. """Send this event when you want to remove any item from the conversation
  802. history.
  803. The server will respond with a `conversation.item.deleted` event,
  804. unless the item does not exist in the conversation history, in which case the
  805. server will respond with an error.
  806. """
  807. await self._connection.send(
  808. cast(
  809. RealtimeClientEventParam,
  810. strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
  811. )
  812. )
  813. async def create(
  814. self,
  815. *,
  816. item: ConversationItemParam,
  817. event_id: str | NotGiven = NOT_GIVEN,
  818. previous_item_id: str | NotGiven = NOT_GIVEN,
  819. ) -> None:
  820. """
  821. Add a new Item to the Conversation's context, including messages, function
  822. calls, and function call responses. This event can be used both to populate a
  823. "history" of the conversation and to add new items mid-stream, but has the
  824. current limitation that it cannot populate assistant audio messages.
  825. If successful, the server will respond with a `conversation.item.created`
  826. event, otherwise an `error` event will be sent.
  827. """
  828. await self._connection.send(
  829. cast(
  830. RealtimeClientEventParam,
  831. strip_not_given(
  832. {
  833. "type": "conversation.item.create",
  834. "item": item,
  835. "event_id": event_id,
  836. "previous_item_id": previous_item_id,
  837. }
  838. ),
  839. )
  840. )
  841. async def truncate(
  842. self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN
  843. ) -> None:
  844. """Send this event to truncate a previous assistant message’s audio.
  845. The server
  846. will produce audio faster than realtime, so this event is useful when the user
  847. interrupts to truncate audio that has already been sent to the client but not
  848. yet played. This will synchronize the server's understanding of the audio with
  849. the client's playback.
  850. Truncating audio will delete the server-side text transcript to ensure there
  851. is not text in the context that hasn't been heard by the user.
  852. If successful, the server will respond with a `conversation.item.truncated`
  853. event.
  854. """
  855. await self._connection.send(
  856. cast(
  857. RealtimeClientEventParam,
  858. strip_not_given(
  859. {
  860. "type": "conversation.item.truncate",
  861. "audio_end_ms": audio_end_ms,
  862. "content_index": content_index,
  863. "item_id": item_id,
  864. "event_id": event_id,
  865. }
  866. ),
  867. )
  868. )
  869. async def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None:
  870. """
  871. Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
  872. The server will respond with a `conversation.item.retrieved` event,
  873. unless the item does not exist in the conversation history, in which case the
  874. server will respond with an error.
  875. """
  876. await self._connection.send(
  877. cast(
  878. RealtimeClientEventParam,
  879. strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
  880. )
  881. )
  882. class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
  883. async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
  884. """**WebRTC Only:** Emit to cut off the current audio response.
  885. This will trigger the server to
  886. stop generating audio and emit a `output_audio_buffer.cleared` event. This
  887. event should be preceded by a `response.cancel` client event to stop the
  888. generation of the current response.
  889. [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
  890. """
  891. await self._connection.send(
  892. cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
  893. )
  894. class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource):
  895. async def update(
  896. self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
  897. ) -> None:
  898. """Send this event to update a transcription session."""
  899. await self._connection.send(
  900. cast(
  901. RealtimeClientEventParam,
  902. strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}),
  903. )
  904. )