| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624 |
- import { APIResource } from "../../core/resource.mjs";
- import * as ClientSecretsAPI from "./client-secrets.mjs";
- import * as RealtimeAPI from "./realtime.mjs";
- import * as ResponsesAPI from "../responses/responses.mjs";
- import { APIPromise } from "../../core/api-promise.mjs";
- import { RequestOptions } from "../../internal/request-options.mjs";
- export declare class ClientSecrets extends APIResource {
- /**
- * Create a Realtime client secret with an associated session configuration.
- *
- * @param body - Request parameters (`ClientSecretCreateParams`). The argument
- * itself is required by this signature even though every field on it is
- * optional, so an empty object is the minimal valid value.
- * @param options - Optional `RequestOptions` for this call.
- * @returns An `APIPromise` of `ClientSecretCreateResponse`.
- *
- * @example
- * ```ts
- * const clientSecret =
- * await client.realtime.clientSecrets.create({});
- * ```
- */
- create(body: ClientSecretCreateParams, options?: RequestOptions): APIPromise<ClientSecretCreateResponse>;
- }
- /**
- * Ephemeral key returned by the API.
- */
- export interface RealtimeSessionClientSecret {
- /**
- * Timestamp for when the token expires. Currently, all tokens expire after one
- * minute.
- *
- * NOTE(review): `ClientSecretCreateParams.ExpiresAfter.seconds` in this file
- * documents a configurable lifetime (10 to 7200 seconds, default 600), so the
- * "one minute" statement above may be outdated. Confirm against the API
- * reference.
- */
- expires_at: number;
- /**
- * Ephemeral key usable in client environments to authenticate connections to the
- * Realtime API. Use this in client-side environments rather than a standard API
- * token, which should only be used server-side.
- */
- value: string;
- }
- /**
- * A new Realtime session configuration, with an ephemeral key. Default TTL for
- * keys is one minute.
- *
- * This is one of the two shapes accepted by `ClientSecretCreateResponse.session`;
- * it is the one whose `type` is the literal `'realtime'`.
- */
- export interface RealtimeSessionCreateResponse {
- /**
- * Ephemeral key returned by the API.
- */
- client_secret: RealtimeSessionClientSecret;
- /**
- * The type of session to create. Always `realtime` for the Realtime API.
- */
- type: 'realtime';
- /**
- * Configuration for input and output audio.
- */
- audio?: RealtimeSessionCreateResponse.Audio;
- /**
- * Additional fields to include in server outputs.
- *
- * `item.input_audio_transcription.logprobs`: Include logprobs for input audio
- * transcription.
- */
- include?: Array<'item.input_audio_transcription.logprobs'>;
- /**
- * The default system instructions (i.e. system message) prepended to model calls.
- * This field allows the client to guide the model on desired responses. The model
- * can be instructed on response content and format, (e.g. "be extremely succinct",
- * "act friendly", "here are examples of good responses") and on audio behavior
- * (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
- * instructions are not guaranteed to be followed by the model, but they provide
- * guidance to the model on the desired behavior.
- *
- * Note that the server sets default instructions which will be used if this field
- * is not set and are visible in the `session.created` event at the start of the
- * session.
- */
- instructions?: string;
- /**
- * Maximum number of output tokens for a single assistant response, inclusive of
- * tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
- * `inf` for the maximum available tokens for a given model. Defaults to `inf`.
- */
- max_output_tokens?: number | 'inf';
- /**
- * The Realtime model used for this session.
- *
- * The `(string & {})` member keeps the union open to model names not listed
- * here while still offering the known literals for editor completion.
- */
- model?: (string & {}) | 'gpt-realtime' | 'gpt-realtime-2025-08-28' | 'gpt-4o-realtime-preview' | 'gpt-4o-realtime-preview-2024-10-01' | 'gpt-4o-realtime-preview-2024-12-17' | 'gpt-4o-realtime-preview-2025-06-03' | 'gpt-4o-mini-realtime-preview' | 'gpt-4o-mini-realtime-preview-2024-12-17' | 'gpt-realtime-mini' | 'gpt-realtime-mini-2025-10-06' | 'gpt-realtime-mini-2025-12-15' | 'gpt-audio-mini' | 'gpt-audio-mini-2025-10-06' | 'gpt-audio-mini-2025-12-15';
- /**
- * The set of modalities the model can respond with. It defaults to `["audio"]`,
- * indicating that the model will respond with audio plus a transcript. `["text"]`
- * can be used to make the model respond with text only. It is not possible to
- * request both `text` and `audio` at the same time.
- */
- output_modalities?: Array<'text' | 'audio'>;
- /**
- * Reference to a prompt template and its variables.
- * [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
- */
- prompt?: ResponsesAPI.ResponsePrompt | null;
- /**
- * How the model chooses tools. Provide one of the string modes or force a specific
- * function/MCP tool.
- */
- tool_choice?: ResponsesAPI.ToolChoiceOptions | ResponsesAPI.ToolChoiceFunction | ResponsesAPI.ToolChoiceMcp;
- /**
- * Tools available to the model.
- */
- tools?: Array<RealtimeAPI.RealtimeFunctionTool | RealtimeSessionCreateResponse.McpTool>;
- /**
- * Realtime API can write session traces to the
- * [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once
- * tracing is enabled for a session, the configuration cannot be modified.
- *
- * `auto` will create a trace for the session with default values for the workflow
- * name, group id, and metadata.
- */
- tracing?: 'auto' | RealtimeSessionCreateResponse.TracingConfiguration | null;
- /**
- * When the number of tokens in a conversation exceeds the model's input token
- * limit, the conversation will be truncated, meaning messages (starting from the
- * oldest) will not be included in the model's context. A 32k context model with
- * 4,096 max output tokens can only include 28,224 tokens in the context before
- * truncation occurs.
- *
- * Clients can configure truncation behavior to truncate with a lower max token
- * limit, which is an effective way to control token usage and cost.
- *
- * Truncation will reduce the number of cached tokens on the next turn (busting the
- * cache), since messages are dropped from the beginning of the context. However,
- * clients can also configure truncation to retain messages up to a fraction of the
- * maximum context size, which will reduce the need for future truncations and thus
- * improve the cache rate.
- *
- * Truncation can be disabled entirely, which means the server will never truncate
- * but would instead return an error if the conversation exceeds the model's input
- * token limit.
- */
- truncation?: RealtimeAPI.RealtimeTruncation;
- }
- export declare namespace RealtimeSessionCreateResponse {
- /**
- * Configuration for input and output audio.
- *
- * Both `input` and `output` are optional; their shapes are declared in the
- * nested `Audio` namespace.
- */
- interface Audio {
- input?: Audio.Input;
- output?: Audio.Output;
- }
- namespace Audio {
- interface Input {
- /**
- * The format of the input audio.
- */
- format?: RealtimeAPI.RealtimeAudioFormats;
- /**
- * Configuration for input audio noise reduction. This can be set to `null` to turn
- * off. Noise reduction filters audio added to the input audio buffer before it is
- * sent to VAD and the model. Filtering the audio can improve VAD and turn
- * detection accuracy (reducing false positives) and model performance by improving
- * perception of the input audio.
- *
- * NOTE(review): the declared type here does not include `null`, although the
- * text above mentions setting it to `null`. Confirm whether a response can
- * actually carry `null` for this field.
- */
- noise_reduction?: Input.NoiseReduction;
- /**
- * Configuration for input audio transcription, defaults to off and can be set to
- * `null` to turn off once on. Input audio transcription is not native to the
- * model, since the model consumes audio directly. Transcription runs
- * asynchronously through
- * [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
- * and should be treated as guidance of input audio content rather than precisely
- * what the model heard. The client can optionally set the language and prompt for
- * transcription, these offer additional guidance to the transcription service.
- */
- transcription?: RealtimeAPI.AudioTranscription;
- /**
- * Configuration for turn detection, either Server VAD or Semantic VAD. This can be
- * set to `null` to turn off, in which case the client must manually trigger model
- * response.
- *
- * Server VAD means that the model will detect the start and end of speech based on
- * audio volume and respond at the end of user speech.
- *
- * Semantic VAD is more advanced and uses a turn detection model (in conjunction
- * with VAD) to semantically estimate whether the user has finished speaking, then
- * dynamically sets a timeout based on this probability. For example, if user audio
- * trails off with "uhhm", the model will score a low probability of turn end and
- * wait longer for the user to continue speaking. This can be useful for more
- * natural conversations, but may have a higher latency.
- *
- * The two variants are distinguished by their literal `type` field
- * (`'server_vad'` or `'semantic_vad'`).
- */
- turn_detection?: Input.ServerVad | Input.SemanticVad | null;
- }
- namespace Input {
- /**
- * Configuration for input audio noise reduction. This can be set to `null` to turn
- * off. Noise reduction filters audio added to the input audio buffer before it is
- * sent to VAD and the model. Filtering the audio can improve VAD and turn
- * detection accuracy (reducing false positives) and model performance by improving
- * perception of the input audio.
- */
- interface NoiseReduction {
- /**
- * Type of noise reduction. `near_field` is for close-talking microphones such as
- * headphones, `far_field` is for far-field microphones such as laptop or
- * conference room microphones.
- */
- type?: RealtimeAPI.NoiseReductionType;
- }
- /**
- * Server-side voice activity detection (VAD) which flips on when user speech is
- * detected and off after a period of silence.
- */
- interface ServerVad {
- /**
- * Type of turn detection, `server_vad` to turn on simple Server VAD.
- */
- type: 'server_vad';
- /**
- * Whether or not to automatically generate a response when a VAD stop event
- * occurs. If `interrupt_response` is set to `false` this may fail to create a
- * response if the model is already responding.
- *
- * If both `create_response` and `interrupt_response` are set to `false`, the model
- * will never respond automatically but VAD events will still be emitted.
- */
- create_response?: boolean;
- /**
- * Optional timeout after which a model response will be triggered automatically.
- * This is useful for situations in which a long pause from the user is unexpected,
- * such as a phone call. The model will effectively prompt the user to continue the
- * conversation based on the current context.
- *
- * The timeout value will be applied after the last model response's audio has
- * finished playing, i.e. it's set to the `response.done` time plus audio playback
- * duration.
- *
- * An `input_audio_buffer.timeout_triggered` event (plus events associated with the
- * Response) will be emitted when the timeout is reached. Idle timeout is currently
- * only supported for `server_vad` mode.
- */
- idle_timeout_ms?: number | null;
- /**
- * Whether or not to automatically interrupt (cancel) any ongoing response with
- * output to the default conversation (i.e. `conversation` of `auto`) when a VAD
- * start event occurs. If `true` then the response will be cancelled, otherwise it
- * will continue until complete.
- *
- * If both `create_response` and `interrupt_response` are set to `false`, the model
- * will never respond automatically but VAD events will still be emitted.
- */
- interrupt_response?: boolean;
- /**
- * Used only for `server_vad` mode. Amount of audio to include before the VAD
- * detected speech (in milliseconds). Defaults to 300ms.
- */
- prefix_padding_ms?: number;
- /**
- * Used only for `server_vad` mode. Duration of silence to detect speech stop (in
- * milliseconds). Defaults to 500ms. With shorter values the model will respond
- * more quickly, but may jump in on short pauses from the user.
- */
- silence_duration_ms?: number;
- /**
- * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
- * defaults to 0.5. A higher threshold will require louder audio to activate the
- * model, and thus might perform better in noisy environments.
- */
- threshold?: number;
- }
- /**
- * Server-side semantic turn detection which uses a model to determine when the
- * user has finished speaking.
- */
- interface SemanticVad {
- /**
- * Type of turn detection, `semantic_vad` to turn on Semantic VAD.
- */
- type: 'semantic_vad';
- /**
- * Whether or not to automatically generate a response when a VAD stop event
- * occurs.
- */
- create_response?: boolean;
- /**
- * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
- * will wait longer for the user to continue speaking, `high` will respond more
- * quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
- * and `high` have max timeouts of 8s, 4s, and 2s respectively.
- */
- eagerness?: 'low' | 'medium' | 'high' | 'auto';
- /**
- * Whether or not to automatically interrupt any ongoing response with output to
- * the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- * occurs.
- */
- interrupt_response?: boolean;
- }
- }
- interface Output {
- /**
- * The format of the output audio.
- */
- format?: RealtimeAPI.RealtimeAudioFormats;
- /**
- * The speed of the model's spoken response as a multiple of the original speed.
- * 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
- * This value can only be changed in between model turns, not while a response is
- * in progress.
- *
- * This parameter is a post-processing adjustment to the audio after it is
- * generated, it's also possible to prompt the model to speak faster or slower.
- */
- speed?: number;
- /**
- * The voice the model uses to respond. Voice cannot be changed during the session
- * once the model has responded with audio at least once. Current voice options are
- * `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
- * and `cedar`. We recommend `marin` and `cedar` for best quality.
- *
- * The `(string & {})` member keeps the union open to voice names not listed
- * here.
- */
- voice?: (string & {}) | 'alloy' | 'ash' | 'ballad' | 'coral' | 'echo' | 'sage' | 'shimmer' | 'verse' | 'marin' | 'cedar';
- }
- }
- /**
- * Give the model access to additional tools via remote Model Context Protocol
- * (MCP) servers.
- * [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp).
- */
- interface McpTool {
- /**
- * A label for this MCP server, used to identify it in tool calls.
- */
- server_label: string;
- /**
- * The type of the MCP tool. Always `mcp`.
- */
- type: 'mcp';
- /**
- * List of allowed tool names or a filter object.
- */
- allowed_tools?: Array<string> | McpTool.McpToolFilter | null;
- /**
- * An OAuth access token that can be used with a remote MCP server, either with a
- * custom MCP server URL or a service connector. Your application must handle the
- * OAuth authorization flow and provide the token here.
- */
- authorization?: string;
- /**
- * Identifier for service connectors, like those available in ChatGPT. One of
- * `server_url` or `connector_id` must be provided. Learn more about service
- * connectors
- * [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors).
- *
- * Currently supported `connector_id` values are:
- *
- * - Dropbox: `connector_dropbox`
- * - Gmail: `connector_gmail`
- * - Google Calendar: `connector_googlecalendar`
- * - Google Drive: `connector_googledrive`
- * - Microsoft Teams: `connector_microsoftteams`
- * - Outlook Calendar: `connector_outlookcalendar`
- * - Outlook Email: `connector_outlookemail`
- * - SharePoint: `connector_sharepoint`
- */
- connector_id?: 'connector_dropbox' | 'connector_gmail' | 'connector_googlecalendar' | 'connector_googledrive' | 'connector_microsoftteams' | 'connector_outlookcalendar' | 'connector_outlookemail' | 'connector_sharepoint';
- /**
- * Optional HTTP headers to send to the MCP server. Use for authentication or other
- * purposes.
- */
- headers?: {
- [key: string]: string;
- } | null;
- /**
- * Specify which of the MCP server's tools require approval.
- */
- require_approval?: McpTool.McpToolApprovalFilter | 'always' | 'never' | null;
- /**
- * Optional description of the MCP server, used to provide more context.
- */
- server_description?: string;
- /**
- * The URL for the MCP server. One of `server_url` or `connector_id` must be
- * provided.
- *
- * Both `server_url` and `connector_id` are typed as optional, so this
- * "one of" requirement is not enforced by the type.
- */
- server_url?: string;
- }
- namespace McpTool {
- /**
- * A filter object to specify which tools are allowed.
- */
- interface McpToolFilter {
- /**
- * Indicates whether or not a tool modifies data or is read-only. If an MCP server
- * is
- * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
- * it will match this filter.
- */
- read_only?: boolean;
- /**
- * List of allowed tool names.
- */
- tool_names?: Array<string>;
- }
- /**
- * Specify which of the MCP server's tools require approval. Can be `always`,
- * `never`, or a filter object associated with tools that require approval.
- */
- interface McpToolApprovalFilter {
- /**
- * A filter object to specify which tools are allowed.
- *
- * NOTE(review): this text is identical to the one on `never`. Presumably this
- * filter selects the tools that always require approval; confirm against the
- * API reference.
- */
- always?: McpToolApprovalFilter.Always;
- /**
- * A filter object to specify which tools are allowed.
- *
- * NOTE(review): this text is identical to the one on `always`. Presumably this
- * filter selects the tools that never require approval; confirm against the
- * API reference.
- */
- never?: McpToolApprovalFilter.Never;
- }
- namespace McpToolApprovalFilter {
- /**
- * A filter object to specify which tools are allowed.
- */
- interface Always {
- /**
- * Indicates whether or not a tool modifies data or is read-only. If an MCP server
- * is
- * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
- * it will match this filter.
- */
- read_only?: boolean;
- /**
- * List of allowed tool names.
- */
- tool_names?: Array<string>;
- }
- /**
- * A filter object to specify which tools are allowed.
- */
- interface Never {
- /**
- * Indicates whether or not a tool modifies data or is read-only. If an MCP server
- * is
- * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
- * it will match this filter.
- */
- read_only?: boolean;
- /**
- * List of allowed tool names.
- */
- tool_names?: Array<string>;
- }
- }
- }
- /**
- * Granular configuration for tracing.
- */
- interface TracingConfiguration {
- /**
- * The group id to attach to this trace to enable filtering and grouping in the
- * Traces Dashboard.
- */
- group_id?: string;
- /**
- * The arbitrary metadata to attach to this trace to enable filtering in the Traces
- * Dashboard.
- *
- * Typed as `unknown`, so consumers must narrow the value before reading it.
- */
- metadata?: unknown;
- /**
- * The name of the workflow to attach to this trace. This is used to name the trace
- * in the Traces Dashboard.
- */
- workflow_name?: string;
- }
- }
- /**
- * A Realtime transcription session configuration object.
- *
- * This is one of the two shapes accepted by `ClientSecretCreateResponse.session`;
- * it is the one whose `type` is the literal `'transcription'`.
- */
- export interface RealtimeTranscriptionSessionCreateResponse {
- /**
- * Unique identifier for the session that looks like `sess_1234567890abcdef`.
- */
- id: string;
- /**
- * The object type. Always `realtime.transcription_session`.
- *
- * Declared as a plain `string` rather than a literal type, so it cannot be
- * used for type narrowing; use `type` for that.
- */
- object: string;
- /**
- * The type of session. Always `transcription` for transcription sessions.
- */
- type: 'transcription';
- /**
- * Configuration for input audio for the session.
- */
- audio?: RealtimeTranscriptionSessionCreateResponse.Audio;
- /**
- * Expiration timestamp for the session, in seconds since epoch.
- */
- expires_at?: number;
- /**
- * Additional fields to include in server outputs.
- *
- * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
- * transcription.
- */
- include?: Array<'item.input_audio_transcription.logprobs'>;
- }
- export declare namespace RealtimeTranscriptionSessionCreateResponse {
- /**
- * Configuration for input audio for the session.
- *
- * Unlike `RealtimeSessionCreateResponse.Audio`, this shape declares only an
- * `input` side.
- */
- interface Audio {
- input?: Audio.Input;
- }
- namespace Audio {
- interface Input {
- /**
- * The PCM audio format. Only a 24kHz sample rate is supported.
- */
- format?: RealtimeAPI.RealtimeAudioFormats;
- /**
- * Configuration for input audio noise reduction.
- */
- noise_reduction?: Input.NoiseReduction;
- /**
- * Configuration of the transcription model.
- */
- transcription?: RealtimeAPI.AudioTranscription;
- /**
- * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
- * means that the model will detect the start and end of speech based on audio
- * volume and respond at the end of user speech.
- *
- * NOTE(review): the declared type does not include `null`, although the text
- * above mentions setting it to `null`. Confirm whether a response can actually
- * carry `null` for this field.
- */
- turn_detection?: ClientSecretsAPI.RealtimeTranscriptionSessionTurnDetection;
- }
- namespace Input {
- /**
- * Configuration for input audio noise reduction.
- */
- interface NoiseReduction {
- /**
- * Type of noise reduction. `near_field` is for close-talking microphones such as
- * headphones, `far_field` is for far-field microphones such as laptop or
- * conference room microphones.
- */
- type?: RealtimeAPI.NoiseReductionType;
- }
- }
- }
- }
- /**
- * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
- * means that the model will detect the start and end of speech based on audio
- * volume and respond at the end of user speech.
- *
- * Every field is optional. This type is used by
- * `RealtimeTranscriptionSessionCreateResponse.Audio.Input.turn_detection`.
- */
- export interface RealtimeTranscriptionSessionTurnDetection {
- /**
- * Amount of audio to include before the VAD detected speech (in milliseconds).
- * Defaults to 300ms.
- */
- prefix_padding_ms?: number;
- /**
- * Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
- * With shorter values the model will respond more quickly, but may jump in on
- * short pauses from the user.
- */
- silence_duration_ms?: number;
- /**
- * Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
- * threshold will require louder audio to activate the model, and thus might
- * perform better in noisy environments.
- */
- threshold?: number;
- /**
- * Type of turn detection, only `server_vad` is currently supported.
- *
- * NOTE(review): declared as a plain `string`, not the literal `'server_vad'`,
- * so the type checker will not reject other values.
- */
- type?: string;
- }
- /**
- * Response from creating a session and client secret for the Realtime API.
- */
- export interface ClientSecretCreateResponse {
- /**
- * Expiration timestamp for the client secret, in seconds since epoch.
- */
- expires_at: number;
- /**
- * The session configuration for either a realtime or transcription session.
- *
- * Narrow the union by checking `session.type`: `'realtime'` selects
- * `RealtimeSessionCreateResponse`, and `'transcription'` selects
- * `RealtimeTranscriptionSessionCreateResponse`.
- */
- session: RealtimeSessionCreateResponse | RealtimeTranscriptionSessionCreateResponse;
- /**
- * The generated client secret value.
- */
- value: string;
- }
- export interface ClientSecretCreateParams {
- /**
- * Configuration for the client secret expiration. Expiration refers to the time
- * after which a client secret will no longer be valid for creating sessions. The
- * session itself may continue after that time once started. A secret can be used
- * to create multiple sessions until it expires.
- *
- * Optional; the documented defaults are described on `ExpiresAfter`.
- */
- expires_after?: ClientSecretCreateParams.ExpiresAfter;
- /**
- * Session configuration to use for the client secret. Choose either a realtime
- * session or a transcription session.
- *
- * Optional. Both request shapes are declared in the `RealtimeAPI` module
- * (`./realtime.mjs`), not in this file.
- */
- session?: RealtimeAPI.RealtimeSessionCreateRequest | RealtimeAPI.RealtimeTranscriptionSessionCreateRequest;
- }
- export declare namespace ClientSecretCreateParams {
- /**
- * Configuration for the client secret expiration. Expiration refers to the time
- * after which a client secret will no longer be valid for creating sessions. The
- * session itself may continue after that time once started. A secret can be used
- * to create multiple sessions until it expires.
- */
- interface ExpiresAfter {
- /**
- * The anchor point for the client secret expiration, meaning that `seconds` will
- * be added to the `created_at` time of the client secret to produce an expiration
- * timestamp. Only `created_at` is currently supported.
- */
- anchor?: 'created_at';
- /**
- * The number of seconds from the anchor point to the expiration. Select a value
- * between `10` and `7200` (2 hours). This defaults to 600 seconds (10 minutes) if
- * not specified.
- *
- * The range is documented only; the type is a plain `number`.
- */
- seconds?: number;
- }
- }
- export declare namespace ClientSecrets { // Type-only re-exports: makes this module's public types reachable as `ClientSecrets.<TypeName>`.
- export { type RealtimeSessionClientSecret as RealtimeSessionClientSecret, type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse, type RealtimeTranscriptionSessionCreateResponse as RealtimeTranscriptionSessionCreateResponse, type RealtimeTranscriptionSessionTurnDetection as RealtimeTranscriptionSessionTurnDetection, type ClientSecretCreateResponse as ClientSecretCreateResponse, type ClientSecretCreateParams as ClientSecretCreateParams, };
- }
- //# sourceMappingURL=client-secrets.d.mts.map
|