// yichael/AIStoryBoard
							import { APIResource } from "../../core/resource.mjs";
import * as ClientSecretsAPI from "./client-secrets.mjs";
import * as RealtimeAPI from "./realtime.mjs";
import * as ResponsesAPI from "../responses/responses.mjs";
import { APIPromise } from "../../core/api-promise.mjs";
import { RequestOptions } from "../../internal/request-options.mjs";
export declare class ClientSecrets extends APIResource {
    /**
     * Create a Realtime client secret with an associated session configuration.
     *
     * @param body - Client secret parameters. Every field is optional, but the
     *   argument itself is required, so pass `{}` when no overrides are needed.
     * @param options - Optional `RequestOptions` for this call.
     * @returns An `APIPromise` resolving to a `ClientSecretCreateResponse`.
     *
     * @example
     * ```ts
     * const clientSecret =
     *   await client.realtime.clientSecrets.create({});
     * ```
     */
    create(body: ClientSecretCreateParams, options?: RequestOptions): APIPromise<ClientSecretCreateResponse>;
}
/**
 * Short-lived (ephemeral) key issued by the API.
 */
export interface RealtimeSessionClientSecret {
    /**
     * The ephemeral key itself. It authenticates connections to the Realtime API
     * from client environments; prefer it there over a standard API token, which
     * should only be used server-side.
     */
    value: string;
    /**
     * Timestamp at which the token expires. Currently, all tokens expire after one
     * minute.
     */
    expires_at: number;
}
/**
 * A new Realtime session configuration, with an ephemeral key. Default TTL for
 * keys is one minute.
 */
export interface RealtimeSessionCreateResponse {
    /**
     * Ephemeral key returned by the API.
     */
    client_secret: RealtimeSessionClientSecret;
    /**
     * The type of session to create. Always `realtime` for the Realtime API.
     */
    type: 'realtime';
    /**
     * Configuration for input and output audio.
     */
    audio?: RealtimeSessionCreateResponse.Audio;
    /**
     * Additional fields to include in server outputs.
     *
     * `item.input_audio_transcription.logprobs`: Include logprobs for input audio
     * transcription.
     */
    include?: Array<'item.input_audio_transcription.logprobs'>;
    /**
     * The default system instructions (i.e. system message) prepended to model calls.
     * This field allows the client to guide the model on desired responses. The model
     * can be instructed on response content and format, (e.g. "be extremely succinct",
     * "act friendly", "here are examples of good responses") and on audio behavior
     * (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
     * instructions are not guaranteed to be followed by the model, but they provide
     * guidance to the model on the desired behavior.
     *
     * Note that the server sets default instructions which will be used if this field
     * is not set and are visible in the `session.created` event at the start of the
     * session.
     */
    instructions?: string;
    /**
     * Maximum number of output tokens for a single assistant response, inclusive of
     * tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
     * `inf` for the maximum available tokens for a given model. Defaults to `inf`.
     */
    max_output_tokens?: number | 'inf';
    /**
     * The Realtime model used for this session.
     */
    model?: (string & {}) | 'gpt-realtime' | 'gpt-realtime-2025-08-28' | 'gpt-4o-realtime-preview' | 'gpt-4o-realtime-preview-2024-10-01' | 'gpt-4o-realtime-preview-2024-12-17' | 'gpt-4o-realtime-preview-2025-06-03' | 'gpt-4o-mini-realtime-preview' | 'gpt-4o-mini-realtime-preview-2024-12-17' | 'gpt-realtime-mini' | 'gpt-realtime-mini-2025-10-06' | 'gpt-realtime-mini-2025-12-15' | 'gpt-audio-mini' | 'gpt-audio-mini-2025-10-06' | 'gpt-audio-mini-2025-12-15';
    /**
     * The set of modalities the model can respond with. It defaults to `["audio"]`,
     * indicating that the model will respond with audio plus a transcript. `["text"]`
     * can be used to make the model respond with text only. It is not possible to
     * request both `text` and `audio` at the same time.
     */
    output_modalities?: Array<'text' | 'audio'>;
    /**
     * Reference to a prompt template and its variables.
     * [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
     */
    prompt?: ResponsesAPI.ResponsePrompt | null;
    /**
     * How the model chooses tools. Provide one of the string modes or force a specific
     * function/MCP tool.
     */
    tool_choice?: ResponsesAPI.ToolChoiceOptions | ResponsesAPI.ToolChoiceFunction | ResponsesAPI.ToolChoiceMcp;
    /**
     * Tools available to the model.
     */
    tools?: Array<RealtimeAPI.RealtimeFunctionTool | RealtimeSessionCreateResponse.McpTool>;
    /**
     * Realtime API can write session traces to the
     * [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once
     * tracing is enabled for a session, the configuration cannot be modified.
     *
     * `auto` will create a trace for the session with default values for the workflow
     * name, group id, and metadata.
     */
    tracing?: 'auto' | RealtimeSessionCreateResponse.TracingConfiguration | null;
    /**
     * When the number of tokens in a conversation exceeds the model's input token
     * limit, the conversation will be truncated, meaning messages (starting from the
     * oldest) will not be included in the model's context. A 32k context model with
     * 4,096 max output tokens can only include 28,224 tokens in the context before
     * truncation occurs.
     *
     * Clients can configure truncation behavior to truncate with a lower max token
     * limit, which is an effective way to control token usage and cost.
     *
     * Truncation will reduce the number of cached tokens on the next turn (busting the
     * cache), since messages are dropped from the beginning of the context. However,
     * clients can also configure truncation to retain messages up to a fraction of the
     * maximum context size, which will reduce the need for future truncations and thus
     * improve the cache rate.
     *
     * Truncation can be disabled entirely, which means the server will never truncate
     * but would instead return an error if the conversation exceeds the model's input
     * token limit.
     */
    truncation?: RealtimeAPI.RealtimeTruncation;
}
export declare namespace RealtimeSessionCreateResponse {
    /**
     * Configuration for input and output audio.
     */
    interface Audio {
        /**
         * Settings for audio sent to the model.
         */
        input?: Audio.Input;
        /**
         * Settings for audio produced by the model.
         */
        output?: Audio.Output;
    }
    namespace Audio {
        /**
         * Input audio settings: format, noise reduction, transcription and turn
         * detection.
         */
        interface Input {
            /**
             * The format of the input audio.
             */
            format?: RealtimeAPI.RealtimeAudioFormats;
            /**
             * Configuration for input audio noise reduction. This can be set to `null` to turn
             * off. Noise reduction filters audio added to the input audio buffer before it is
             * sent to VAD and the model. Filtering the audio can improve VAD and turn
             * detection accuracy (reducing false positives) and model performance by improving
             * perception of the input audio.
             */
            noise_reduction?: Input.NoiseReduction;
            /**
             * Configuration for input audio transcription, defaults to off and can be set to
             * `null` to turn off once on. Input audio transcription is not native to the
             * model, since the model consumes audio directly. Transcription runs
             * asynchronously through
             * [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
             * and should be treated as guidance of input audio content rather than precisely
             * what the model heard. The client can optionally set the language and prompt for
             * transcription, these offer additional guidance to the transcription service.
             */
            transcription?: RealtimeAPI.AudioTranscription;
            /**
             * Configuration for turn detection, either Server VAD or Semantic VAD. This can be
             * set to `null` to turn off, in which case the client must manually trigger model
             * response.
             *
             * Server VAD means that the model will detect the start and end of speech based on
             * audio volume and respond at the end of user speech.
             *
             * Semantic VAD is more advanced and uses a turn detection model (in conjunction
             * with VAD) to semantically estimate whether the user has finished speaking, then
             * dynamically sets a timeout based on this probability. For example, if user audio
             * trails off with "uhhm", the model will score a low probability of turn end and
             * wait longer for the user to continue speaking. This can be useful for more
             * natural conversations, but may have a higher latency.
             */
            turn_detection?: Input.ServerVad | Input.SemanticVad | null;
        }
        namespace Input {
            /**
             * Configuration for input audio noise reduction. This can be set to `null` to turn
             * off. Noise reduction filters audio added to the input audio buffer before it is
             * sent to VAD and the model. Filtering the audio can improve VAD and turn
             * detection accuracy (reducing false positives) and model performance by improving
             * perception of the input audio.
             */
            interface NoiseReduction {
                /**
                 * Type of noise reduction. `near_field` is for close-talking microphones such as
                 * headphones, `far_field` is for far-field microphones such as laptop or
                 * conference room microphones.
                 */
                type?: RealtimeAPI.NoiseReductionType;
            }
            /**
             * Server-side voice activity detection (VAD) which flips on when user speech is
             * detected and off after a period of silence.
             */
            interface ServerVad {
                /**
                 * Type of turn detection, `server_vad` to turn on simple Server VAD.
                 */
                type: 'server_vad';
                /**
                 * Whether or not to automatically generate a response when a VAD stop event
                 * occurs. If `interrupt_response` is set to `false` this may fail to create a
                 * response if the model is already responding.
                 *
                 * If both `create_response` and `interrupt_response` are set to `false`, the model
                 * will never respond automatically but VAD events will still be emitted.
                 */
                create_response?: boolean;
                /**
                 * Optional timeout after which a model response will be triggered automatically.
                 * This is useful for situations in which a long pause from the user is unexpected,
                 * such as a phone call. The model will effectively prompt the user to continue the
                 * conversation based on the current context.
                 *
                 * The timeout value will be applied after the last model response's audio has
                 * finished playing, i.e. it's set to the `response.done` time plus audio playback
                 * duration.
                 *
                 * An `input_audio_buffer.timeout_triggered` event (plus events associated with the
                 * Response) will be emitted when the timeout is reached. Idle timeout is currently
                 * only supported for `server_vad` mode.
                 */
                idle_timeout_ms?: number | null;
                /**
                 * Whether or not to automatically interrupt (cancel) any ongoing response with
                 * output to the default conversation (i.e. `conversation` of `auto`) when a VAD
                 * start event occurs. If `true` then the response will be cancelled, otherwise it
                 * will continue until complete.
                 *
                 * If both `create_response` and `interrupt_response` are set to `false`, the model
                 * will never respond automatically but VAD events will still be emitted.
                 */
                interrupt_response?: boolean;
                /**
                 * Used only for `server_vad` mode. Amount of audio to include before the VAD
                 * detected speech (in milliseconds). Defaults to 300ms.
                 */
                prefix_padding_ms?: number;
                /**
                 * Used only for `server_vad` mode. Duration of silence to detect speech stop (in
                 * milliseconds). Defaults to 500ms. With shorter values the model will respond
                 * more quickly, but may jump in on short pauses from the user.
                 */
                silence_duration_ms?: number;
                /**
                 * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
                 * defaults to 0.5. A higher threshold will require louder audio to activate the
                 * model, and thus might perform better in noisy environments.
                 */
                threshold?: number;
            }
            /**
             * Server-side semantic turn detection which uses a model to determine when the
             * user has finished speaking.
             */
            interface SemanticVad {
                /**
                 * Type of turn detection, `semantic_vad` to turn on Semantic VAD.
                 */
                type: 'semantic_vad';
                /**
                 * Whether or not to automatically generate a response when a VAD stop event
                 * occurs.
                 */
                create_response?: boolean;
                /**
                 * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
                 * will wait longer for the user to continue speaking, `high` will respond more
                 * quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
                 * and `high` have max timeouts of 8s, 4s, and 2s respectively.
                 */
                eagerness?: 'low' | 'medium' | 'high' | 'auto';
                /**
                 * Whether or not to automatically interrupt any ongoing response with output to
                 * the default conversation (i.e. `conversation` of `auto`) when a VAD start event
                 * occurs.
                 */
                interrupt_response?: boolean;
            }
        }
        /**
         * Output audio settings: format, playback speed and voice.
         */
        interface Output {
            /**
             * The format of the output audio.
             */
            format?: RealtimeAPI.RealtimeAudioFormats;
            /**
             * The speed of the model's spoken response as a multiple of the original speed.
             * 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
             * This value can only be changed in between model turns, not while a response is
             * in progress.
             *
             * This parameter is a post-processing adjustment to the audio after it is
             * generated, it's also possible to prompt the model to speak faster or slower.
             */
            speed?: number;
            /**
             * The voice the model uses to respond. Voice cannot be changed during the session
             * once the model has responded with audio at least once. Current voice options are
             * `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
             * and `cedar`. We recommend `marin` and `cedar` for best quality.
             */
            voice?: (string & {}) | 'alloy' | 'ash' | 'ballad' | 'coral' | 'echo' | 'sage' | 'shimmer' | 'verse' | 'marin' | 'cedar';
        }
    }
    /**
     * Give the model access to additional tools via remote Model Context Protocol
     * (MCP) servers.
     * [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp).
     */
    interface McpTool {
        /**
         * A label for this MCP server, used to identify it in tool calls.
         */
        server_label: string;
        /**
         * The type of the MCP tool. Always `mcp`.
         */
        type: 'mcp';
        /**
         * List of allowed tool names or a filter object.
         */
        allowed_tools?: Array<string> | McpTool.McpToolFilter | null;
        /**
         * An OAuth access token that can be used with a remote MCP server, either with a
         * custom MCP server URL or a service connector. Your application must handle the
         * OAuth authorization flow and provide the token here.
         */
        authorization?: string;
        /**
         * Identifier for service connectors, like those available in ChatGPT. One of
         * `server_url` or `connector_id` must be provided. Learn more about service
         * connectors
         * [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors).
         *
         * Currently supported `connector_id` values are:
         *
         * - Dropbox: `connector_dropbox`
         * - Gmail: `connector_gmail`
         * - Google Calendar: `connector_googlecalendar`
         * - Google Drive: `connector_googledrive`
         * - Microsoft Teams: `connector_microsoftteams`
         * - Outlook Calendar: `connector_outlookcalendar`
         * - Outlook Email: `connector_outlookemail`
         * - SharePoint: `connector_sharepoint`
         */
        connector_id?: 'connector_dropbox' | 'connector_gmail' | 'connector_googlecalendar' | 'connector_googledrive' | 'connector_microsoftteams' | 'connector_outlookcalendar' | 'connector_outlookemail' | 'connector_sharepoint';
        /**
         * Optional HTTP headers to send to the MCP server. Use for authentication or other
         * purposes.
         */
        headers?: {
            [key: string]: string;
        } | null;
        /**
         * Specify which of the MCP server's tools require approval.
         */
        require_approval?: McpTool.McpToolApprovalFilter | 'always' | 'never' | null;
        /**
         * Optional description of the MCP server, used to provide more context.
         */
        server_description?: string;
        /**
         * The URL for the MCP server. One of `server_url` or `connector_id` must be
         * provided.
         */
        server_url?: string;
    }
    namespace McpTool {
        /**
         * A filter object to specify which tools are allowed.
         */
        interface McpToolFilter {
            /**
             * Indicates whether or not a tool modifies data or is read-only. If an MCP server
             * is
             * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
             * it will match this filter.
             */
            read_only?: boolean;
            /**
             * List of allowed tool names.
             */
            tool_names?: Array<string>;
        }
        /**
         * Specify which of the MCP server's tools require approval. Can be `always`,
         * `never`, or a filter object associated with tools that require approval.
         */
        interface McpToolApprovalFilter {
            /**
             * A filter object to specify which tools are allowed.
             *
             * NOTE(review): given the property name, this presumably selects the tools
             * that always require approval — confirm against the API reference.
             */
            always?: McpToolApprovalFilter.Always;
            /**
             * A filter object to specify which tools are allowed.
             *
             * NOTE(review): given the property name, this presumably selects the tools
             * that never require approval — confirm against the API reference.
             */
            never?: McpToolApprovalFilter.Never;
        }
        namespace McpToolApprovalFilter {
            /**
             * A filter object to specify which tools are allowed.
             */
            interface Always {
                /**
                 * Indicates whether or not a tool modifies data or is read-only. If an MCP server
                 * is
                 * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
                 * it will match this filter.
                 */
                read_only?: boolean;
                /**
                 * List of allowed tool names.
                 */
                tool_names?: Array<string>;
            }
            /**
             * A filter object to specify which tools are allowed.
             */
            interface Never {
                /**
                 * Indicates whether or not a tool modifies data or is read-only. If an MCP server
                 * is
                 * [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
                 * it will match this filter.
                 */
                read_only?: boolean;
                /**
                 * List of allowed tool names.
                 */
                tool_names?: Array<string>;
            }
        }
    }
    /**
     * Granular configuration for tracing.
     */
    interface TracingConfiguration {
        /**
         * The group id to attach to this trace to enable filtering and grouping in the
         * Traces Dashboard.
         */
        group_id?: string;
        /**
         * The arbitrary metadata to attach to this trace to enable filtering in the Traces
         * Dashboard.
         */
        metadata?: unknown;
        /**
         * The name of the workflow to attach to this trace. This is used to name the trace
         * in the Traces Dashboard.
         */
        workflow_name?: string;
    }
}
/**
 * Configuration object describing a Realtime transcription session.
 */
export interface RealtimeTranscriptionSessionCreateResponse {
    /**
     * Session identifier, formatted like `sess_1234567890abcdef`.
     */
    id: string;
    /**
     * Object type; always `realtime.transcription_session`.
     */
    object: string;
    /**
     * Session type; always `transcription` for transcription sessions.
     */
    type: 'transcription';
    /**
     * Input audio configuration for the session.
     */
    audio?: RealtimeTranscriptionSessionCreateResponse.Audio;
    /**
     * When the session expires, expressed in seconds since epoch.
     */
    expires_at?: number;
    /**
     * Extra fields to include in server outputs.
     *
     * - `item.input_audio_transcription.logprobs`: include logprobs for input audio
     *   transcription.
     */
    include?: 'item.input_audio_transcription.logprobs'[];
}
export declare namespace RealtimeTranscriptionSessionCreateResponse {
    /**
     * Input audio configuration for the session.
     */
    interface Audio {
        /**
         * Settings applied to incoming audio.
         */
        input?: Audio.Input;
    }
    namespace Audio {
        /**
         * Input audio settings: format, noise reduction, transcription and turn
         * detection.
         */
        interface Input {
            /**
             * PCM audio format; only a 24kHz sample rate is supported.
             */
            format?: RealtimeAPI.RealtimeAudioFormats;
            /**
             * Noise reduction settings for input audio.
             */
            noise_reduction?: Input.NoiseReduction;
            /**
             * Transcription model configuration.
             */
            transcription?: RealtimeAPI.AudioTranscription;
            /**
             * Turn detection settings; can be set to `null` to turn it off. With Server VAD
             * the model detects the start and end of speech from audio volume and responds
             * at the end of user speech.
             */
            turn_detection?: ClientSecretsAPI.RealtimeTranscriptionSessionTurnDetection;
        }
        namespace Input {
            /**
             * Noise reduction settings for input audio.
             */
            interface NoiseReduction {
                /**
                 * Kind of noise reduction: `near_field` targets close-talking microphones
                 * such as headphones, while `far_field` targets far-field microphones such
                 * as laptop or conference room microphones.
                 */
                type?: RealtimeAPI.NoiseReductionType;
            }
        }
    }
}
/**
 * Turn detection settings; can be set to `null` to turn it off. With Server VAD
 * the model detects the start and end of speech from audio volume and responds
 * at the end of user speech.
 */
export interface RealtimeTranscriptionSessionTurnDetection {
    /**
     * Kind of turn detection; only `server_vad` is currently supported.
     */
    type?: string;
    /**
     * VAD activation threshold (0.0 to 1.0); defaults to 0.5. Raising it requires
     * louder audio to activate the model, which might perform better in noisy
     * environments.
     */
    threshold?: number;
    /**
     * How much audio to include before the VAD detected speech, in milliseconds.
     * Defaults to 300ms.
     */
    prefix_padding_ms?: number;
    /**
     * How long a silence must last to count as a speech stop, in milliseconds.
     * Defaults to 500ms. Shorter values make the model respond more quickly, but it
     * may jump in on short pauses from the user.
     */
    silence_duration_ms?: number;
}
/**
 * Result of creating a session and client secret for the Realtime API.
 */
export interface ClientSecretCreateResponse {
    /**
     * The generated client secret value.
     */
    value: string;
    /**
     * When the client secret expires, in seconds since epoch.
     */
    expires_at: number;
    /**
     * Session configuration, for either a realtime or a transcription session.
     */
    session: RealtimeSessionCreateResponse | RealtimeTranscriptionSessionCreateResponse;
}
/**
 * Request parameters for creating a client secret. Both fields are optional.
 */
export interface ClientSecretCreateParams {
    /**
     * Session configuration to attach to the client secret: either a realtime
     * session or a transcription session.
     */
    session?: RealtimeAPI.RealtimeSessionCreateRequest | RealtimeAPI.RealtimeTranscriptionSessionCreateRequest;
    /**
     * Expiration settings for the client secret. Expiration is the time after which
     * the secret can no longer be used to create sessions; a session that has
     * already started may continue past it. Until it expires, one secret can be
     * used to create multiple sessions.
     */
    expires_after?: ClientSecretCreateParams.ExpiresAfter;
}
export declare namespace ClientSecretCreateParams {
    /**
     * Expiration settings for the client secret. Expiration is the time after which
     * the secret can no longer be used to create sessions; a session that has
     * already started may continue past it. Until it expires, one secret can be
     * used to create multiple sessions.
     */
    interface ExpiresAfter {
        /**
         * Number of seconds between the anchor point and the expiration. Choose a
         * value between `10` and `7200` (2 hours). Defaults to 600 seconds
         * (10 minutes) if not specified.
         */
        seconds?: number;
        /**
         * Anchor point for the expiration: `seconds` is added to the `created_at` time
         * of the client secret to produce the expiration timestamp. Only `created_at`
         * is currently supported.
         */
        anchor?: 'created_at';
    }
}
/**
 * Re-exports this module's types under the `ClientSecrets` namespace so they can
 * be referenced as `ClientSecrets.<TypeName>`.
 */
export declare namespace ClientSecrets {
    export {
        type RealtimeSessionClientSecret,
        type RealtimeSessionCreateResponse,
        type RealtimeTranscriptionSessionCreateResponse,
        type RealtimeTranscriptionSessionTurnDetection,
        type ClientSecretCreateResponse,
        type ClientSecretCreateParams,
    };
}
//# sourceMappingURL=client-secrets.d.mts.map