| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935 |
- # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
- from __future__ import annotations
- import typing_extensions
- from typing import Union, Iterable, Optional
- from functools import partial
- from typing_extensions import Literal, overload
- import httpx
- from .... import _legacy_response
- from .messages import (
- Messages,
- AsyncMessages,
- MessagesWithRawResponse,
- AsyncMessagesWithRawResponse,
- MessagesWithStreamingResponse,
- AsyncMessagesWithStreamingResponse,
- )
- from ...._types import NOT_GIVEN, Body, Omit, Query, Headers, NotGiven, omit, not_given
- from ...._utils import required_args, maybe_transform, async_maybe_transform
- from .runs.runs import (
- Runs,
- AsyncRuns,
- RunsWithRawResponse,
- AsyncRunsWithRawResponse,
- RunsWithStreamingResponse,
- AsyncRunsWithStreamingResponse,
- )
- from ...._compat import cached_property
- from ...._resource import SyncAPIResource, AsyncAPIResource
- from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
- from ...._streaming import Stream, AsyncStream
- from ....types.beta import (
- thread_create_params,
- thread_update_params,
- thread_create_and_run_params,
- )
- from ...._base_client import make_request_options
- from ....lib.streaming import (
- AssistantEventHandler,
- AssistantEventHandlerT,
- AssistantStreamManager,
- AsyncAssistantEventHandler,
- AsyncAssistantEventHandlerT,
- AsyncAssistantStreamManager,
- )
- from ....types.beta.thread import Thread
- from ....types.beta.threads.run import Run
- from ....types.shared.chat_model import ChatModel
- from ....types.beta.thread_deleted import ThreadDeleted
- from ....types.shared_params.metadata import Metadata
- from ....types.beta.assistant_tool_param import AssistantToolParam
- from ....types.beta.assistant_stream_event import AssistantStreamEvent
- from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
- from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
- __all__ = ["Threads", "AsyncThreads"]
- class Threads(SyncAPIResource):
@cached_property
def runs(self) -> Runs:
    """Return the `Runs` sub-resource, built from this resource's client."""
    runs_resource = Runs(self._client)
    return runs_resource
@cached_property
def messages(self) -> Messages:
    """Return the `Messages` sub-resource, built from this resource's client."""
    messages_resource = Messages(self._client)
    return messages_resource
@cached_property
def with_raw_response(self) -> ThreadsWithRawResponse:
    """
    Prefix accessor that yields raw responses.

    Place this property in front of any HTTP method call to receive the raw
    response object rather than the parsed content.

    For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
    """
    raw_view = ThreadsWithRawResponse(self)
    return raw_view
@cached_property
def with_streaming_response(self) -> ThreadsWithStreamingResponse:
    """
    Alternative to `.with_raw_response` that does not eagerly read the response body.

    For more information, see https://www.github.com/openai/openai-python#with_streaming_response
    """
    streaming_view = ThreadsWithStreamingResponse(self)
    return streaming_view
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
def create(
    self,
    *,
    messages: Iterable[thread_create_params.Message] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    tool_resources: Optional[thread_create_params.ToolResources] | Omit = omit,
    # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
    # Extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Thread:
    """
    Create a thread by sending a POST request to `/threads`.

    Args:
      messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages)
          the thread should start with.

      metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
          storing additional information about the object in a structured format, and
          for querying objects via API or the dashboard.

          Keys are strings of at most 64 characters. Values are strings of at most
          512 characters.

      tool_resources: Resources made available to the assistant's tools in this thread. They are
          specific to the type of tool: for example, the `code_interpreter` tool requires
          a list of file IDs, while the `file_search` tool requires a list of vector
          store IDs.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Returns:
      The result of the request, cast to `Thread`.
    """
    # Caller-supplied headers are spread last, so they override the default beta header.
    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    payload = maybe_transform(
        {
            "messages": messages,
            "metadata": metadata,
            "tool_resources": tool_resources,
        },
        thread_create_params.ThreadCreateParams,
    )
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return self._post("/threads", body=payload, options=request_options, cast_to=Thread)
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
def retrieve(
    self,
    thread_id: str,
    *,
    # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
    # Extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Thread:
    """
    Retrieve a thread by sending a GET request to `/threads/{thread_id}`.

    Args:
      thread_id: Identifier interpolated into the request path; must be non-empty.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Returns:
      The result of the request, cast to `Thread`.

    Raises:
      ValueError: If `thread_id` is empty (falsy).
    """
    # Reject an empty id before it is interpolated into the request path.
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    # Caller-supplied headers are spread last, so they override the default beta header.
    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    resource_path = f"/threads/{thread_id}"
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return self._get(resource_path, options=request_options, cast_to=Thread)
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
def update(
    self,
    thread_id: str,
    *,
    metadata: Optional[Metadata] | Omit = omit,
    tool_resources: Optional[thread_update_params.ToolResources] | Omit = omit,
    # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
    # Extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Thread:
    """
    Modify a thread by sending a POST request to `/threads/{thread_id}`.

    Args:
      thread_id: Identifier interpolated into the request path; must be non-empty.

      metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
          storing additional information about the object in a structured format, and
          for querying objects via API or the dashboard.

          Keys are strings of at most 64 characters. Values are strings of at most
          512 characters.

      tool_resources: Resources made available to the assistant's tools in this thread. They are
          specific to the type of tool: for example, the `code_interpreter` tool requires
          a list of file IDs, while the `file_search` tool requires a list of vector
          store IDs.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Returns:
      The result of the request, cast to `Thread`.

    Raises:
      ValueError: If `thread_id` is empty (falsy).
    """
    # Reject an empty id before it is interpolated into the request path.
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    # Caller-supplied headers are spread last, so they override the default beta header.
    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    resource_path = f"/threads/{thread_id}"
    payload = maybe_transform(
        {
            "metadata": metadata,
            "tool_resources": tool_resources,
        },
        thread_update_params.ThreadUpdateParams,
    )
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return self._post(resource_path, body=payload, options=request_options, cast_to=Thread)
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
def delete(
    self,
    thread_id: str,
    *,
    # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
    # Extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> ThreadDeleted:
    """
    Delete a thread by sending a DELETE request to `/threads/{thread_id}`.

    Args:
      thread_id: Identifier interpolated into the request path; must be non-empty.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Returns:
      The result of the request, cast to `ThreadDeleted`.

    Raises:
      ValueError: If `thread_id` is empty (falsy).
    """
    # Reject an empty id before it is interpolated into the request path.
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    # Caller-supplied headers are spread last, so they override the default beta header.
    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    resource_path = f"/threads/{thread_id}"
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return self._delete(resource_path, options=request_options, cast_to=ThreadDeleted)
@overload
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
def create_and_run(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    stream: Optional[Literal[False]] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
    # Extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Run:
    """
    Create a thread and run it in a single request.

    Typing overload for the non-streaming case: `stream` is omitted, `None`, or
    `False`, and the annotated return type is `Run`.

    Args:
      assistant_id: ID of the
          [assistant](https://platform.openai.com/docs/api-reference/assistants) that
          will execute this run.

      instructions: Replaces the assistant's default system message. Useful for adjusting
          behavior on a per-run basis.

      max_completion_tokens: Maximum number of completion tokens that may be used over the course of the
          run. The run makes a best effort to use only this many completion tokens
          across multiple turns. If the run exceeds the number specified, it ends with
          status `incomplete`. See `incomplete_details` for more info.

      max_prompt_tokens: Maximum number of prompt tokens that may be used over the course of the run.
          The run makes a best effort to use only this many prompt tokens across
          multiple turns. If the run exceeds the number specified, it ends with status
          `incomplete`. See `incomplete_details` for more info.

      metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
          storing additional information about the object in a structured format, and
          for querying objects via API or the dashboard.

          Keys are strings of at most 64 characters. Values are strings of at most
          512 characters.

      model: ID of the [Model](https://platform.openai.com/docs/api-reference/models) used
          to execute this run. A value provided here overrides the model associated
          with the assistant; otherwise the assistant's model is used.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      response_format: The format that the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs,
          which ensures the model will match your supplied JSON schema. Learn more in
          the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          `{ "type": "json_object" }` enables JSON mode, which ensures the message the
          model generates is valid JSON.

          **Important:** in JSON mode you **must** also instruct the model to produce
          JSON yourself via a system or user message. Without this, the model may
          generate an unending stream of whitespace until the generation reaches the
          token limit, resulting in a long-running and seemingly "stuck" request. Also
          note that the message content may be partially cut off if
          `finish_reason="length"`, which indicates the generation exceeded
          `max_tokens` or the conversation exceeded the max context length.

      stream: If `true`, returns a stream of events that happen during the Run as
          server-sent events, terminating when the Run enters a terminal state with a
          `data: [DONE]` message.

      temperature: Sampling temperature to use, between 0 and 2. Higher values like 0.8 make
          the output more random, while lower values like 0.2 make it more focused and
          deterministic.

      thread: Options to create a new thread. If no thread is provided when running a
          request, an empty thread will be created.

      tool_choice: Controls which (if any) tool is called by the model. `none` means the model
          will not call any tools and instead generates a message. `auto` is the
          default value and means the model can pick between generating a message or
          calling one or more tools. `required` means the model must call one or more
          tools before responding to the user. Specifying a particular tool like
          `{"type": "file_search"}` or
          `{"type": "function", "function": {"name": "my_function"}}` forces the model
          to call that tool.

      tool_resources: Resources used by the assistant's tools. They are specific to the type of
          tool: for example, the `code_interpreter` tool requires a list of file IDs,
          while the `file_search` tool requires a list of vector store IDs.

      tools: Override the tools the assistant can use for this run. Useful for adjusting
          behavior on a per-run basis.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where
          the model considers the results of the tokens with top_p probability mass.
          So 0.1 means only the tokens comprising the top 10% probability mass are
          considered.

          We generally recommend altering this or temperature but not both.

      truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
          control the initial context window of the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
@overload
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
def create_and_run(
    self,
    *,
    assistant_id: str,
    stream: Literal[True],
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    # The arguments below let you send additional parameters to the API that are not exposed as kwargs.
    # Extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Stream[AssistantStreamEvent]:
    """
    Create a thread and run it in a single request.

    Typing overload for the streaming case: `stream` is required and must be
    `True`, and the annotated return type is `Stream[AssistantStreamEvent]`.

    Args:
      assistant_id: ID of the
          [assistant](https://platform.openai.com/docs/api-reference/assistants) that
          will execute this run.

      stream: If `true`, returns a stream of events that happen during the Run as
          server-sent events, terminating when the Run enters a terminal state with a
          `data: [DONE]` message.

      instructions: Replaces the assistant's default system message. Useful for adjusting
          behavior on a per-run basis.

      max_completion_tokens: Maximum number of completion tokens that may be used over the course of the
          run. The run makes a best effort to use only this many completion tokens
          across multiple turns. If the run exceeds the number specified, it ends with
          status `incomplete`. See `incomplete_details` for more info.

      max_prompt_tokens: Maximum number of prompt tokens that may be used over the course of the run.
          The run makes a best effort to use only this many prompt tokens across
          multiple turns. If the run exceeds the number specified, it ends with status
          `incomplete`. See `incomplete_details` for more info.

      metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
          storing additional information about the object in a structured format, and
          for querying objects via API or the dashboard.

          Keys are strings of at most 64 characters. Values are strings of at most
          512 characters.

      model: ID of the [Model](https://platform.openai.com/docs/api-reference/models) used
          to execute this run. A value provided here overrides the model associated
          with the assistant; otherwise the assistant's model is used.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      response_format: The format that the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs,
          which ensures the model will match your supplied JSON schema. Learn more in
          the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          `{ "type": "json_object" }` enables JSON mode, which ensures the message the
          model generates is valid JSON.

          **Important:** in JSON mode you **must** also instruct the model to produce
          JSON yourself via a system or user message. Without this, the model may
          generate an unending stream of whitespace until the generation reaches the
          token limit, resulting in a long-running and seemingly "stuck" request. Also
          note that the message content may be partially cut off if
          `finish_reason="length"`, which indicates the generation exceeded
          `max_tokens` or the conversation exceeded the max context length.

      temperature: Sampling temperature to use, between 0 and 2. Higher values like 0.8 make
          the output more random, while lower values like 0.2 make it more focused and
          deterministic.

      thread: Options to create a new thread. If no thread is provided when running a
          request, an empty thread will be created.

      tool_choice: Controls which (if any) tool is called by the model. `none` means the model
          will not call any tools and instead generates a message. `auto` is the
          default value and means the model can pick between generating a message or
          calling one or more tools. `required` means the model must call one or more
          tools before responding to the user. Specifying a particular tool like
          `{"type": "file_search"}` or
          `{"type": "function", "function": {"name": "my_function"}}` forces the model
          to call that tool.

      tool_resources: Resources used by the assistant's tools. They are specific to the type of
          tool: for example, the `code_interpreter` tool requires a list of file IDs,
          while the `file_search` tool requires a list of vector store IDs.

      tools: Override the tools the assistant can use for this run. Useful for adjusting
          behavior on a per-run basis.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where
          the model considers the results of the tokens with top_p probability mass.
          So 0.1 means only the tokens comprising the top 10% probability mass are
          considered.

          We generally recommend altering this or temperature but not both.

      truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
          control the initial context window of the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
- @overload
- @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
- def create_and_run(
- self,
- *,
- assistant_id: str,
- stream: bool,
- instructions: Optional[str] | Omit = omit,
- max_completion_tokens: Optional[int] | Omit = omit,
- max_prompt_tokens: Optional[int] | Omit = omit,
- metadata: Optional[Metadata] | Omit = omit,
- model: Union[str, ChatModel, None] | Omit = omit,
- parallel_tool_calls: bool | Omit = omit,
- response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
- temperature: Optional[float] | Omit = omit,
- thread: thread_create_and_run_params.Thread | Omit = omit,
- tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
- tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
- tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
- top_p: Optional[float] | Omit = omit,
- truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Run | Stream[AssistantStreamEvent]:
- """
- Create a thread and run it in one request.
- Args:
- assistant_id: The ID of the
- [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
- execute this run.
- stream: If `true`, returns a stream of events that happen during the Run as server-sent
- events, terminating when the Run enters a terminal state with a `data: [DONE]`
- message.
- instructions: Override the default system message of the assistant. This is useful for
- modifying the behavior on a per-run basis.
- max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
- run. The run will make a best effort to use only the number of completion tokens
- specified, across multiple turns of the run. If the run exceeds the number of
- completion tokens specified, the run will end with status `incomplete`. See
- `incomplete_details` for more info.
- max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
- The run will make a best effort to use only the number of prompt tokens
- specified, across multiple turns of the run. If the run exceeds the number of
- prompt tokens specified, the run will end with status `incomplete`. See
- `incomplete_details` for more info.
- metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
- for storing additional information about the object in a structured format, and
- querying for objects via API or the dashboard.
- Keys are strings with a maximum length of 64 characters. Values are strings with
- a maximum length of 512 characters.
- model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
- be used to execute this run. If a value is provided here, it will override the
- model associated with the assistant. If not, the model associated with the
- assistant will be used.
- parallel_tool_calls: Whether to enable
- [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
- during tool use.
- response_format: Specifies the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
- and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
- Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
- Outputs which ensures the model will match your supplied JSON schema. Learn more
- in the
- [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
- Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
- message the model generates is valid JSON.
- **Important:** when using JSON mode, you **must** also instruct the model to
- produce JSON yourself via a system or user message. Without this, the model may
- generate an unending stream of whitespace until the generation reaches the token
- limit, resulting in a long-running and seemingly "stuck" request. Also note that
- the message content may be partially cut off if `finish_reason="length"`, which
- indicates the generation exceeded `max_tokens` or the conversation exceeded the
- max context length.
- temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
- make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
- thread: Options to create a new thread. If no thread is provided when running a request,
- an empty thread will be created.
- tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
- not call any tools and instead generates a message. `auto` is the default value
- and means the model can pick between generating a message or calling one or more
- tools. `required` means the model must call one or more tools before responding
- to the user. Specifying a particular tool like `{"type": "file_search"}` or
- `{"type": "function", "function": {"name": "my_function"}}` forces the model to
- call that tool.
- tool_resources: A set of resources that are used by the assistant's tools. The resources are
- specific to the type of tool. For example, the `code_interpreter` tool requires
- a list of file IDs, while the `file_search` tool requires a list of vector store
- IDs.
- tools: Override the tools the assistant can use for this run. This is useful for
- modifying the behavior on a per-run basis.
- top_p: An alternative to sampling with temperature, called nucleus sampling, where the
- model considers the results of the tokens with top_p probability mass. So 0.1
- means only the tokens comprising the top 10% probability mass are considered.
- We generally recommend altering this or temperature but not both.
- truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
- control the initial context window of the run.
- extra_headers: Send extra headers
- extra_query: Add additional query parameters to the request
- extra_body: Add additional JSON properties to the request
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
# The deprecation decorator is applied exactly once, outermost, so that a call
# goes through a single deprecation wrapper and the warning is attributed to
# the caller rather than to the `required_args` wrapper.
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
@required_args(["assistant_id"], ["assistant_id", "stream"])
def create_and_run(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Run | Stream[AssistantStreamEvent]:
    """
    Create a thread and run it in one request.

    This is the runtime implementation behind the typed overloads of
    `create_and_run`. It POSTs to `/threads/runs` with the
    `OpenAI-Beta: assistants=v2` header; entries in `extra_headers` are merged
    after that default and therefore override it on a key clash.

    Args:
      assistant_id: The ID of the assistant to use to execute this run.

      instructions: Override the default system message of the assistant.

      max_completion_tokens: The maximum number of completion tokens that may be
          used over the course of the run.

      max_prompt_tokens: The maximum number of prompt tokens that may be used
          over the course of the run.

      metadata: Key-value pairs to attach to the object.

      model: The ID of the model to execute this run with, overriding the model
          associated with the assistant.

      parallel_tool_calls: Whether to enable parallel function calling during
          tool use.

      response_format: Specifies the format that the model must output.

      stream: If truthy, the body is transformed with the streaming params type
          and the request is issued in streaming mode; otherwise the
          non-streaming params type is used and `stream=False` is passed on.

      temperature: What sampling temperature to use, between 0 and 2.

      thread: Options to create a new thread.

      tool_choice: Controls which (if any) tool is called by the model.

      tool_resources: A set of resources that are used by the assistant's tools.

      tools: Override the tools the assistant can use for this run.

      top_p: Nucleus-sampling alternative to `temperature`.

      truncation_strategy: Controls for how a thread will be truncated prior to
          the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Returns:
      Whatever `self._post` yields for `cast_to=Run`, using
      `Stream[AssistantStreamEvent]` as the stream class when streaming.
    """
    extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    return self._post(
        "/threads/runs",
        body=maybe_transform(
            {
                "assistant_id": assistant_id,
                "instructions": instructions,
                "max_completion_tokens": max_completion_tokens,
                "max_prompt_tokens": max_prompt_tokens,
                "metadata": metadata,
                "model": model,
                "parallel_tool_calls": parallel_tool_calls,
                "response_format": response_format,
                "stream": stream,
                "temperature": temperature,
                "thread": thread,
                "tool_choice": tool_choice,
                "tool_resources": tool_resources,
                "tools": tools,
                "top_p": top_p,
                "truncation_strategy": truncation_strategy,
            },
            # The params TypedDict used for the transform depends on the streaming mode.
            thread_create_and_run_params.ThreadCreateAndRunParamsStreaming
            if stream
            else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming,
        ),
        options=make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        ),
        cast_to=Run,
        # `stream` may be `None` or the omit sentinel; hand a real bool-ish value to the transport.
        stream=stream or False,
        stream_cls=Stream[AssistantStreamEvent],
    )
def create_and_run_poll(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    poll_interval_ms: int | Omit = omit,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    Convenience wrapper: create a thread, kick off a run on it, then poll that
    run until it reaches a terminal state.

    The run is created through `create_and_run` with `stream=False`; its `id`
    and `thread_id` are then handed to `self.runs.poll` together with the same
    request options and `poll_interval_ms`.

    Details on Run lifecycles are available at
    https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
    """
    # Step 1: non-streaming creation of the thread and its run.
    started = self.create_and_run(  # pyright: ignore[reportDeprecated]
        assistant_id=assistant_id,
        instructions=instructions,
        max_completion_tokens=max_completion_tokens,
        max_prompt_tokens=max_prompt_tokens,
        metadata=metadata,
        model=model,
        parallel_tool_calls=parallel_tool_calls,
        response_format=response_format,
        stream=False,
        temperature=temperature,
        thread=thread,
        tool_choice=tool_choice,
        tool_resources=tool_resources,
        tools=tools,
        top_p=top_p,
        truncation_strategy=truncation_strategy,
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    # Step 2: delegate the waiting to the runs resource (arguments stay positional).
    return self.runs.poll(  # pyright: ignore[reportDeprecated]
        started.id,
        started.thread_id,
        extra_headers,
        extra_query,
        extra_body,
        timeout,
        poll_interval_ms,
    )
@overload
def create_and_run_stream(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandler]:
    """
    Create a thread and stream the run back.

    Typing overload for calls that do not pass `event_handler`: the returned
    manager is parameterised with the stock `AssistantEventHandler`.
    """
    ...
@overload
def create_and_run_stream(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    event_handler: AssistantEventHandlerT,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandlerT]:
    """
    Create a thread and stream the run back.

    Typing overload for calls that supply their own `event_handler`: the
    returned manager is parameterised with that handler's type.
    """
    ...
def create_and_run_stream(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    event_handler: AssistantEventHandlerT | None = None,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
    """
    Create a thread and stream the run back.

    Nothing is sent here directly: the POST to `/threads/runs` is packaged as a
    `partial` and handed to an `AssistantStreamManager`, along with either the
    supplied `event_handler` or a fresh `AssistantEventHandler`.
    """
    # Helper headers go first so that caller-provided headers can override them.
    custom_handler_flag = "true" if event_handler else "false"
    merged_headers = {
        "OpenAI-Beta": "assistants=v2",
        "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
        "X-Stainless-Custom-Event-Handler": custom_handler_flag,
        **(extra_headers or {}),
    }

    # Streaming is always on for this helper, hence the hard-coded "stream" entry.
    raw_params = {
        "assistant_id": assistant_id,
        "instructions": instructions,
        "max_completion_tokens": max_completion_tokens,
        "max_prompt_tokens": max_prompt_tokens,
        "metadata": metadata,
        "model": model,
        "parallel_tool_calls": parallel_tool_calls,
        "response_format": response_format,
        "temperature": temperature,
        "tool_choice": tool_choice,
        "stream": True,
        "thread": thread,
        "tools": tools,
        "tool_resources": tool_resources,
        "truncation_strategy": truncation_strategy,
        "top_p": top_p,
    }
    payload = maybe_transform(raw_params, thread_create_and_run_params.ThreadCreateAndRunParams)
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )

    make_request = partial(
        self._post,
        "/threads/runs",
        body=payload,
        options=request_options,
        cast_to=Run,
        stream=True,
        stream_cls=Stream[AssistantStreamEvent],
    )
    handler = event_handler or AssistantEventHandler()
    return AssistantStreamManager(make_request, event_handler=handler)
- class AsyncThreads(AsyncAPIResource):
@cached_property
def runs(self) -> AsyncRuns:
    """Build the `AsyncRuns` sub-resource from this resource's client."""
    client = self._client
    return AsyncRuns(client)
@cached_property
def messages(self) -> AsyncMessages:
    """Build the `AsyncMessages` sub-resource from this resource's client."""
    client = self._client
    return AsyncMessages(client)
@cached_property
def with_raw_response(self) -> AsyncThreadsWithRawResponse:
    """
    Prefix for any HTTP method call when the raw response object is wanted
    instead of the parsed content.

    For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
    """
    raw_view = AsyncThreadsWithRawResponse(self)
    return raw_view
@cached_property
def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
    """
    Like `.with_raw_response`, except the response body is not read eagerly.

    For more information, see https://www.github.com/openai/openai-python#with_streaming_response
    """
    streaming_view = AsyncThreadsWithStreamingResponse(self)
    return streaming_view
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
async def create(
    self,
    *,
    messages: Iterable[thread_create_params.Message] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    tool_resources: Optional[thread_create_params.ToolResources] | Omit = omit,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Thread:
    """
    Create a thread.

    Args:
      messages: The [messages](https://platform.openai.com/docs/api-reference/messages)
          the new thread should start with.

      metadata: Set of 16 key-value pairs that can be attached to an object, for
          keeping extra structured information about it and for querying objects
          via API or the dashboard.

          Keys are strings of at most 64 characters; values are strings of at
          most 512 characters.

      tool_resources: Resources made available to the assistant's tools in this
          thread. What is required depends on the tool: `code_interpreter` needs
          a list of file IDs, `file_search` needs a list of vector store IDs.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    # Beta header first; caller-supplied headers are merged after it and win on a clash.
    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    payload = await async_maybe_transform(
        {
            "messages": messages,
            "metadata": metadata,
            "tool_resources": tool_resources,
        },
        thread_create_params.ThreadCreateParams,
    )
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._post(
        "/threads",
        body=payload,
        options=request_options,
        cast_to=Thread,
    )
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
async def retrieve(
    self,
    thread_id: str,
    *,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Thread:
    """
    Retrieves a thread.

    Args:
      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `thread_id` is empty.
    """
    # An empty id would produce a malformed path, so reject it before any request is made.
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    path = f"/threads/{thread_id}"
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._get(path, options=request_options, cast_to=Thread)
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
async def update(
    self,
    thread_id: str,
    *,
    metadata: Optional[Metadata] | Omit = omit,
    tool_resources: Optional[thread_update_params.ToolResources] | Omit = omit,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Thread:
    """
    Modifies a thread.

    Args:
      metadata: Set of 16 key-value pairs that can be attached to an object, for
          keeping extra structured information about it and for querying objects
          via API or the dashboard.

          Keys are strings of at most 64 characters; values are strings of at
          most 512 characters.

      tool_resources: Resources made available to the assistant's tools in this
          thread. What is required depends on the tool: `code_interpreter` needs
          a list of file IDs, `file_search` needs a list of vector store IDs.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `thread_id` is empty.
    """
    # An empty id would produce a malformed path, so reject it before any request is made.
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    path = f"/threads/{thread_id}"
    payload = await async_maybe_transform(
        {
            "metadata": metadata,
            "tool_resources": tool_resources,
        },
        thread_update_params.ThreadUpdateParams,
    )
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._post(path, body=payload, options=request_options, cast_to=Thread)
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
async def delete(
    self,
    thread_id: str,
    *,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> ThreadDeleted:
    """
    Delete a thread.

    Args:
      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `thread_id` is empty.
    """
    # An empty id would produce a malformed path, so reject it before any request is made.
    if not thread_id:
        raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    path = f"/threads/{thread_id}"
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._delete(path, options=request_options, cast_to=ThreadDeleted)
@overload
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
async def create_and_run(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    stream: Optional[Literal[False]] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Run:
    """
    Create a thread and run it in one request.

    Typing overload for the non-streaming form (`stream` omitted, `None` or
    `False`), which is annotated to return a `Run`.

    Args:
      assistant_id: ID of the
          [assistant](https://platform.openai.com/docs/api-reference/assistants)
          that should execute this run.

      instructions: Replacement for the assistant's default system message.
          Handy for adjusting behavior on a per-run basis.

      max_completion_tokens: Upper bound on the completion tokens that may be
          used over the course of the run. The run makes a best effort to stay
          within this number across its turns; if it goes over, the run ends
          with status `incomplete`. See `incomplete_details` for more info.

      max_prompt_tokens: Upper bound on the prompt tokens that may be used over
          the course of the run. The run makes a best effort to stay within this
          number across its turns; if it goes over, the run ends with status
          `incomplete`. See `incomplete_details` for more info.

      metadata: Set of 16 key-value pairs that can be attached to an object, for
          keeping extra structured information about it and for querying objects
          via API or the dashboard.

          Keys are strings of at most 64 characters; values are strings of at
          most 512 characters.

      model: ID of the [Model](https://platform.openai.com/docs/api-reference/models)
          to execute this run with. A value given here overrides the model
          associated with the assistant; otherwise the assistant's model is used.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      response_format: The format the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          `{ "type": "json_schema", "json_schema": {...} }` turns on Structured
          Outputs, which ensures the model matches the JSON schema you supply.
          See the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          `{ "type": "json_object" }` turns on JSON mode, which ensures the
          message the model generates is valid JSON.

          **Important:** in JSON mode you **must** also tell the model to produce
          JSON yourself, via a system or user message. Otherwise the model may
          emit an unending stream of whitespace until the token limit is hit,
          which looks like a long-running, "stuck" request. Note too that the
          message content may be partially cut off if `finish_reason="length"`,
          meaning the generation exceeded `max_tokens` or the conversation
          exceeded the max context length.

      stream: If `true`, returns a stream of events that happen during the Run as
          server-sent events, terminating with a `data: [DONE]` message once the
          Run enters a terminal state.

      temperature: Sampling temperature, between 0 and 2. Higher values such as
          0.8 make the output more random; lower values such as 0.2 make it more
          focused and deterministic.

      thread: Options for creating the new thread. When no thread is provided,
          an empty thread will be created.

      tool_choice: Controls which (if any) tool the model calls. `none`: the
          model calls no tools and generates a message instead. `auto` (the
          default): the model may either generate a message or call one or more
          tools. `required`: the model must call one or more tools before
          responding to the user. Naming a tool, e.g. `{"type": "file_search"}`
          or `{"type": "function", "function": {"name": "my_function"}}`, forces
          the model to call that tool.

      tool_resources: Resources used by the assistant's tools. What is required
          depends on the tool: `code_interpreter` needs a list of file IDs,
          `file_search` needs a list of vector store IDs.

      tools: Replacement for the tools the assistant can use in this run. Handy
          for adjusting behavior on a per-run basis.

      top_p: Nucleus sampling, an alternative to sampling with temperature: the
          model considers the results of the tokens with top_p probability mass,
          so 0.1 means only the tokens comprising the top 10% probability mass
          are considered.

          We generally recommend altering this or temperature but not both.

      truncation_strategy: Controls for how a thread will be truncated prior to
          the run. Use this to control the initial context window of the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
- @overload
- @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
- async def create_and_run(
- self,
- *,
- assistant_id: str,
- stream: Literal[True],
- instructions: Optional[str] | Omit = omit,
- max_completion_tokens: Optional[int] | Omit = omit,
- max_prompt_tokens: Optional[int] | Omit = omit,
- metadata: Optional[Metadata] | Omit = omit,
- model: Union[str, ChatModel, None] | Omit = omit,
- parallel_tool_calls: bool | Omit = omit,
- response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
- temperature: Optional[float] | Omit = omit,
- thread: thread_create_and_run_params.Thread | Omit = omit,
- tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
- tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
- tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
- top_p: Optional[float] | Omit = omit,
- truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncStream[AssistantStreamEvent]:
- """
- Create a thread and run it in one request.
- Args:
- assistant_id: The ID of the
- [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
- execute this run.
- stream: If `true`, returns a stream of events that happen during the Run as server-sent
- events, terminating when the Run enters a terminal state with a `data: [DONE]`
- message.
- instructions: Override the default system message of the assistant. This is useful for
- modifying the behavior on a per-run basis.
- max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
- run. The run will make a best effort to use only the number of completion tokens
- specified, across multiple turns of the run. If the run exceeds the number of
- completion tokens specified, the run will end with status `incomplete`. See
- `incomplete_details` for more info.
- max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
- The run will make a best effort to use only the number of prompt tokens
- specified, across multiple turns of the run. If the run exceeds the number of
- prompt tokens specified, the run will end with status `incomplete`. See
- `incomplete_details` for more info.
- metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
- for storing additional information about the object in a structured format, and
- querying for objects via API or the dashboard.
- Keys are strings with a maximum length of 64 characters. Values are strings with
- a maximum length of 512 characters.
- model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
- be used to execute this run. If a value is provided here, it will override the
- model associated with the assistant. If not, the model associated with the
- assistant will be used.
- parallel_tool_calls: Whether to enable
- [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
- during tool use.
- response_format: Specifies the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
- and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
- Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
- Outputs which ensures the model will match your supplied JSON schema. Learn more
- in the
- [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
- Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
- message the model generates is valid JSON.
- **Important:** when using JSON mode, you **must** also instruct the model to
- produce JSON yourself via a system or user message. Without this, the model may
- generate an unending stream of whitespace until the generation reaches the token
- limit, resulting in a long-running and seemingly "stuck" request. Also note that
- the message content may be partially cut off if `finish_reason="length"`, which
- indicates the generation exceeded `max_tokens` or the conversation exceeded the
- max context length.
- temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
- make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
- thread: Options to create a new thread. If no thread is provided when running a request,
- an empty thread will be created.
- tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
- not call any tools and instead generates a message. `auto` is the default value
- and means the model can pick between generating a message or calling one or more
- tools. `required` means the model must call one or more tools before responding
- to the user. Specifying a particular tool like `{"type": "file_search"}` or
- `{"type": "function", "function": {"name": "my_function"}}` forces the model to
- call that tool.
- tool_resources: A set of resources that are used by the assistant's tools. The resources are
- specific to the type of tool. For example, the `code_interpreter` tool requires
- a list of file IDs, while the `file_search` tool requires a list of vector store
- IDs.
- tools: Override the tools the assistant can use for this run. This is useful for
- modifying the behavior on a per-run basis.
- top_p: An alternative to sampling with temperature, called nucleus sampling, where the
- model considers the results of the tokens with top_p probability mass. So 0.1
- means only the tokens comprising the top 10% probability mass are considered.
- We generally recommend altering this or temperature but not both.
- truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
- control the initial context window of the run.
- extra_headers: Send extra headers
- extra_query: Add additional query parameters to the request
- extra_body: Add additional JSON properties to the request
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
@overload
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
async def create_and_run(
    self,
    *,
    assistant_id: str,
    stream: bool,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    # The arguments below carry extra request parameters that have no dedicated keyword argument.
    # Values supplied here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Run | AsyncStream[AssistantStreamEvent]:
    """
    Create a thread and run it in one request.

    This overload covers a `stream` flag that is only known to be a `bool`, so the
    result is typed as either a `Run` or a stream of `AssistantStreamEvent`.

    Args:
      assistant_id: The ID of the
          [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
          execute this run.

      stream: If `true`, returns a stream of events that happen during the Run as
          server-sent events, terminating when the Run enters a terminal state with a
          `data: [DONE]` message.

      instructions: Override the default system message of the assistant. This is useful
          for modifying the behavior on a per-run basis.

      max_completion_tokens: The maximum number of completion tokens that may be used over
          the course of the run. The run will make a best effort to use only the number of
          completion tokens specified, across multiple turns of the run. If the run exceeds
          the number of completion tokens specified, the run will end with status
          `incomplete`. See `incomplete_details` for more info.

      max_prompt_tokens: The maximum number of prompt tokens that may be used over the
          course of the run. The run will make a best effort to use only the number of
          prompt tokens specified, across multiple turns of the run. If the run exceeds the
          number of prompt tokens specified, the run will end with status `incomplete`. See
          `incomplete_details` for more info.

      metadata: Set of 16 key-value pairs that can be attached to an object. This can be
          useful for storing additional information about the object in a structured
          format, and querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models)
          to be used to execute this run. If a value is provided here, it will override the
          model associated with the assistant. If not, the model associated with the
          assistant will be used.

      parallel_tool_calls: Whether to enable
          [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
          during tool use.

      response_format: Specifies the format that the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
          Outputs which ensures the model will match your supplied JSON schema. Learn more
          in the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
          message the model generates is valid JSON.

          **Important:** when using JSON mode, you **must** also instruct the model to
          produce JSON yourself via a system or user message. Without this, the model may
          generate an unending stream of whitespace until the generation reaches the token
          limit, resulting in a long-running and seemingly "stuck" request. Also note that
          the message content may be partially cut off if `finish_reason="length"`, which
          indicates the generation exceeded `max_tokens` or the conversation exceeded the
          max context length.

      temperature: What sampling temperature to use, between 0 and 2. Higher values like
          0.8 will make the output more random, while lower values like 0.2 will make it
          more focused and deterministic.

      thread: Options to create a new thread. If no thread is provided when running a
          request, an empty thread will be created.

      tool_choice: Controls which (if any) tool is called by the model. `none` means the
          model will not call any tools and instead generates a message. `auto` is the
          default value and means the model can pick between generating a message or
          calling one or more tools. `required` means the model must call one or more tools
          before responding to the user. Specifying a particular tool like
          `{"type": "file_search"}` or
          `{"type": "function", "function": {"name": "my_function"}}` forces the model to
          call that tool.

      tool_resources: A set of resources that are used by the assistant's tools. The
          resources are specific to the type of tool. For example, the `code_interpreter`
          tool requires a list of file IDs, while the `file_search` tool requires a list of
          vector store IDs.

      tools: Override the tools the assistant can use for this run. This is useful for
          modifying the behavior on a per-run basis.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where
          the model considers the results of the tokens with top_p probability mass. So 0.1
          means only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or temperature but not both.

      truncation_strategy: Controls for how a thread will be truncated prior to the run.
          Use this to control the initial context window of the run.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
# `deprecated` is applied exactly once, as the outermost decorator. The original stacked
# it both above and below `required_args`, giving two wrapper layers that each warn on a
# single call. Keeping the outer layer leaves the deprecation marker on the final callable.
@typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
@required_args(["assistant_id"], ["assistant_id", "stream"])
async def create_and_run(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Run | AsyncStream[AssistantStreamEvent]:
    """
    Create a thread and run it in one request.

    Shared implementation behind the `create_and_run` overloads. It POSTs the given
    parameters to `/threads/runs` with the `OpenAI-Beta: assistants=v2` header.

    Args:
      assistant_id: The ID of the assistant to use to execute this run.

      stream: When truthy the request body is transformed with the streaming params
          type and the call is made with `stream=True`; otherwise the non-streaming
          params type is used and the call is made with `stream=False`.

      extra_headers: Send extra headers. These are merged after the default
          `OpenAI-Beta` header, so a caller-supplied value for that key wins.

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    The remaining keyword arguments are forwarded unchanged as fields of the request
    body; see the overloads of this method for their meaning.

    Returns:
      The result of the underlying `_post` call, requested with `cast_to=Run` and
      `stream_cls=AsyncStream[AssistantStreamEvent]`.
    """
    # Default beta header first; the caller's mapping is spread last so it can override it.
    extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}

    # The params schema used for the transform depends on whether streaming was requested.
    params_type = (
        thread_create_and_run_params.ThreadCreateAndRunParamsStreaming
        if stream
        else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming
    )

    return await self._post(
        "/threads/runs",
        body=await async_maybe_transform(
            {
                "assistant_id": assistant_id,
                "instructions": instructions,
                "max_completion_tokens": max_completion_tokens,
                "max_prompt_tokens": max_prompt_tokens,
                "metadata": metadata,
                "model": model,
                "parallel_tool_calls": parallel_tool_calls,
                "response_format": response_format,
                "stream": stream,
                "temperature": temperature,
                "thread": thread,
                "tool_choice": tool_choice,
                "tool_resources": tool_resources,
                "tools": tools,
                "top_p": top_p,
                "truncation_strategy": truncation_strategy,
            },
            params_type,
        ),
        options=make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        ),
        cast_to=Run,
        # `stream` may be omitted or `None`; collapse any falsy value to a plain `False`.
        stream=stream or False,
        stream_cls=AsyncStream[AssistantStreamEvent],
    )
async def create_and_run_poll(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    poll_interval_ms: int | Omit = omit,
    # The arguments below carry extra request parameters that have no dedicated keyword argument.
    # Values supplied here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Run:
    """
    Create a thread, start a run on it, and poll that run until it reaches a terminal state.

    The run is created through `create_and_run` with `stream=False`, then its id and
    thread id are handed to `self.runs.poll` together with the same request options
    and `poll_interval_ms`.

    More information on Run lifecycles can be found here:
    https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
    """
    # Step 1: non-streaming create, forwarding every run parameter and request option.
    created_run = await self.create_and_run(  # pyright: ignore[reportDeprecated]
        assistant_id=assistant_id,
        stream=False,
        instructions=instructions,
        max_completion_tokens=max_completion_tokens,
        max_prompt_tokens=max_prompt_tokens,
        metadata=metadata,
        model=model,
        parallel_tool_calls=parallel_tool_calls,
        response_format=response_format,
        temperature=temperature,
        thread=thread,
        tool_choice=tool_choice,
        tool_resources=tool_resources,
        tools=tools,
        top_p=top_p,
        truncation_strategy=truncation_strategy,
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )

    # Step 2: delegate the waiting to the runs resource (arguments are positional, in this order).
    return await self.runs.poll(  # pyright: ignore[reportDeprecated]
        created_run.id,
        created_run.thread_id,
        extra_headers,
        extra_query,
        extra_body,
        timeout,
        poll_interval_ms,
    )
@overload
def create_and_run_stream(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    # The arguments below carry extra request parameters that have no dedicated keyword argument.
    # Values supplied here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
    """Create a thread and stream the run back.

    Overload for calls that pass no `event_handler`: the returned stream manager is
    typed with the default `AsyncAssistantEventHandler`.
    """
    ...
@overload
def create_and_run_stream(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    event_handler: AsyncAssistantEventHandlerT,
    # The arguments below carry extra request parameters that have no dedicated keyword argument.
    # Values supplied here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
    """Create a thread and stream the run back.

    Overload for calls that supply their own `event_handler`: the returned stream
    manager is typed with that handler's type.
    """
    ...
def create_and_run_stream(
    self,
    *,
    assistant_id: str,
    instructions: Optional[str] | Omit = omit,
    max_completion_tokens: Optional[int] | Omit = omit,
    max_prompt_tokens: Optional[int] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[str, ChatModel, None] | Omit = omit,
    parallel_tool_calls: bool | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    thread: thread_create_and_run_params.Thread | Omit = omit,
    tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
    tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
    tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
    event_handler: AsyncAssistantEventHandlerT | None = None,
    # The arguments below carry extra request parameters that have no dedicated keyword argument.
    # Values supplied here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> (
    AsyncAssistantStreamManager[AsyncAssistantEventHandler]
    | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
):
    """Create a thread and stream the run back.

    Builds a streaming POST to `/threads/runs` (the body always carries
    `"stream": True`) and hands the un-awaited request to an
    `AsyncAssistantStreamManager`. When no `event_handler` is supplied, a fresh
    `AsyncAssistantEventHandler` is used instead.
    """
    # Helper-identifying headers go first; the caller's `extra_headers` are spread last
    # and therefore override them on a key collision.
    custom_handler_flag = "true" if event_handler else "false"
    merged_headers = {
        "OpenAI-Beta": "assistants=v2",
        "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
        "X-Stainless-Custom-Event-Handler": custom_handler_flag,
        **(extra_headers or {}),
    }

    payload = maybe_transform(
        {
            "assistant_id": assistant_id,
            "instructions": instructions,
            "max_completion_tokens": max_completion_tokens,
            "max_prompt_tokens": max_prompt_tokens,
            "metadata": metadata,
            "model": model,
            "parallel_tool_calls": parallel_tool_calls,
            "response_format": response_format,
            "temperature": temperature,
            "tool_choice": tool_choice,
            "stream": True,
            "thread": thread,
            "tools": tools,
            "tool_resources": tool_resources,
            "truncation_strategy": truncation_strategy,
            "top_p": top_p,
        },
        thread_create_and_run_params.ThreadCreateAndRunParams,
    )
    request_options = make_request_options(
        extra_headers=merged_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
    )

    # Deliberately not awaited here: the pending request is passed to the stream manager.
    pending_request = self._post(
        "/threads/runs",
        body=payload,
        options=request_options,
        cast_to=Run,
        stream=True,
        stream_cls=AsyncStream[AssistantStreamEvent],
    )

    handler = event_handler or AsyncAssistantEventHandler()
    return AsyncAssistantStreamManager(pending_request, event_handler=handler)
class ThreadsWithRawResponse:
    """Companion to a `Threads` resource whose endpoint methods are wrapped with
    `_legacy_response.to_raw_response_wrapper`.

    The wrapped methods are `create`, `retrieve`, `update`, `delete` and
    `create_and_run`. The `runs` and `messages` sub-resources are exposed through
    their own `...WithRawResponse` companions.
    """

    def __init__(self, threads: Threads) -> None:
        self._threads = threads
        wrap = _legacy_response.to_raw_response_wrapper

        # pyright's deprecation diagnostic is suppressed on each wrapped method.
        self.create = wrap(threads.create)  # pyright: ignore[reportDeprecated]
        self.retrieve = wrap(threads.retrieve)  # pyright: ignore[reportDeprecated]
        self.update = wrap(threads.update)  # pyright: ignore[reportDeprecated]
        self.delete = wrap(threads.delete)  # pyright: ignore[reportDeprecated]
        self.create_and_run = wrap(threads.create_and_run)  # pyright: ignore[reportDeprecated]

    @cached_property
    def runs(self) -> RunsWithRawResponse:
        # Built on first access from the wrapped resource's `runs`.
        return RunsWithRawResponse(self._threads.runs)

    @cached_property
    def messages(self) -> MessagesWithRawResponse:
        # Built on first access from the wrapped resource's `messages`.
        return MessagesWithRawResponse(self._threads.messages)
class AsyncThreadsWithRawResponse:
    """Companion to an `AsyncThreads` resource whose endpoint methods are wrapped with
    `_legacy_response.async_to_raw_response_wrapper`.

    The wrapped methods are `create`, `retrieve`, `update`, `delete` and
    `create_and_run`. The `runs` and `messages` sub-resources are exposed through
    their own `Async...WithRawResponse` companions.
    """

    def __init__(self, threads: AsyncThreads) -> None:
        self._threads = threads
        wrap = _legacy_response.async_to_raw_response_wrapper

        # pyright's deprecation diagnostic is suppressed on each wrapped method.
        self.create = wrap(threads.create)  # pyright: ignore[reportDeprecated]
        self.retrieve = wrap(threads.retrieve)  # pyright: ignore[reportDeprecated]
        self.update = wrap(threads.update)  # pyright: ignore[reportDeprecated]
        self.delete = wrap(threads.delete)  # pyright: ignore[reportDeprecated]
        self.create_and_run = wrap(threads.create_and_run)  # pyright: ignore[reportDeprecated]

    @cached_property
    def runs(self) -> AsyncRunsWithRawResponse:
        # Built on first access from the wrapped resource's `runs`.
        return AsyncRunsWithRawResponse(self._threads.runs)

    @cached_property
    def messages(self) -> AsyncMessagesWithRawResponse:
        # Built on first access from the wrapped resource's `messages`.
        return AsyncMessagesWithRawResponse(self._threads.messages)
class ThreadsWithStreamingResponse:
    """Companion to a `Threads` resource whose endpoint methods are wrapped with
    `to_streamed_response_wrapper`.

    The wrapped methods are `create`, `retrieve`, `update`, `delete` and
    `create_and_run`. The `runs` and `messages` sub-resources are exposed through
    their own `...WithStreamingResponse` companions.
    """

    def __init__(self, threads: Threads) -> None:
        self._threads = threads
        wrap = to_streamed_response_wrapper

        # pyright's deprecation diagnostic is suppressed on each wrapped method.
        self.create = wrap(threads.create)  # pyright: ignore[reportDeprecated]
        self.retrieve = wrap(threads.retrieve)  # pyright: ignore[reportDeprecated]
        self.update = wrap(threads.update)  # pyright: ignore[reportDeprecated]
        self.delete = wrap(threads.delete)  # pyright: ignore[reportDeprecated]
        self.create_and_run = wrap(threads.create_and_run)  # pyright: ignore[reportDeprecated]

    @cached_property
    def runs(self) -> RunsWithStreamingResponse:
        # Built on first access from the wrapped resource's `runs`.
        return RunsWithStreamingResponse(self._threads.runs)

    @cached_property
    def messages(self) -> MessagesWithStreamingResponse:
        # Built on first access from the wrapped resource's `messages`.
        return MessagesWithStreamingResponse(self._threads.messages)
class AsyncThreadsWithStreamingResponse:
    """Companion to an `AsyncThreads` resource whose endpoint methods are wrapped with
    `async_to_streamed_response_wrapper`.

    The wrapped methods are `create`, `retrieve`, `update`, `delete` and
    `create_and_run`. The `runs` and `messages` sub-resources are exposed through
    their own `Async...WithStreamingResponse` companions.
    """

    def __init__(self, threads: AsyncThreads) -> None:
        self._threads = threads
        wrap = async_to_streamed_response_wrapper

        # pyright's deprecation diagnostic is suppressed on each wrapped method.
        self.create = wrap(threads.create)  # pyright: ignore[reportDeprecated]
        self.retrieve = wrap(threads.retrieve)  # pyright: ignore[reportDeprecated]
        self.update = wrap(threads.update)  # pyright: ignore[reportDeprecated]
        self.delete = wrap(threads.delete)  # pyright: ignore[reportDeprecated]
        self.create_and_run = wrap(threads.create_and_run)  # pyright: ignore[reportDeprecated]

    @cached_property
    def runs(self) -> AsyncRunsWithStreamingResponse:
        # Built on first access from the wrapped resource's `runs`.
        return AsyncRunsWithStreamingResponse(self._threads.runs)

    @cached_property
    def messages(self) -> AsyncMessagesWithStreamingResponse:
        # Built on first access from the wrapped resource's `messages`.
        return AsyncMessagesWithStreamingResponse(self._threads.messages)
|