threads.py 95 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935
  1. # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
  2. from __future__ import annotations
  3. import typing_extensions
  4. from typing import Union, Iterable, Optional
  5. from functools import partial
  6. from typing_extensions import Literal, overload
  7. import httpx
  8. from .... import _legacy_response
  9. from .messages import (
  10. Messages,
  11. AsyncMessages,
  12. MessagesWithRawResponse,
  13. AsyncMessagesWithRawResponse,
  14. MessagesWithStreamingResponse,
  15. AsyncMessagesWithStreamingResponse,
  16. )
  17. from ...._types import NOT_GIVEN, Body, Omit, Query, Headers, NotGiven, omit, not_given
  18. from ...._utils import required_args, maybe_transform, async_maybe_transform
  19. from .runs.runs import (
  20. Runs,
  21. AsyncRuns,
  22. RunsWithRawResponse,
  23. AsyncRunsWithRawResponse,
  24. RunsWithStreamingResponse,
  25. AsyncRunsWithStreamingResponse,
  26. )
  27. from ...._compat import cached_property
  28. from ...._resource import SyncAPIResource, AsyncAPIResource
  29. from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
  30. from ...._streaming import Stream, AsyncStream
  31. from ....types.beta import (
  32. thread_create_params,
  33. thread_update_params,
  34. thread_create_and_run_params,
  35. )
  36. from ...._base_client import make_request_options
  37. from ....lib.streaming import (
  38. AssistantEventHandler,
  39. AssistantEventHandlerT,
  40. AssistantStreamManager,
  41. AsyncAssistantEventHandler,
  42. AsyncAssistantEventHandlerT,
  43. AsyncAssistantStreamManager,
  44. )
  45. from ....types.beta.thread import Thread
  46. from ....types.beta.threads.run import Run
  47. from ....types.shared.chat_model import ChatModel
  48. from ....types.beta.thread_deleted import ThreadDeleted
  49. from ....types.shared_params.metadata import Metadata
  50. from ....types.beta.assistant_tool_param import AssistantToolParam
  51. from ....types.beta.assistant_stream_event import AssistantStreamEvent
  52. from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
  53. from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
  54. __all__ = ["Threads", "AsyncThreads"]
  55. class Threads(SyncAPIResource):
  @cached_property
  def runs(self) -> Runs:
      """Return a `Runs` resource constructed from this resource's client."""
      client = self._client
      return Runs(client)
  @cached_property
  def messages(self) -> Messages:
      """Return a `Messages` resource constructed from this resource's client."""
      client = self._client
      return Messages(client)
  @cached_property
  def with_raw_response(self) -> ThreadsWithRawResponse:
      """
      Prefix for any HTTP method call that yields the raw response object
      rather than the parsed content.

      For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
      """
      raw_view = ThreadsWithRawResponse(self)
      return raw_view
  @cached_property
  def with_streaming_response(self) -> ThreadsWithStreamingResponse:
      """
      Alternative to `.with_raw_response` that does not eagerly read the response body.

      For more information, see https://www.github.com/openai/openai-python#with_streaming_response
      """
      streaming_view = ThreadsWithStreamingResponse(self)
      return streaming_view
  @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  def create(
      self,
      *,
      messages: Iterable[thread_create_params.Message] | Omit = omit,
      metadata: Optional[Metadata] | Omit = omit,
      tool_resources: Optional[thread_create_params.ToolResources] | Omit = omit,
      # The arguments below let you pass additional parameters to the API that aren't available via kwargs.
      # Extra values given here take precedence over values defined on the client or passed to this method.
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
      extra_body: Body | None = None,
      timeout: float | httpx.Timeout | None | NotGiven = not_given,
  ) -> Thread:
      """
      Create a thread.

      Sends a POST to `/threads` and casts the result to `Thread`.

      Args:
        messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages)
            the thread starts with.

        metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
            keeping additional structured information about the object and for querying
            objects via API or the dashboard.

            Keys are strings of at most 64 characters. Values are strings of at most
            512 characters.

        tool_resources: Resources made available to the assistant's tools in this thread.
            They are specific to the tool type: for example, the `code_interpreter` tool
            requires a list of file IDs, while the `file_search` tool requires a list of
            vector store IDs.

        extra_headers: Send extra headers

        extra_query: Add additional query parameters to the request

        extra_body: Add additional JSON properties to the request

        timeout: Override the client-level default timeout for this request, in seconds
      """
      # Caller-supplied headers are spread last so they override the beta default.
      request_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
      payload = maybe_transform(
          {
              "messages": messages,
              "metadata": metadata,
              "tool_resources": tool_resources,
          },
          thread_create_params.ThreadCreateParams,
      )
      request_options = make_request_options(
          extra_headers=request_headers,
          extra_query=extra_query,
          extra_body=extra_body,
          timeout=timeout,
      )
      return self._post(
          "/threads",
          body=payload,
          options=request_options,
          cast_to=Thread,
      )
  @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  def retrieve(
      self,
      thread_id: str,
      *,
      # The arguments below let you pass additional parameters to the API that aren't available via kwargs.
      # Extra values given here take precedence over values defined on the client or passed to this method.
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
      extra_body: Body | None = None,
      timeout: float | httpx.Timeout | None | NotGiven = not_given,
  ) -> Thread:
      """
      Retrieves a thread.

      Sends a GET to `/threads/{thread_id}` and casts the result to `Thread`.

      Args:
        thread_id: Identifier interpolated into the request path; must be non-empty.

        extra_headers: Send extra headers

        extra_query: Add additional query parameters to the request

        extra_body: Add additional JSON properties to the request

        timeout: Override the client-level default timeout for this request, in seconds

      Raises:
        ValueError: If `thread_id` is empty (falsy).
      """
      # Reject an empty id before building the path.
      if not thread_id:
          raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

      # Caller-supplied headers are spread last so they override the beta default.
      request_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
      request_options = make_request_options(
          extra_headers=request_headers,
          extra_query=extra_query,
          extra_body=extra_body,
          timeout=timeout,
      )
      return self._get(
          f"/threads/{thread_id}",
          options=request_options,
          cast_to=Thread,
      )
  @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  def update(
      self,
      thread_id: str,
      *,
      metadata: Optional[Metadata] | Omit = omit,
      tool_resources: Optional[thread_update_params.ToolResources] | Omit = omit,
      # The arguments below let you pass additional parameters to the API that aren't available via kwargs.
      # Extra values given here take precedence over values defined on the client or passed to this method.
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
      extra_body: Body | None = None,
      timeout: float | httpx.Timeout | None | NotGiven = not_given,
  ) -> Thread:
      """
      Modifies a thread.

      Sends a POST to `/threads/{thread_id}` and casts the result to `Thread`.

      Args:
        thread_id: Identifier interpolated into the request path; must be non-empty.

        metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
            keeping additional structured information about the object and for querying
            objects via API or the dashboard.

            Keys are strings of at most 64 characters. Values are strings of at most
            512 characters.

        tool_resources: Resources made available to the assistant's tools in this thread.
            They are specific to the tool type: for example, the `code_interpreter` tool
            requires a list of file IDs, while the `file_search` tool requires a list of
            vector store IDs.

        extra_headers: Send extra headers

        extra_query: Add additional query parameters to the request

        extra_body: Add additional JSON properties to the request

        timeout: Override the client-level default timeout for this request, in seconds

      Raises:
        ValueError: If `thread_id` is empty (falsy).
      """
      # Reject an empty id before building the path.
      if not thread_id:
          raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

      # Caller-supplied headers are spread last so they override the beta default.
      request_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
      payload = maybe_transform(
          {
              "metadata": metadata,
              "tool_resources": tool_resources,
          },
          thread_update_params.ThreadUpdateParams,
      )
      request_options = make_request_options(
          extra_headers=request_headers,
          extra_query=extra_query,
          extra_body=extra_body,
          timeout=timeout,
      )
      return self._post(
          f"/threads/{thread_id}",
          body=payload,
          options=request_options,
          cast_to=Thread,
      )
  @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  def delete(
      self,
      thread_id: str,
      *,
      # The arguments below let you pass additional parameters to the API that aren't available via kwargs.
      # Extra values given here take precedence over values defined on the client or passed to this method.
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
      extra_body: Body | None = None,
      timeout: float | httpx.Timeout | None | NotGiven = not_given,
  ) -> ThreadDeleted:
      """
      Delete a thread.

      Sends a DELETE to `/threads/{thread_id}` and casts the result to `ThreadDeleted`.

      Args:
        thread_id: Identifier interpolated into the request path; must be non-empty.

        extra_headers: Send extra headers

        extra_query: Add additional query parameters to the request

        extra_body: Add additional JSON properties to the request

        timeout: Override the client-level default timeout for this request, in seconds

      Raises:
        ValueError: If `thread_id` is empty (falsy).
      """
      # Reject an empty id before building the path.
      if not thread_id:
          raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")

      # Caller-supplied headers are spread last so they override the beta default.
      request_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
      request_options = make_request_options(
          extra_headers=request_headers,
          extra_query=extra_query,
          extra_body=extra_body,
          timeout=timeout,
      )
      return self._delete(
          f"/threads/{thread_id}",
          options=request_options,
          cast_to=ThreadDeleted,
      )
  @overload
  @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  def create_and_run(
      self,
      *,
      assistant_id: str,
      instructions: Optional[str] | Omit = omit,
      max_completion_tokens: Optional[int] | Omit = omit,
      max_prompt_tokens: Optional[int] | Omit = omit,
      metadata: Optional[Metadata] | Omit = omit,
      model: Union[str, ChatModel, None] | Omit = omit,
      parallel_tool_calls: bool | Omit = omit,
      response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
      stream: Optional[Literal[False]] | Omit = omit,
      temperature: Optional[float] | Omit = omit,
      thread: thread_create_and_run_params.Thread | Omit = omit,
      tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
      tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
      tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
      top_p: Optional[float] | Omit = omit,
      truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
      # The arguments below let you pass additional parameters to the API that aren't available via kwargs.
      # Extra values given here take precedence over values defined on the client or passed to this method.
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
      extra_body: Body | None = None,
      timeout: float | httpx.Timeout | None | NotGiven = not_given,
  ) -> Run:
      """
      Create a thread and run it in one request.

      Typing overload for calls where `stream` is omitted, `None` or `False`;
      this signature returns a `Run`.

      Args:
        assistant_id: The ID of the
            [assistant](https://platform.openai.com/docs/api-reference/assistants) used to
            execute this run.

        instructions: Overrides the assistant's default system message. Useful for changing
            the behavior on a per-run basis.

        max_completion_tokens: Maximum number of completion tokens that may be used over the
            course of the run. The run makes a best effort to use only this many completion
            tokens across multiple turns. If the run exceeds the number specified, it ends
            with status `incomplete`. See `incomplete_details` for more info.

        max_prompt_tokens: Maximum number of prompt tokens that may be used over the course
            of the run. The run makes a best effort to use only this many prompt tokens
            across multiple turns. If the run exceeds the number specified, it ends with
            status `incomplete`. See `incomplete_details` for more info.

        metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
            keeping additional structured information about the object and for querying
            objects via API or the dashboard.

            Keys are strings of at most 64 characters. Values are strings of at most
            512 characters.

        model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models)
            used to execute this run. A value given here overrides the model associated
            with the assistant; otherwise the assistant's model is used.

        parallel_tool_calls: Whether to enable
            [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
            during tool use.

        response_format: Specifies the format that the model must output. Compatible with
            [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
            [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
            and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

            Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
            Outputs, which ensures the model will match your supplied JSON schema. Learn
            more in the
            [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

            Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
            message the model generates is valid JSON.

            **Important:** when using JSON mode, you **must** also instruct the model to
            produce JSON yourself via a system or user message. Without this, the model may
            generate an unending stream of whitespace until the generation reaches the
            token limit, resulting in a long-running and seemingly "stuck" request. Also
            note that the message content may be partially cut off if
            `finish_reason="length"`, which indicates the generation exceeded `max_tokens`
            or the conversation exceeded the max context length.

        stream: If `true`, returns a stream of events that happen during the Run as
            server-sent events, terminating when the Run enters a terminal state with a
            `data: [DONE]` message.

        temperature: Sampling temperature to use, between 0 and 2. Higher values like 0.8
            make the output more random, while lower values like 0.2 make it more focused
            and deterministic.

        thread: Options to create a new thread. If no thread is provided when running a
            request, an empty thread will be created.

        tool_choice: Controls which (if any) tool is called by the model. `none` means the
            model will not call any tools and instead generates a message. `auto` is the
            default value and means the model can pick between generating a message or
            calling one or more tools. `required` means the model must call one or more
            tools before responding to the user. Specifying a particular tool like
            `{"type": "file_search"}` or
            `{"type": "function", "function": {"name": "my_function"}}` forces the model to
            call that tool.

        tool_resources: A set of resources used by the assistant's tools. They are specific
            to the tool type: for example, the `code_interpreter` tool requires a list of
            file IDs, while the `file_search` tool requires a list of vector store IDs.

        tools: Overrides the tools the assistant can use for this run. Useful for changing
            the behavior on a per-run basis.

        top_p: An alternative to sampling with temperature, called nucleus sampling, where
            the model considers the results of the tokens with top_p probability mass. So
            0.1 means only the tokens comprising the top 10% probability mass are
            considered.

            We generally recommend altering this or temperature but not both.

        truncation_strategy: Controls for how a thread will be truncated prior to the run.
            Use this to control the initial context window of the run.

        extra_headers: Send extra headers

        extra_query: Add additional query parameters to the request

        extra_body: Add additional JSON properties to the request

        timeout: Override the client-level default timeout for this request, in seconds
      """
      ...
  @overload
  @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  def create_and_run(
      self,
      *,
      assistant_id: str,
      stream: Literal[True],
      instructions: Optional[str] | Omit = omit,
      max_completion_tokens: Optional[int] | Omit = omit,
      max_prompt_tokens: Optional[int] | Omit = omit,
      metadata: Optional[Metadata] | Omit = omit,
      model: Union[str, ChatModel, None] | Omit = omit,
      parallel_tool_calls: bool | Omit = omit,
      response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
      temperature: Optional[float] | Omit = omit,
      thread: thread_create_and_run_params.Thread | Omit = omit,
      tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
      tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
      tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
      top_p: Optional[float] | Omit = omit,
      truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
      # The arguments below let you pass additional parameters to the API that aren't available via kwargs.
      # Extra values given here take precedence over values defined on the client or passed to this method.
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
      extra_body: Body | None = None,
      timeout: float | httpx.Timeout | None | NotGiven = not_given,
  ) -> Stream[AssistantStreamEvent]:
      """
      Create a thread and run it in one request.

      Typing overload for calls that pass `stream=True`; this signature returns a
      `Stream[AssistantStreamEvent]`.

      Args:
        assistant_id: The ID of the
            [assistant](https://platform.openai.com/docs/api-reference/assistants) used to
            execute this run.

        stream: If `true`, returns a stream of events that happen during the Run as
            server-sent events, terminating when the Run enters a terminal state with a
            `data: [DONE]` message.

        instructions: Overrides the assistant's default system message. Useful for changing
            the behavior on a per-run basis.

        max_completion_tokens: Maximum number of completion tokens that may be used over the
            course of the run. The run makes a best effort to use only this many completion
            tokens across multiple turns. If the run exceeds the number specified, it ends
            with status `incomplete`. See `incomplete_details` for more info.

        max_prompt_tokens: Maximum number of prompt tokens that may be used over the course
            of the run. The run makes a best effort to use only this many prompt tokens
            across multiple turns. If the run exceeds the number specified, it ends with
            status `incomplete`. See `incomplete_details` for more info.

        metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
            keeping additional structured information about the object and for querying
            objects via API or the dashboard.

            Keys are strings of at most 64 characters. Values are strings of at most
            512 characters.

        model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models)
            used to execute this run. A value given here overrides the model associated
            with the assistant; otherwise the assistant's model is used.

        parallel_tool_calls: Whether to enable
            [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
            during tool use.

        response_format: Specifies the format that the model must output. Compatible with
            [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
            [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
            and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

            Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
            Outputs, which ensures the model will match your supplied JSON schema. Learn
            more in the
            [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

            Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
            message the model generates is valid JSON.

            **Important:** when using JSON mode, you **must** also instruct the model to
            produce JSON yourself via a system or user message. Without this, the model may
            generate an unending stream of whitespace until the generation reaches the
            token limit, resulting in a long-running and seemingly "stuck" request. Also
            note that the message content may be partially cut off if
            `finish_reason="length"`, which indicates the generation exceeded `max_tokens`
            or the conversation exceeded the max context length.

        temperature: Sampling temperature to use, between 0 and 2. Higher values like 0.8
            make the output more random, while lower values like 0.2 make it more focused
            and deterministic.

        thread: Options to create a new thread. If no thread is provided when running a
            request, an empty thread will be created.

        tool_choice: Controls which (if any) tool is called by the model. `none` means the
            model will not call any tools and instead generates a message. `auto` is the
            default value and means the model can pick between generating a message or
            calling one or more tools. `required` means the model must call one or more
            tools before responding to the user. Specifying a particular tool like
            `{"type": "file_search"}` or
            `{"type": "function", "function": {"name": "my_function"}}` forces the model to
            call that tool.

        tool_resources: A set of resources used by the assistant's tools. They are specific
            to the tool type: for example, the `code_interpreter` tool requires a list of
            file IDs, while the `file_search` tool requires a list of vector store IDs.

        tools: Overrides the tools the assistant can use for this run. Useful for changing
            the behavior on a per-run basis.

        top_p: An alternative to sampling with temperature, called nucleus sampling, where
            the model considers the results of the tokens with top_p probability mass. So
            0.1 means only the tokens comprising the top 10% probability mass are
            considered.

            We generally recommend altering this or temperature but not both.

        truncation_strategy: Controls for how a thread will be truncated prior to the run.
            Use this to control the initial context window of the run.

        extra_headers: Send extra headers

        extra_query: Add additional query parameters to the request

        extra_body: Add additional JSON properties to the request

        timeout: Override the client-level default timeout for this request, in seconds
      """
      ...
  450. @overload
  451. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  452. def create_and_run(
  453. self,
  454. *,
  455. assistant_id: str,
  456. stream: bool,
  457. instructions: Optional[str] | Omit = omit,
  458. max_completion_tokens: Optional[int] | Omit = omit,
  459. max_prompt_tokens: Optional[int] | Omit = omit,
  460. metadata: Optional[Metadata] | Omit = omit,
  461. model: Union[str, ChatModel, None] | Omit = omit,
  462. parallel_tool_calls: bool | Omit = omit,
  463. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  464. temperature: Optional[float] | Omit = omit,
  465. thread: thread_create_and_run_params.Thread | Omit = omit,
  466. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  467. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  468. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  469. top_p: Optional[float] | Omit = omit,
  470. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  471. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  472. # The extra values given here take precedence over values defined on the client or passed to this method.
  473. extra_headers: Headers | None = None,
  474. extra_query: Query | None = None,
  475. extra_body: Body | None = None,
  476. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  477. ) -> Run | Stream[AssistantStreamEvent]:
  478. """
  479. Create a thread and run it in one request.
  480. Args:
  481. assistant_id: The ID of the
  482. [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
  483. execute this run.
  484. stream: If `true`, returns a stream of events that happen during the Run as server-sent
  485. events, terminating when the Run enters a terminal state with a `data: [DONE]`
  486. message.
  487. instructions: Override the default system message of the assistant. This is useful for
  488. modifying the behavior on a per-run basis.
  489. max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
  490. run. The run will make a best effort to use only the number of completion tokens
  491. specified, across multiple turns of the run. If the run exceeds the number of
  492. completion tokens specified, the run will end with status `incomplete`. See
  493. `incomplete_details` for more info.
  494. max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
  495. The run will make a best effort to use only the number of prompt tokens
  496. specified, across multiple turns of the run. If the run exceeds the number of
  497. prompt tokens specified, the run will end with status `incomplete`. See
  498. `incomplete_details` for more info.
  499. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
  500. for storing additional information about the object in a structured format, and
  501. querying for objects via API or the dashboard.
  502. Keys are strings with a maximum length of 64 characters. Values are strings with
  503. a maximum length of 512 characters.
  504. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
  505. be used to execute this run. If a value is provided here, it will override the
  506. model associated with the assistant. If not, the model associated with the
  507. assistant will be used.
  508. parallel_tool_calls: Whether to enable
  509. [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
  510. during tool use.
  511. response_format: Specifies the format that the model must output. Compatible with
  512. [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
  513. [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
  514. and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
  515. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
  516. Outputs which ensures the model will match your supplied JSON schema. Learn more
  517. in the
  518. [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
  519. Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
  520. message the model generates is valid JSON.
  521. **Important:** when using JSON mode, you **must** also instruct the model to
  522. produce JSON yourself via a system or user message. Without this, the model may
  523. generate an unending stream of whitespace until the generation reaches the token
  524. limit, resulting in a long-running and seemingly "stuck" request. Also note that
  525. the message content may be partially cut off if `finish_reason="length"`, which
  526. indicates the generation exceeded `max_tokens` or the conversation exceeded the
  527. max context length.
  528. temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
  529. make the output more random, while lower values like 0.2 will make it more
  530. focused and deterministic.
  531. thread: Options to create a new thread. If no thread is provided when running a request,
  532. an empty thread will be created.
  533. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
  534. not call any tools and instead generates a message. `auto` is the default value
  535. and means the model can pick between generating a message or calling one or more
  536. tools. `required` means the model must call one or more tools before responding
  537. to the user. Specifying a particular tool like `{"type": "file_search"}` or
  538. `{"type": "function", "function": {"name": "my_function"}}` forces the model to
  539. call that tool.
  540. tool_resources: A set of resources that are used by the assistant's tools. The resources are
  541. specific to the type of tool. For example, the `code_interpreter` tool requires
  542. a list of file IDs, while the `file_search` tool requires a list of vector store
  543. IDs.
  544. tools: Override the tools the assistant can use for this run. This is useful for
  545. modifying the behavior on a per-run basis.
  546. top_p: An alternative to sampling with temperature, called nucleus sampling, where the
  547. model considers the results of the tokens with top_p probability mass. So 0.1
  548. means only the tokens comprising the top 10% probability mass are considered.
  549. We generally recommend altering this or temperature but not both.
  550. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
  551. control the initial context window of the run.
  552. extra_headers: Send extra headers
  553. extra_query: Add additional query parameters to the request
  554. extra_body: Add additional JSON properties to the request
  555. timeout: Override the client-level default timeout for this request, in seconds
  556. """
  557. ...
  558. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  559. @required_args(["assistant_id"], ["assistant_id", "stream"])
  560. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  561. def create_and_run(
  562. self,
  563. *,
  564. assistant_id: str,
  565. instructions: Optional[str] | Omit = omit,
  566. max_completion_tokens: Optional[int] | Omit = omit,
  567. max_prompt_tokens: Optional[int] | Omit = omit,
  568. metadata: Optional[Metadata] | Omit = omit,
  569. model: Union[str, ChatModel, None] | Omit = omit,
  570. parallel_tool_calls: bool | Omit = omit,
  571. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  572. stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
  573. temperature: Optional[float] | Omit = omit,
  574. thread: thread_create_and_run_params.Thread | Omit = omit,
  575. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  576. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  577. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  578. top_p: Optional[float] | Omit = omit,
  579. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  580. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  581. # The extra values given here take precedence over values defined on the client or passed to this method.
  582. extra_headers: Headers | None = None,
  583. extra_query: Query | None = None,
  584. extra_body: Body | None = None,
  585. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  586. ) -> Run | Stream[AssistantStreamEvent]:
  587. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  588. return self._post(
  589. "/threads/runs",
  590. body=maybe_transform(
  591. {
  592. "assistant_id": assistant_id,
  593. "instructions": instructions,
  594. "max_completion_tokens": max_completion_tokens,
  595. "max_prompt_tokens": max_prompt_tokens,
  596. "metadata": metadata,
  597. "model": model,
  598. "parallel_tool_calls": parallel_tool_calls,
  599. "response_format": response_format,
  600. "stream": stream,
  601. "temperature": temperature,
  602. "thread": thread,
  603. "tool_choice": tool_choice,
  604. "tool_resources": tool_resources,
  605. "tools": tools,
  606. "top_p": top_p,
  607. "truncation_strategy": truncation_strategy,
  608. },
  609. thread_create_and_run_params.ThreadCreateAndRunParamsStreaming
  610. if stream
  611. else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming,
  612. ),
  613. options=make_request_options(
  614. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  615. ),
  616. cast_to=Run,
  617. stream=stream or False,
  618. stream_cls=Stream[AssistantStreamEvent],
  619. )
  620. def create_and_run_poll(
  621. self,
  622. *,
  623. assistant_id: str,
  624. instructions: Optional[str] | Omit = omit,
  625. max_completion_tokens: Optional[int] | Omit = omit,
  626. max_prompt_tokens: Optional[int] | Omit = omit,
  627. metadata: Optional[Metadata] | Omit = omit,
  628. model: Union[str, ChatModel, None] | Omit = omit,
  629. parallel_tool_calls: bool | Omit = omit,
  630. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  631. temperature: Optional[float] | Omit = omit,
  632. thread: thread_create_and_run_params.Thread | Omit = omit,
  633. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  634. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  635. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  636. top_p: Optional[float] | Omit = omit,
  637. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  638. poll_interval_ms: int | Omit = omit,
  639. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  640. # The extra values given here take precedence over values defined on the client or passed to this method.
  641. extra_headers: Headers | None = None,
  642. extra_query: Query | None = None,
  643. extra_body: Body | None = None,
  644. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  645. ) -> Run:
  646. """
  647. A helper to create a thread, start a run and then poll for a terminal state.
  648. More information on Run lifecycles can be found here:
  649. https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
  650. """
  651. run = self.create_and_run( # pyright: ignore[reportDeprecated]
  652. assistant_id=assistant_id,
  653. instructions=instructions,
  654. max_completion_tokens=max_completion_tokens,
  655. max_prompt_tokens=max_prompt_tokens,
  656. metadata=metadata,
  657. model=model,
  658. parallel_tool_calls=parallel_tool_calls,
  659. response_format=response_format,
  660. temperature=temperature,
  661. stream=False,
  662. thread=thread,
  663. tool_resources=tool_resources,
  664. tool_choice=tool_choice,
  665. truncation_strategy=truncation_strategy,
  666. top_p=top_p,
  667. tools=tools,
  668. extra_headers=extra_headers,
  669. extra_query=extra_query,
  670. extra_body=extra_body,
  671. timeout=timeout,
  672. )
  673. return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms) # pyright: ignore[reportDeprecated]
  674. @overload
  675. def create_and_run_stream(
  676. self,
  677. *,
  678. assistant_id: str,
  679. instructions: Optional[str] | Omit = omit,
  680. max_completion_tokens: Optional[int] | Omit = omit,
  681. max_prompt_tokens: Optional[int] | Omit = omit,
  682. metadata: Optional[Metadata] | Omit = omit,
  683. model: Union[str, ChatModel, None] | Omit = omit,
  684. parallel_tool_calls: bool | Omit = omit,
  685. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  686. temperature: Optional[float] | Omit = omit,
  687. thread: thread_create_and_run_params.Thread | Omit = omit,
  688. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  689. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  690. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  691. top_p: Optional[float] | Omit = omit,
  692. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  693. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  694. # The extra values given here take precedence over values defined on the client or passed to this method.
  695. extra_headers: Headers | None = None,
  696. extra_query: Query | None = None,
  697. extra_body: Body | None = None,
  698. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  699. ) -> AssistantStreamManager[AssistantEventHandler]:
  700. """Create a thread and stream the run back"""
  701. ...
  702. @overload
  703. def create_and_run_stream(
  704. self,
  705. *,
  706. assistant_id: str,
  707. instructions: Optional[str] | Omit = omit,
  708. max_completion_tokens: Optional[int] | Omit = omit,
  709. max_prompt_tokens: Optional[int] | Omit = omit,
  710. metadata: Optional[Metadata] | Omit = omit,
  711. model: Union[str, ChatModel, None] | Omit = omit,
  712. parallel_tool_calls: bool | Omit = omit,
  713. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  714. temperature: Optional[float] | Omit = omit,
  715. thread: thread_create_and_run_params.Thread | Omit = omit,
  716. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  717. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  718. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  719. top_p: Optional[float] | Omit = omit,
  720. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  721. event_handler: AssistantEventHandlerT,
  722. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  723. # The extra values given here take precedence over values defined on the client or passed to this method.
  724. extra_headers: Headers | None = None,
  725. extra_query: Query | None = None,
  726. extra_body: Body | None = None,
  727. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  728. ) -> AssistantStreamManager[AssistantEventHandlerT]:
  729. """Create a thread and stream the run back"""
  730. ...
  731. def create_and_run_stream(
  732. self,
  733. *,
  734. assistant_id: str,
  735. instructions: Optional[str] | Omit = omit,
  736. max_completion_tokens: Optional[int] | Omit = omit,
  737. max_prompt_tokens: Optional[int] | Omit = omit,
  738. metadata: Optional[Metadata] | Omit = omit,
  739. model: Union[str, ChatModel, None] | Omit = omit,
  740. parallel_tool_calls: bool | Omit = omit,
  741. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  742. temperature: Optional[float] | Omit = omit,
  743. thread: thread_create_and_run_params.Thread | Omit = omit,
  744. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  745. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  746. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  747. top_p: Optional[float] | Omit = omit,
  748. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  749. event_handler: AssistantEventHandlerT | None = None,
  750. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  751. # The extra values given here take precedence over values defined on the client or passed to this method.
  752. extra_headers: Headers | None = None,
  753. extra_query: Query | None = None,
  754. extra_body: Body | None = None,
  755. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  756. ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
  757. """Create a thread and stream the run back"""
  758. extra_headers = {
  759. "OpenAI-Beta": "assistants=v2",
  760. "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
  761. "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
  762. **(extra_headers or {}),
  763. }
  764. make_request = partial(
  765. self._post,
  766. "/threads/runs",
  767. body=maybe_transform(
  768. {
  769. "assistant_id": assistant_id,
  770. "instructions": instructions,
  771. "max_completion_tokens": max_completion_tokens,
  772. "max_prompt_tokens": max_prompt_tokens,
  773. "metadata": metadata,
  774. "model": model,
  775. "parallel_tool_calls": parallel_tool_calls,
  776. "response_format": response_format,
  777. "temperature": temperature,
  778. "tool_choice": tool_choice,
  779. "stream": True,
  780. "thread": thread,
  781. "tools": tools,
  782. "tool_resources": tool_resources,
  783. "truncation_strategy": truncation_strategy,
  784. "top_p": top_p,
  785. },
  786. thread_create_and_run_params.ThreadCreateAndRunParams,
  787. ),
  788. options=make_request_options(
  789. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  790. ),
  791. cast_to=Run,
  792. stream=True,
  793. stream_cls=Stream[AssistantStreamEvent],
  794. )
  795. return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
  796. class AsyncThreads(AsyncAPIResource):
  797. @cached_property
  798. def runs(self) -> AsyncRuns:
  799. return AsyncRuns(self._client)
  800. @cached_property
  801. def messages(self) -> AsyncMessages:
  802. return AsyncMessages(self._client)
  803. @cached_property
  804. def with_raw_response(self) -> AsyncThreadsWithRawResponse:
  805. """
  806. This property can be used as a prefix for any HTTP method call to return
  807. the raw response object instead of the parsed content.
  808. For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
  809. """
  810. return AsyncThreadsWithRawResponse(self)
  811. @cached_property
  812. def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
  813. """
  814. An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  815. For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  816. """
  817. return AsyncThreadsWithStreamingResponse(self)
  818. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  819. async def create(
  820. self,
  821. *,
  822. messages: Iterable[thread_create_params.Message] | Omit = omit,
  823. metadata: Optional[Metadata] | Omit = omit,
  824. tool_resources: Optional[thread_create_params.ToolResources] | Omit = omit,
  825. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  826. # The extra values given here take precedence over values defined on the client or passed to this method.
  827. extra_headers: Headers | None = None,
  828. extra_query: Query | None = None,
  829. extra_body: Body | None = None,
  830. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  831. ) -> Thread:
  832. """
  833. Create a thread.
  834. Args:
  835. messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
  836. start the thread with.
  837. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
  838. for storing additional information about the object in a structured format, and
  839. querying for objects via API or the dashboard.
  840. Keys are strings with a maximum length of 64 characters. Values are strings with
  841. a maximum length of 512 characters.
  842. tool_resources: A set of resources that are made available to the assistant's tools in this
  843. thread. The resources are specific to the type of tool. For example, the
  844. `code_interpreter` tool requires a list of file IDs, while the `file_search`
  845. tool requires a list of vector store IDs.
  846. extra_headers: Send extra headers
  847. extra_query: Add additional query parameters to the request
  848. extra_body: Add additional JSON properties to the request
  849. timeout: Override the client-level default timeout for this request, in seconds
  850. """
  851. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  852. return await self._post(
  853. "/threads",
  854. body=await async_maybe_transform(
  855. {
  856. "messages": messages,
  857. "metadata": metadata,
  858. "tool_resources": tool_resources,
  859. },
  860. thread_create_params.ThreadCreateParams,
  861. ),
  862. options=make_request_options(
  863. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  864. ),
  865. cast_to=Thread,
  866. )
  867. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  868. async def retrieve(
  869. self,
  870. thread_id: str,
  871. *,
  872. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  873. # The extra values given here take precedence over values defined on the client or passed to this method.
  874. extra_headers: Headers | None = None,
  875. extra_query: Query | None = None,
  876. extra_body: Body | None = None,
  877. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  878. ) -> Thread:
  879. """
  880. Retrieves a thread.
  881. Args:
  882. extra_headers: Send extra headers
  883. extra_query: Add additional query parameters to the request
  884. extra_body: Add additional JSON properties to the request
  885. timeout: Override the client-level default timeout for this request, in seconds
  886. """
  887. if not thread_id:
  888. raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
  889. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  890. return await self._get(
  891. f"/threads/{thread_id}",
  892. options=make_request_options(
  893. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  894. ),
  895. cast_to=Thread,
  896. )
  897. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  898. async def update(
  899. self,
  900. thread_id: str,
  901. *,
  902. metadata: Optional[Metadata] | Omit = omit,
  903. tool_resources: Optional[thread_update_params.ToolResources] | Omit = omit,
  904. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  905. # The extra values given here take precedence over values defined on the client or passed to this method.
  906. extra_headers: Headers | None = None,
  907. extra_query: Query | None = None,
  908. extra_body: Body | None = None,
  909. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  910. ) -> Thread:
  911. """
  912. Modifies a thread.
  913. Args:
  914. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
  915. for storing additional information about the object in a structured format, and
  916. querying for objects via API or the dashboard.
  917. Keys are strings with a maximum length of 64 characters. Values are strings with
  918. a maximum length of 512 characters.
  919. tool_resources: A set of resources that are made available to the assistant's tools in this
  920. thread. The resources are specific to the type of tool. For example, the
  921. `code_interpreter` tool requires a list of file IDs, while the `file_search`
  922. tool requires a list of vector store IDs.
  923. extra_headers: Send extra headers
  924. extra_query: Add additional query parameters to the request
  925. extra_body: Add additional JSON properties to the request
  926. timeout: Override the client-level default timeout for this request, in seconds
  927. """
  928. if not thread_id:
  929. raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
  930. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  931. return await self._post(
  932. f"/threads/{thread_id}",
  933. body=await async_maybe_transform(
  934. {
  935. "metadata": metadata,
  936. "tool_resources": tool_resources,
  937. },
  938. thread_update_params.ThreadUpdateParams,
  939. ),
  940. options=make_request_options(
  941. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  942. ),
  943. cast_to=Thread,
  944. )
  945. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  946. async def delete(
  947. self,
  948. thread_id: str,
  949. *,
  950. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  951. # The extra values given here take precedence over values defined on the client or passed to this method.
  952. extra_headers: Headers | None = None,
  953. extra_query: Query | None = None,
  954. extra_body: Body | None = None,
  955. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  956. ) -> ThreadDeleted:
  957. """
  958. Delete a thread.
  959. Args:
  960. extra_headers: Send extra headers
  961. extra_query: Add additional query parameters to the request
  962. extra_body: Add additional JSON properties to the request
  963. timeout: Override the client-level default timeout for this request, in seconds
  964. """
  965. if not thread_id:
  966. raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
  967. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  968. return await self._delete(
  969. f"/threads/{thread_id}",
  970. options=make_request_options(
  971. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  972. ),
  973. cast_to=ThreadDeleted,
  974. )
  975. @overload
  976. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  977. async def create_and_run(
  978. self,
  979. *,
  980. assistant_id: str,
  981. instructions: Optional[str] | Omit = omit,
  982. max_completion_tokens: Optional[int] | Omit = omit,
  983. max_prompt_tokens: Optional[int] | Omit = omit,
  984. metadata: Optional[Metadata] | Omit = omit,
  985. model: Union[str, ChatModel, None] | Omit = omit,
  986. parallel_tool_calls: bool | Omit = omit,
  987. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  988. stream: Optional[Literal[False]] | Omit = omit,
  989. temperature: Optional[float] | Omit = omit,
  990. thread: thread_create_and_run_params.Thread | Omit = omit,
  991. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  992. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  993. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  994. top_p: Optional[float] | Omit = omit,
  995. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  996. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  997. # The extra values given here take precedence over values defined on the client or passed to this method.
  998. extra_headers: Headers | None = None,
  999. extra_query: Query | None = None,
  1000. extra_body: Body | None = None,
  1001. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  1002. ) -> Run:
  1003. """
  1004. Create a thread and run it in one request.
  1005. Args:
  1006. assistant_id: The ID of the
  1007. [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
  1008. execute this run.
  1009. instructions: Override the default system message of the assistant. This is useful for
  1010. modifying the behavior on a per-run basis.
  1011. max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
  1012. run. The run will make a best effort to use only the number of completion tokens
  1013. specified, across multiple turns of the run. If the run exceeds the number of
  1014. completion tokens specified, the run will end with status `incomplete`. See
  1015. `incomplete_details` for more info.
  1016. max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
  1017. The run will make a best effort to use only the number of prompt tokens
  1018. specified, across multiple turns of the run. If the run exceeds the number of
  1019. prompt tokens specified, the run will end with status `incomplete`. See
  1020. `incomplete_details` for more info.
  1021. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
  1022. for storing additional information about the object in a structured format, and
  1023. querying for objects via API or the dashboard.
  1024. Keys are strings with a maximum length of 64 characters. Values are strings with
  1025. a maximum length of 512 characters.
  1026. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
  1027. be used to execute this run. If a value is provided here, it will override the
  1028. model associated with the assistant. If not, the model associated with the
  1029. assistant will be used.
  1030. parallel_tool_calls: Whether to enable
  1031. [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
  1032. during tool use.
  1033. response_format: Specifies the format that the model must output. Compatible with
  1034. [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
  1035. [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
  1036. and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
  1037. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
  1038. Outputs which ensures the model will match your supplied JSON schema. Learn more
  1039. in the
  1040. [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
  1041. Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
  1042. message the model generates is valid JSON.
  1043. **Important:** when using JSON mode, you **must** also instruct the model to
  1044. produce JSON yourself via a system or user message. Without this, the model may
  1045. generate an unending stream of whitespace until the generation reaches the token
  1046. limit, resulting in a long-running and seemingly "stuck" request. Also note that
  1047. the message content may be partially cut off if `finish_reason="length"`, which
  1048. indicates the generation exceeded `max_tokens` or the conversation exceeded the
  1049. max context length.
  1050. stream: If `true`, returns a stream of events that happen during the Run as server-sent
  1051. events, terminating when the Run enters a terminal state with a `data: [DONE]`
  1052. message.
  1053. temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
  1054. make the output more random, while lower values like 0.2 will make it more
  1055. focused and deterministic.
  1056. thread: Options to create a new thread. If no thread is provided when running a request,
  1057. an empty thread will be created.
  1058. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
  1059. not call any tools and instead generates a message. `auto` is the default value
  1060. and means the model can pick between generating a message or calling one or more
  1061. tools. `required` means the model must call one or more tools before responding
  1062. to the user. Specifying a particular tool like `{"type": "file_search"}` or
  1063. `{"type": "function", "function": {"name": "my_function"}}` forces the model to
  1064. call that tool.
  1065. tool_resources: A set of resources that are used by the assistant's tools. The resources are
  1066. specific to the type of tool. For example, the `code_interpreter` tool requires
  1067. a list of file IDs, while the `file_search` tool requires a list of vector store
  1068. IDs.
  1069. tools: Override the tools the assistant can use for this run. This is useful for
  1070. modifying the behavior on a per-run basis.
  1071. top_p: An alternative to sampling with temperature, called nucleus sampling, where the
  1072. model considers the results of the tokens with top_p probability mass. So 0.1
  1073. means only the tokens comprising the top 10% probability mass are considered.
  1074. We generally recommend altering this or temperature but not both.
  1075. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
  1076. control the initial context window of the run.
  1077. extra_headers: Send extra headers
  1078. extra_query: Add additional query parameters to the request
  1079. extra_body: Add additional JSON properties to the request
  1080. timeout: Override the client-level default timeout for this request, in seconds
  1081. """
  1082. ...
  1083. @overload
  1084. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  1085. async def create_and_run(
  1086. self,
  1087. *,
  1088. assistant_id: str,
  1089. stream: Literal[True],
  1090. instructions: Optional[str] | Omit = omit,
  1091. max_completion_tokens: Optional[int] | Omit = omit,
  1092. max_prompt_tokens: Optional[int] | Omit = omit,
  1093. metadata: Optional[Metadata] | Omit = omit,
  1094. model: Union[str, ChatModel, None] | Omit = omit,
  1095. parallel_tool_calls: bool | Omit = omit,
  1096. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  1097. temperature: Optional[float] | Omit = omit,
  1098. thread: thread_create_and_run_params.Thread | Omit = omit,
  1099. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  1100. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  1101. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  1102. top_p: Optional[float] | Omit = omit,
  1103. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  1104. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  1105. # The extra values given here take precedence over values defined on the client or passed to this method.
  1106. extra_headers: Headers | None = None,
  1107. extra_query: Query | None = None,
  1108. extra_body: Body | None = None,
  1109. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  1110. ) -> AsyncStream[AssistantStreamEvent]:
  1111. """
  1112. Create a thread and run it in one request.
  1113. Args:
  1114. assistant_id: The ID of the
  1115. [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
  1116. execute this run.
  1117. stream: If `true`, returns a stream of events that happen during the Run as server-sent
  1118. events, terminating when the Run enters a terminal state with a `data: [DONE]`
  1119. message.
  1120. instructions: Override the default system message of the assistant. This is useful for
  1121. modifying the behavior on a per-run basis.
  1122. max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
  1123. run. The run will make a best effort to use only the number of completion tokens
  1124. specified, across multiple turns of the run. If the run exceeds the number of
  1125. completion tokens specified, the run will end with status `incomplete`. See
  1126. `incomplete_details` for more info.
  1127. max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
  1128. The run will make a best effort to use only the number of prompt tokens
  1129. specified, across multiple turns of the run. If the run exceeds the number of
  1130. prompt tokens specified, the run will end with status `incomplete`. See
  1131. `incomplete_details` for more info.
  1132. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
  1133. for storing additional information about the object in a structured format, and
  1134. querying for objects via API or the dashboard.
  1135. Keys are strings with a maximum length of 64 characters. Values are strings with
  1136. a maximum length of 512 characters.
  1137. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
  1138. be used to execute this run. If a value is provided here, it will override the
  1139. model associated with the assistant. If not, the model associated with the
  1140. assistant will be used.
  1141. parallel_tool_calls: Whether to enable
  1142. [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
  1143. during tool use.
  1144. response_format: Specifies the format that the model must output. Compatible with
  1145. [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
  1146. [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
  1147. and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
  1148. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
  1149. Outputs which ensures the model will match your supplied JSON schema. Learn more
  1150. in the
  1151. [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
  1152. Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
  1153. message the model generates is valid JSON.
  1154. **Important:** when using JSON mode, you **must** also instruct the model to
  1155. produce JSON yourself via a system or user message. Without this, the model may
  1156. generate an unending stream of whitespace until the generation reaches the token
  1157. limit, resulting in a long-running and seemingly "stuck" request. Also note that
  1158. the message content may be partially cut off if `finish_reason="length"`, which
  1159. indicates the generation exceeded `max_tokens` or the conversation exceeded the
  1160. max context length.
  1161. temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
  1162. make the output more random, while lower values like 0.2 will make it more
  1163. focused and deterministic.
  1164. thread: Options to create a new thread. If no thread is provided when running a request,
  1165. an empty thread will be created.
  1166. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
  1167. not call any tools and instead generates a message. `auto` is the default value
  1168. and means the model can pick between generating a message or calling one or more
  1169. tools. `required` means the model must call one or more tools before responding
  1170. to the user. Specifying a particular tool like `{"type": "file_search"}` or
  1171. `{"type": "function", "function": {"name": "my_function"}}` forces the model to
  1172. call that tool.
  1173. tool_resources: A set of resources that are used by the assistant's tools. The resources are
  1174. specific to the type of tool. For example, the `code_interpreter` tool requires
  1175. a list of file IDs, while the `file_search` tool requires a list of vector store
  1176. IDs.
  1177. tools: Override the tools the assistant can use for this run. This is useful for
  1178. modifying the behavior on a per-run basis.
  1179. top_p: An alternative to sampling with temperature, called nucleus sampling, where the
  1180. model considers the results of the tokens with top_p probability mass. So 0.1
  1181. means only the tokens comprising the top 10% probability mass are considered.
  1182. We generally recommend altering this or temperature but not both.
  1183. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
  1184. control the initial context window of the run.
  1185. extra_headers: Send extra headers
  1186. extra_query: Add additional query parameters to the request
  1187. extra_body: Add additional JSON properties to the request
  1188. timeout: Override the client-level default timeout for this request, in seconds
  1189. """
  1190. ...
  1191. @overload
  1192. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  1193. async def create_and_run(
  1194. self,
  1195. *,
  1196. assistant_id: str,
  1197. stream: bool,
  1198. instructions: Optional[str] | Omit = omit,
  1199. max_completion_tokens: Optional[int] | Omit = omit,
  1200. max_prompt_tokens: Optional[int] | Omit = omit,
  1201. metadata: Optional[Metadata] | Omit = omit,
  1202. model: Union[str, ChatModel, None] | Omit = omit,
  1203. parallel_tool_calls: bool | Omit = omit,
  1204. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  1205. temperature: Optional[float] | Omit = omit,
  1206. thread: thread_create_and_run_params.Thread | Omit = omit,
  1207. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  1208. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  1209. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  1210. top_p: Optional[float] | Omit = omit,
  1211. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  1212. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  1213. # The extra values given here take precedence over values defined on the client or passed to this method.
  1214. extra_headers: Headers | None = None,
  1215. extra_query: Query | None = None,
  1216. extra_body: Body | None = None,
  1217. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  1218. ) -> Run | AsyncStream[AssistantStreamEvent]:
  1219. """
  1220. Create a thread and run it in one request.
  1221. Args:
  1222. assistant_id: The ID of the
  1223. [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
  1224. execute this run.
  1225. stream: If `true`, returns a stream of events that happen during the Run as server-sent
  1226. events, terminating when the Run enters a terminal state with a `data: [DONE]`
  1227. message.
  1228. instructions: Override the default system message of the assistant. This is useful for
  1229. modifying the behavior on a per-run basis.
  1230. max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
  1231. run. The run will make a best effort to use only the number of completion tokens
  1232. specified, across multiple turns of the run. If the run exceeds the number of
  1233. completion tokens specified, the run will end with status `incomplete`. See
  1234. `incomplete_details` for more info.
  1235. max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
  1236. The run will make a best effort to use only the number of prompt tokens
  1237. specified, across multiple turns of the run. If the run exceeds the number of
  1238. prompt tokens specified, the run will end with status `incomplete`. See
  1239. `incomplete_details` for more info.
  1240. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
  1241. for storing additional information about the object in a structured format, and
  1242. querying for objects via API or the dashboard.
  1243. Keys are strings with a maximum length of 64 characters. Values are strings with
  1244. a maximum length of 512 characters.
  1245. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
  1246. be used to execute this run. If a value is provided here, it will override the
  1247. model associated with the assistant. If not, the model associated with the
  1248. assistant will be used.
  1249. parallel_tool_calls: Whether to enable
  1250. [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling)
  1251. during tool use.
  1252. response_format: Specifies the format that the model must output. Compatible with
  1253. [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
  1254. [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
  1255. and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
  1256. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
  1257. Outputs which ensures the model will match your supplied JSON schema. Learn more
  1258. in the
  1259. [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
  1260. Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
  1261. message the model generates is valid JSON.
  1262. **Important:** when using JSON mode, you **must** also instruct the model to
  1263. produce JSON yourself via a system or user message. Without this, the model may
  1264. generate an unending stream of whitespace until the generation reaches the token
  1265. limit, resulting in a long-running and seemingly "stuck" request. Also note that
  1266. the message content may be partially cut off if `finish_reason="length"`, which
  1267. indicates the generation exceeded `max_tokens` or the conversation exceeded the
  1268. max context length.
  1269. temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
  1270. make the output more random, while lower values like 0.2 will make it more
  1271. focused and deterministic.
  1272. thread: Options to create a new thread. If no thread is provided when running a request,
  1273. an empty thread will be created.
  1274. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
  1275. not call any tools and instead generates a message. `auto` is the default value
  1276. and means the model can pick between generating a message or calling one or more
  1277. tools. `required` means the model must call one or more tools before responding
  1278. to the user. Specifying a particular tool like `{"type": "file_search"}` or
  1279. `{"type": "function", "function": {"name": "my_function"}}` forces the model to
  1280. call that tool.
  1281. tool_resources: A set of resources that are used by the assistant's tools. The resources are
  1282. specific to the type of tool. For example, the `code_interpreter` tool requires
  1283. a list of file IDs, while the `file_search` tool requires a list of vector store
  1284. IDs.
  1285. tools: Override the tools the assistant can use for this run. This is useful for
  1286. modifying the behavior on a per-run basis.
  1287. top_p: An alternative to sampling with temperature, called nucleus sampling, where the
  1288. model considers the results of the tokens with top_p probability mass. So 0.1
  1289. means only the tokens comprising the top 10% probability mass are considered.
  1290. We generally recommend altering this or temperature but not both.
  1291. truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to
  1292. control the initial context window of the run.
  1293. extra_headers: Send extra headers
  1294. extra_query: Add additional query parameters to the request
  1295. extra_body: Add additional JSON properties to the request
  1296. timeout: Override the client-level default timeout for this request, in seconds
  1297. """
  1298. ...
  1299. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  1300. @required_args(["assistant_id"], ["assistant_id", "stream"])
  1301. @typing_extensions.deprecated("The Assistants API is deprecated in favor of the Responses API")
  1302. async def create_and_run(
  1303. self,
  1304. *,
  1305. assistant_id: str,
  1306. instructions: Optional[str] | Omit = omit,
  1307. max_completion_tokens: Optional[int] | Omit = omit,
  1308. max_prompt_tokens: Optional[int] | Omit = omit,
  1309. metadata: Optional[Metadata] | Omit = omit,
  1310. model: Union[str, ChatModel, None] | Omit = omit,
  1311. parallel_tool_calls: bool | Omit = omit,
  1312. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  1313. stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
  1314. temperature: Optional[float] | Omit = omit,
  1315. thread: thread_create_and_run_params.Thread | Omit = omit,
  1316. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  1317. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  1318. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  1319. top_p: Optional[float] | Omit = omit,
  1320. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  1321. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  1322. # The extra values given here take precedence over values defined on the client or passed to this method.
  1323. extra_headers: Headers | None = None,
  1324. extra_query: Query | None = None,
  1325. extra_body: Body | None = None,
  1326. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  1327. ) -> Run | AsyncStream[AssistantStreamEvent]:
  1328. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  1329. return await self._post(
  1330. "/threads/runs",
  1331. body=await async_maybe_transform(
  1332. {
  1333. "assistant_id": assistant_id,
  1334. "instructions": instructions,
  1335. "max_completion_tokens": max_completion_tokens,
  1336. "max_prompt_tokens": max_prompt_tokens,
  1337. "metadata": metadata,
  1338. "model": model,
  1339. "parallel_tool_calls": parallel_tool_calls,
  1340. "response_format": response_format,
  1341. "stream": stream,
  1342. "temperature": temperature,
  1343. "thread": thread,
  1344. "tool_choice": tool_choice,
  1345. "tool_resources": tool_resources,
  1346. "tools": tools,
  1347. "top_p": top_p,
  1348. "truncation_strategy": truncation_strategy,
  1349. },
  1350. thread_create_and_run_params.ThreadCreateAndRunParamsStreaming
  1351. if stream
  1352. else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming,
  1353. ),
  1354. options=make_request_options(
  1355. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  1356. ),
  1357. cast_to=Run,
  1358. stream=stream or False,
  1359. stream_cls=AsyncStream[AssistantStreamEvent],
  1360. )
  1361. async def create_and_run_poll(
  1362. self,
  1363. *,
  1364. assistant_id: str,
  1365. instructions: Optional[str] | Omit = omit,
  1366. max_completion_tokens: Optional[int] | Omit = omit,
  1367. max_prompt_tokens: Optional[int] | Omit = omit,
  1368. metadata: Optional[Metadata] | Omit = omit,
  1369. model: Union[str, ChatModel, None] | Omit = omit,
  1370. parallel_tool_calls: bool | Omit = omit,
  1371. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  1372. temperature: Optional[float] | Omit = omit,
  1373. thread: thread_create_and_run_params.Thread | Omit = omit,
  1374. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  1375. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  1376. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  1377. top_p: Optional[float] | Omit = omit,
  1378. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  1379. poll_interval_ms: int | Omit = omit,
  1380. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  1381. # The extra values given here take precedence over values defined on the client or passed to this method.
  1382. extra_headers: Headers | None = None,
  1383. extra_query: Query | None = None,
  1384. extra_body: Body | None = None,
  1385. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  1386. ) -> Run:
  1387. """
  1388. A helper to create a thread, start a run and then poll for a terminal state.
  1389. More information on Run lifecycles can be found here:
  1390. https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
  1391. """
  1392. run = await self.create_and_run( # pyright: ignore[reportDeprecated]
  1393. assistant_id=assistant_id,
  1394. instructions=instructions,
  1395. max_completion_tokens=max_completion_tokens,
  1396. max_prompt_tokens=max_prompt_tokens,
  1397. metadata=metadata,
  1398. model=model,
  1399. parallel_tool_calls=parallel_tool_calls,
  1400. response_format=response_format,
  1401. temperature=temperature,
  1402. stream=False,
  1403. thread=thread,
  1404. tool_resources=tool_resources,
  1405. tool_choice=tool_choice,
  1406. truncation_strategy=truncation_strategy,
  1407. top_p=top_p,
  1408. tools=tools,
  1409. extra_headers=extra_headers,
  1410. extra_query=extra_query,
  1411. extra_body=extra_body,
  1412. timeout=timeout,
  1413. )
  1414. return await self.runs.poll( # pyright: ignore[reportDeprecated]
  1415. run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms
  1416. )
  1417. @overload
  1418. def create_and_run_stream(
  1419. self,
  1420. *,
  1421. assistant_id: str,
  1422. instructions: Optional[str] | Omit = omit,
  1423. max_completion_tokens: Optional[int] | Omit = omit,
  1424. max_prompt_tokens: Optional[int] | Omit = omit,
  1425. metadata: Optional[Metadata] | Omit = omit,
  1426. model: Union[str, ChatModel, None] | Omit = omit,
  1427. parallel_tool_calls: bool | Omit = omit,
  1428. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  1429. temperature: Optional[float] | Omit = omit,
  1430. thread: thread_create_and_run_params.Thread | Omit = omit,
  1431. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  1432. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  1433. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  1434. top_p: Optional[float] | Omit = omit,
  1435. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  1436. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  1437. # The extra values given here take precedence over values defined on the client or passed to this method.
  1438. extra_headers: Headers | None = None,
  1439. extra_query: Query | None = None,
  1440. extra_body: Body | None = None,
  1441. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  1442. ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
  1443. """Create a thread and stream the run back"""
  1444. ...
  1445. @overload
  1446. def create_and_run_stream(
  1447. self,
  1448. *,
  1449. assistant_id: str,
  1450. instructions: Optional[str] | Omit = omit,
  1451. max_completion_tokens: Optional[int] | Omit = omit,
  1452. max_prompt_tokens: Optional[int] | Omit = omit,
  1453. metadata: Optional[Metadata] | Omit = omit,
  1454. model: Union[str, ChatModel, None] | Omit = omit,
  1455. parallel_tool_calls: bool | Omit = omit,
  1456. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  1457. temperature: Optional[float] | Omit = omit,
  1458. thread: thread_create_and_run_params.Thread | Omit = omit,
  1459. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  1460. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  1461. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  1462. top_p: Optional[float] | Omit = omit,
  1463. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  1464. event_handler: AsyncAssistantEventHandlerT,
  1465. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  1466. # The extra values given here take precedence over values defined on the client or passed to this method.
  1467. extra_headers: Headers | None = None,
  1468. extra_query: Query | None = None,
  1469. extra_body: Body | None = None,
  1470. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  1471. ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
  1472. """Create a thread and stream the run back"""
  1473. ...
  1474. def create_and_run_stream(
  1475. self,
  1476. *,
  1477. assistant_id: str,
  1478. instructions: Optional[str] | Omit = omit,
  1479. max_completion_tokens: Optional[int] | Omit = omit,
  1480. max_prompt_tokens: Optional[int] | Omit = omit,
  1481. metadata: Optional[Metadata] | Omit = omit,
  1482. model: Union[str, ChatModel, None] | Omit = omit,
  1483. parallel_tool_calls: bool | Omit = omit,
  1484. response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
  1485. temperature: Optional[float] | Omit = omit,
  1486. thread: thread_create_and_run_params.Thread | Omit = omit,
  1487. tool_choice: Optional[AssistantToolChoiceOptionParam] | Omit = omit,
  1488. tool_resources: Optional[thread_create_and_run_params.ToolResources] | Omit = omit,
  1489. tools: Optional[Iterable[AssistantToolParam]] | Omit = omit,
  1490. top_p: Optional[float] | Omit = omit,
  1491. truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | Omit = omit,
  1492. event_handler: AsyncAssistantEventHandlerT | None = None,
  1493. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  1494. # The extra values given here take precedence over values defined on the client or passed to this method.
  1495. extra_headers: Headers | None = None,
  1496. extra_query: Query | None = None,
  1497. extra_body: Body | None = None,
  1498. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  1499. ) -> (
  1500. AsyncAssistantStreamManager[AsyncAssistantEventHandler]
  1501. | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
  1502. ):
  1503. """Create a thread and stream the run back"""
  1504. extra_headers = {
  1505. "OpenAI-Beta": "assistants=v2",
  1506. "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
  1507. "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
  1508. **(extra_headers or {}),
  1509. }
  1510. request = self._post(
  1511. "/threads/runs",
  1512. body=maybe_transform(
  1513. {
  1514. "assistant_id": assistant_id,
  1515. "instructions": instructions,
  1516. "max_completion_tokens": max_completion_tokens,
  1517. "max_prompt_tokens": max_prompt_tokens,
  1518. "metadata": metadata,
  1519. "model": model,
  1520. "parallel_tool_calls": parallel_tool_calls,
  1521. "response_format": response_format,
  1522. "temperature": temperature,
  1523. "tool_choice": tool_choice,
  1524. "stream": True,
  1525. "thread": thread,
  1526. "tools": tools,
  1527. "tool_resources": tool_resources,
  1528. "truncation_strategy": truncation_strategy,
  1529. "top_p": top_p,
  1530. },
  1531. thread_create_and_run_params.ThreadCreateAndRunParams,
  1532. ),
  1533. options=make_request_options(
  1534. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  1535. ),
  1536. cast_to=Run,
  1537. stream=True,
  1538. stream_cls=AsyncStream[AssistantStreamEvent],
  1539. )
  1540. return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
  1541. class ThreadsWithRawResponse:
  1542. def __init__(self, threads: Threads) -> None:
  1543. self._threads = threads
  1544. self.create = ( # pyright: ignore[reportDeprecated]
  1545. _legacy_response.to_raw_response_wrapper(
  1546. threads.create, # pyright: ignore[reportDeprecated],
  1547. )
  1548. )
  1549. self.retrieve = ( # pyright: ignore[reportDeprecated]
  1550. _legacy_response.to_raw_response_wrapper(
  1551. threads.retrieve, # pyright: ignore[reportDeprecated],
  1552. )
  1553. )
  1554. self.update = ( # pyright: ignore[reportDeprecated]
  1555. _legacy_response.to_raw_response_wrapper(
  1556. threads.update, # pyright: ignore[reportDeprecated],
  1557. )
  1558. )
  1559. self.delete = ( # pyright: ignore[reportDeprecated]
  1560. _legacy_response.to_raw_response_wrapper(
  1561. threads.delete, # pyright: ignore[reportDeprecated],
  1562. )
  1563. )
  1564. self.create_and_run = ( # pyright: ignore[reportDeprecated]
  1565. _legacy_response.to_raw_response_wrapper(
  1566. threads.create_and_run, # pyright: ignore[reportDeprecated],
  1567. )
  1568. )
  1569. @cached_property
  1570. def runs(self) -> RunsWithRawResponse:
  1571. return RunsWithRawResponse(self._threads.runs)
  1572. @cached_property
  1573. def messages(self) -> MessagesWithRawResponse:
  1574. return MessagesWithRawResponse(self._threads.messages)
  1575. class AsyncThreadsWithRawResponse:
  1576. def __init__(self, threads: AsyncThreads) -> None:
  1577. self._threads = threads
  1578. self.create = ( # pyright: ignore[reportDeprecated]
  1579. _legacy_response.async_to_raw_response_wrapper(
  1580. threads.create, # pyright: ignore[reportDeprecated],
  1581. )
  1582. )
  1583. self.retrieve = ( # pyright: ignore[reportDeprecated]
  1584. _legacy_response.async_to_raw_response_wrapper(
  1585. threads.retrieve, # pyright: ignore[reportDeprecated],
  1586. )
  1587. )
  1588. self.update = ( # pyright: ignore[reportDeprecated]
  1589. _legacy_response.async_to_raw_response_wrapper(
  1590. threads.update, # pyright: ignore[reportDeprecated],
  1591. )
  1592. )
  1593. self.delete = ( # pyright: ignore[reportDeprecated]
  1594. _legacy_response.async_to_raw_response_wrapper(
  1595. threads.delete, # pyright: ignore[reportDeprecated],
  1596. )
  1597. )
  1598. self.create_and_run = ( # pyright: ignore[reportDeprecated]
  1599. _legacy_response.async_to_raw_response_wrapper(
  1600. threads.create_and_run, # pyright: ignore[reportDeprecated],
  1601. )
  1602. )
  1603. @cached_property
  1604. def runs(self) -> AsyncRunsWithRawResponse:
  1605. return AsyncRunsWithRawResponse(self._threads.runs)
  1606. @cached_property
  1607. def messages(self) -> AsyncMessagesWithRawResponse:
  1608. return AsyncMessagesWithRawResponse(self._threads.messages)
  1609. class ThreadsWithStreamingResponse:
  1610. def __init__(self, threads: Threads) -> None:
  1611. self._threads = threads
  1612. self.create = ( # pyright: ignore[reportDeprecated]
  1613. to_streamed_response_wrapper(
  1614. threads.create, # pyright: ignore[reportDeprecated],
  1615. )
  1616. )
  1617. self.retrieve = ( # pyright: ignore[reportDeprecated]
  1618. to_streamed_response_wrapper(
  1619. threads.retrieve, # pyright: ignore[reportDeprecated],
  1620. )
  1621. )
  1622. self.update = ( # pyright: ignore[reportDeprecated]
  1623. to_streamed_response_wrapper(
  1624. threads.update, # pyright: ignore[reportDeprecated],
  1625. )
  1626. )
  1627. self.delete = ( # pyright: ignore[reportDeprecated]
  1628. to_streamed_response_wrapper(
  1629. threads.delete, # pyright: ignore[reportDeprecated],
  1630. )
  1631. )
  1632. self.create_and_run = ( # pyright: ignore[reportDeprecated]
  1633. to_streamed_response_wrapper(
  1634. threads.create_and_run, # pyright: ignore[reportDeprecated],
  1635. )
  1636. )
  1637. @cached_property
  1638. def runs(self) -> RunsWithStreamingResponse:
  1639. return RunsWithStreamingResponse(self._threads.runs)
  1640. @cached_property
  1641. def messages(self) -> MessagesWithStreamingResponse:
  1642. return MessagesWithStreamingResponse(self._threads.messages)
  1643. class AsyncThreadsWithStreamingResponse:
  1644. def __init__(self, threads: AsyncThreads) -> None:
  1645. self._threads = threads
  1646. self.create = ( # pyright: ignore[reportDeprecated]
  1647. async_to_streamed_response_wrapper(
  1648. threads.create, # pyright: ignore[reportDeprecated],
  1649. )
  1650. )
  1651. self.retrieve = ( # pyright: ignore[reportDeprecated]
  1652. async_to_streamed_response_wrapper(
  1653. threads.retrieve, # pyright: ignore[reportDeprecated],
  1654. )
  1655. )
  1656. self.update = ( # pyright: ignore[reportDeprecated]
  1657. async_to_streamed_response_wrapper(
  1658. threads.update, # pyright: ignore[reportDeprecated],
  1659. )
  1660. )
  1661. self.delete = ( # pyright: ignore[reportDeprecated]
  1662. async_to_streamed_response_wrapper(
  1663. threads.delete, # pyright: ignore[reportDeprecated],
  1664. )
  1665. )
  1666. self.create_and_run = ( # pyright: ignore[reportDeprecated]
  1667. async_to_streamed_response_wrapper(
  1668. threads.create_and_run, # pyright: ignore[reportDeprecated],
  1669. )
  1670. )
  1671. @cached_property
  1672. def runs(self) -> AsyncRunsWithStreamingResponse:
  1673. return AsyncRunsWithStreamingResponse(self._threads.runs)
  1674. @cached_property
  1675. def messages(self) -> AsyncMessagesWithStreamingResponse:
  1676. return AsyncMessagesWithStreamingResponse(self._threads.messages)