3434 lines
152 KiB
Python
3434 lines
152 KiB
Python
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
|
|
from __future__ import annotations
|
|
|
|
import inspect
|
|
import warnings
|
|
from typing import TYPE_CHECKING, List, Type, Union, Iterable, Optional, cast
|
|
from functools import partial
|
|
from itertools import chain
|
|
from typing_extensions import Literal, overload
|
|
|
|
import httpx
|
|
import pydantic
|
|
|
|
from .... import _legacy_response
|
|
from .batches import (
|
|
Batches,
|
|
AsyncBatches,
|
|
BatchesWithRawResponse,
|
|
AsyncBatchesWithRawResponse,
|
|
BatchesWithStreamingResponse,
|
|
AsyncBatchesWithStreamingResponse,
|
|
)
|
|
from ...._types import NOT_GIVEN, Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
|
|
from ...._utils import is_given, required_args, maybe_transform, strip_not_given, async_maybe_transform
|
|
from ...._compat import cached_property
|
|
from ...._models import TypeAdapter
|
|
from ...._resource import SyncAPIResource, AsyncAPIResource
|
|
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
|
|
from ....lib.tools import (
|
|
BetaToolRunner,
|
|
BetaAsyncToolRunner,
|
|
BetaStreamingToolRunner,
|
|
BetaAsyncStreamingToolRunner,
|
|
)
|
|
from ...._constants import DEFAULT_TIMEOUT, MODEL_NONSTREAMING_TOKENS
|
|
from ...._streaming import Stream, AsyncStream
|
|
from ....types.beta import (
|
|
BetaThinkingConfigParam,
|
|
message_create_params,
|
|
message_count_tokens_params,
|
|
)
|
|
from ...._base_client import make_request_options
|
|
from ....lib.streaming import BetaMessageStreamManager, BetaAsyncMessageStreamManager
|
|
from ...messages.messages import DEPRECATED_MODELS
|
|
from ....types.model_param import ModelParam
|
|
from ....lib._parse._response import ResponseFormatT, parse_response
|
|
from ....lib._parse._transform import transform_schema
|
|
from ....types.beta.beta_message import BetaMessage
|
|
from ....lib.tools._beta_functions import BetaRunnableTool, BetaAsyncRunnableTool
|
|
from ....types.anthropic_beta_param import AnthropicBetaParam
|
|
from ....types.beta.beta_message_param import BetaMessageParam
|
|
from ....types.beta.beta_metadata_param import BetaMetadataParam
|
|
from ....types.beta.parsed_beta_message import ParsedBetaMessage
|
|
from ....types.beta.beta_text_block_param import BetaTextBlockParam
|
|
from ....types.beta.beta_tool_union_param import BetaToolUnionParam
|
|
from ....types.beta.beta_tool_choice_param import BetaToolChoiceParam
|
|
from ....lib.tools._beta_compaction_control import CompactionControl
|
|
from ....types.beta.beta_output_config_param import BetaOutputConfigParam
|
|
from ....types.beta.beta_message_tokens_count import BetaMessageTokensCount
|
|
from ....types.beta.beta_thinking_config_param import BetaThinkingConfigParam
|
|
from ....types.beta.beta_json_output_format_param import BetaJSONOutputFormatParam
|
|
from ....types.beta.beta_raw_message_stream_event import BetaRawMessageStreamEvent
|
|
from ....types.beta.beta_context_management_config_param import BetaContextManagementConfigParam
|
|
from ....types.beta.beta_request_mcp_server_url_definition_param import BetaRequestMCPServerURLDefinitionParam
|
|
|
|
if TYPE_CHECKING:
|
|
from ...._client import Anthropic, AsyncAnthropic
|
|
|
|
__all__ = ["Messages", "AsyncMessages"]
|
|
|
|
|
|
class Messages(SyncAPIResource):
|
|
@cached_property
|
|
def batches(self) -> Batches:
|
|
return Batches(self._client)
|
|
|
|
@cached_property
|
|
def with_raw_response(self) -> MessagesWithRawResponse:
|
|
"""
|
|
This property can be used as a prefix for any HTTP method call to return
|
|
the raw response object instead of the parsed content.
|
|
|
|
For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
|
|
"""
|
|
return MessagesWithRawResponse(self)
|
|
|
|
@cached_property
|
|
def with_streaming_response(self) -> MessagesWithStreamingResponse:
|
|
"""
|
|
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
|
|
|
For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
|
|
"""
|
|
return MessagesWithStreamingResponse(self)
|
|
|
|
@overload
|
|
def create(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[BetaJSONOutputFormatParam] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
stream: Literal[False] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
) -> BetaMessage:
|
|
"""
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for either single queries or stateless multi-turn
|
|
conversations.
|
|
|
|
Learn more about the Messages API in our
|
|
[user guide](https://docs.claude.com/en/docs/initial-setup)
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.claude.com/en/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation. Consecutive `user` or `assistant` turns
|
|
in your request will be combined into a single turn.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
See [input examples](https://docs.claude.com/en/api/messages-examples).
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.claude.com/en/docs/system-prompts), you can use the
|
|
top-level `system` parameter — there is no `"system"` role for input messages in
|
|
the Messages API.
|
|
|
|
There is a limit of 100,000 messages in a single request.
|
|
|
|
model: The model that will complete your prompt.\n\nSee
|
|
[models](https://docs.anthropic.com/en/docs/models-overview) for additional
|
|
details and options.
|
|
|
|
container: Container identifier for reuse across requests.
|
|
|
|
context_management: Context management configuration.
|
|
|
|
This allows you to control how Claude manages context across multiple requests,
|
|
such as whether to clear function results or not.
|
|
|
|
mcp_servers: MCP servers to be utilized in this request
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
output_config: Configuration options for the model's output. Controls aspects like how much
|
|
effort the model puts into its response.
|
|
|
|
output_format: A schema to specify Claude's output format in responses.
|
|
|
|
service_tier: Determines whether to use priority capacity (if available) or standard capacity
|
|
for this request.
|
|
|
|
Anthropic offers different levels of service for your API requests. See
|
|
[service-tiers](https://docs.claude.com/en/api/service-tiers) for details.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.claude.com/en/api/messages-streaming) for details.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.claude.com/en/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
thinking: Configuration for enabling Claude's extended thinking.
|
|
|
|
When enabled, responses include `thinking` content blocks showing Claude's
|
|
thinking process before the final answer. Requires a minimum budget of 1,024
|
|
tokens and counts towards your `max_tokens` limit.
|
|
|
|
See
|
|
[extended thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
|
|
for details.
|
|
|
|
tool_choice: How the model should use the provided tools. The model can use a specific tool,
|
|
any available tool, decide by itself, or not use tools at all.
|
|
|
|
tools: Definitions of tools that the model may use.
|
|
|
|
If you include `tools` in your API request, the model may return `tool_use`
|
|
content blocks that represent the model's use of those tools. You can then run
|
|
those tools using the tool input generated by the model and then optionally
|
|
return results back to the model using `tool_result` content blocks.
|
|
|
|
There are two types of tools: **client tools** and **server tools**. The
|
|
behavior described below applies to client tools. For
|
|
[server tools](https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview#server-tools),
|
|
see their individual documentation as each has its own behavior (e.g., the
|
|
[web search tool](https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-search-tool)).
|
|
|
|
Each tool definition includes:
|
|
|
|
- `name`: Name of the tool.
|
|
- `description`: Optional, but strongly-recommended description of the tool.
|
|
- `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
|
|
tool `input` shape that the model will produce in `tool_use` output content
|
|
blocks.
|
|
|
|
For example, if you defined `tools` as:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"name": "get_stock_price",
|
|
"description": "Get the current stock price for a given ticker symbol.",
|
|
"input_schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"ticker": {
|
|
"type": "string",
|
|
"description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
|
|
}
|
|
},
|
|
"required": ["ticker"]
|
|
}
|
|
}
|
|
]
|
|
```
|
|
|
|
And then asked the model "What's the S&P 500 at today?", the model might produce
|
|
`tool_use` content blocks in the response like this:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_use",
|
|
"id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"name": "get_stock_price",
|
|
"input": { "ticker": "^GSPC" }
|
|
}
|
|
]
|
|
```
|
|
|
|
You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
|
|
input, and return the following back to the model in a subsequent `user`
|
|
message:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_result",
|
|
"tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"content": "259.75 USD"
|
|
}
|
|
]
|
|
```
|
|
|
|
Tools can be used for workflows that include running client-side tools and
|
|
functions, or more generally whenever you want the model to produce a particular
|
|
JSON structure of output.
|
|
|
|
See our [guide](https://docs.claude.com/en/docs/tool-use) for more details.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
betas: Optional header to specify the beta version(s) you want to use.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
@overload
|
|
def create(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
stream: Literal[True],
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[BetaJSONOutputFormatParam] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
) -> Stream[BetaRawMessageStreamEvent]:
|
|
"""
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for either single queries or stateless multi-turn
|
|
conversations.
|
|
|
|
Learn more about the Messages API in our
|
|
[user guide](https://docs.claude.com/en/docs/initial-setup)
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.claude.com/en/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation. Consecutive `user` or `assistant` turns
|
|
in your request will be combined into a single turn.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
See [input examples](https://docs.claude.com/en/api/messages-examples).
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.claude.com/en/docs/system-prompts), you can use the
|
|
top-level `system` parameter — there is no `"system"` role for input messages in
|
|
the Messages API.
|
|
|
|
There is a limit of 100,000 messages in a single request.
|
|
|
|
model: The model that will complete your prompt.\n\nSee
|
|
[models](https://docs.anthropic.com/en/docs/models-overview) for additional
|
|
details and options.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.claude.com/en/api/messages-streaming) for details.
|
|
|
|
container: Container identifier for reuse across requests.
|
|
|
|
context_management: Context management configuration.
|
|
|
|
This allows you to control how Claude manages context across multiple requests,
|
|
such as whether to clear function results or not.
|
|
|
|
mcp_servers: MCP servers to be utilized in this request
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
output_config: Configuration options for the model's output. Controls aspects like how much
|
|
effort the model puts into its response.
|
|
|
|
output_format: A schema to specify Claude's output format in responses.
|
|
|
|
service_tier: Determines whether to use priority capacity (if available) or standard capacity
|
|
for this request.
|
|
|
|
Anthropic offers different levels of service for your API requests. See
|
|
[service-tiers](https://docs.claude.com/en/api/service-tiers) for details.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.claude.com/en/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
thinking: Configuration for enabling Claude's extended thinking.
|
|
|
|
When enabled, responses include `thinking` content blocks showing Claude's
|
|
thinking process before the final answer. Requires a minimum budget of 1,024
|
|
tokens and counts towards your `max_tokens` limit.
|
|
|
|
See
|
|
[extended thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
|
|
for details.
|
|
|
|
tool_choice: How the model should use the provided tools. The model can use a specific tool,
|
|
any available tool, decide by itself, or not use tools at all.
|
|
|
|
tools: Definitions of tools that the model may use.
|
|
|
|
If you include `tools` in your API request, the model may return `tool_use`
|
|
content blocks that represent the model's use of those tools. You can then run
|
|
those tools using the tool input generated by the model and then optionally
|
|
return results back to the model using `tool_result` content blocks.
|
|
|
|
There are two types of tools: **client tools** and **server tools**. The
|
|
behavior described below applies to client tools. For
|
|
[server tools](https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview#server-tools),
|
|
see their individual documentation as each has its own behavior (e.g., the
|
|
[web search tool](https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-search-tool)).
|
|
|
|
Each tool definition includes:
|
|
|
|
- `name`: Name of the tool.
|
|
- `description`: Optional, but strongly-recommended description of the tool.
|
|
- `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
|
|
tool `input` shape that the model will produce in `tool_use` output content
|
|
blocks.
|
|
|
|
For example, if you defined `tools` as:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"name": "get_stock_price",
|
|
"description": "Get the current stock price for a given ticker symbol.",
|
|
"input_schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"ticker": {
|
|
"type": "string",
|
|
"description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
|
|
}
|
|
},
|
|
"required": ["ticker"]
|
|
}
|
|
}
|
|
]
|
|
```
|
|
|
|
And then asked the model "What's the S&P 500 at today?", the model might produce
|
|
`tool_use` content blocks in the response like this:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_use",
|
|
"id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"name": "get_stock_price",
|
|
"input": { "ticker": "^GSPC" }
|
|
}
|
|
]
|
|
```
|
|
|
|
You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
|
|
input, and return the following back to the model in a subsequent `user`
|
|
message:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_result",
|
|
"tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"content": "259.75 USD"
|
|
}
|
|
]
|
|
```
|
|
|
|
Tools can be used for workflows that include running client-side tools and
|
|
functions, or more generally whenever you want the model to produce a particular
|
|
JSON structure of output.
|
|
|
|
See our [guide](https://docs.claude.com/en/docs/tool-use) for more details.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
betas: Optional header to specify the beta version(s) you want to use.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
@overload
|
|
def create(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
stream: bool,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[BetaJSONOutputFormatParam] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
) -> BetaMessage | Stream[BetaRawMessageStreamEvent]:
|
|
"""
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for either single queries or stateless multi-turn
|
|
conversations.
|
|
|
|
Learn more about the Messages API in our
|
|
[user guide](https://docs.claude.com/en/docs/initial-setup)
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.claude.com/en/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation. Consecutive `user` or `assistant` turns
|
|
in your request will be combined into a single turn.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
See [input examples](https://docs.claude.com/en/api/messages-examples).
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.claude.com/en/docs/system-prompts), you can use the
|
|
top-level `system` parameter — there is no `"system"` role for input messages in
|
|
the Messages API.
|
|
|
|
There is a limit of 100,000 messages in a single request.
|
|
|
|
model: The model that will complete your prompt.\n\nSee
|
|
[models](https://docs.anthropic.com/en/docs/models-overview) for additional
|
|
details and options.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.claude.com/en/api/messages-streaming) for details.
|
|
|
|
container: Container identifier for reuse across requests.
|
|
|
|
context_management: Context management configuration.
|
|
|
|
This allows you to control how Claude manages context across multiple requests,
|
|
such as whether to clear function results or not.
|
|
|
|
mcp_servers: MCP servers to be utilized in this request
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
output_config: Configuration options for the model's output. Controls aspects like how much
|
|
effort the model puts into its response.
|
|
|
|
output_format: A schema to specify Claude's output format in responses.
|
|
|
|
service_tier: Determines whether to use priority capacity (if available) or standard capacity
|
|
for this request.
|
|
|
|
Anthropic offers different levels of service for your API requests. See
|
|
[service-tiers](https://docs.claude.com/en/api/service-tiers) for details.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.claude.com/en/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
thinking: Configuration for enabling Claude's extended thinking.
|
|
|
|
When enabled, responses include `thinking` content blocks showing Claude's
|
|
thinking process before the final answer. Requires a minimum budget of 1,024
|
|
tokens and counts towards your `max_tokens` limit.
|
|
|
|
See
|
|
[extended thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
|
|
for details.
|
|
|
|
tool_choice: How the model should use the provided tools. The model can use a specific tool,
|
|
any available tool, decide by itself, or not use tools at all.
|
|
|
|
tools: Definitions of tools that the model may use.
|
|
|
|
If you include `tools` in your API request, the model may return `tool_use`
|
|
content blocks that represent the model's use of those tools. You can then run
|
|
those tools using the tool input generated by the model and then optionally
|
|
return results back to the model using `tool_result` content blocks.
|
|
|
|
There are two types of tools: **client tools** and **server tools**. The
|
|
behavior described below applies to client tools. For
|
|
[server tools](https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview#server-tools),
|
|
see their individual documentation as each has its own behavior (e.g., the
|
|
[web search tool](https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-search-tool)).
|
|
|
|
Each tool definition includes:
|
|
|
|
- `name`: Name of the tool.
|
|
- `description`: Optional, but strongly-recommended description of the tool.
|
|
- `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
|
|
tool `input` shape that the model will produce in `tool_use` output content
|
|
blocks.
|
|
|
|
For example, if you defined `tools` as:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"name": "get_stock_price",
|
|
"description": "Get the current stock price for a given ticker symbol.",
|
|
"input_schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"ticker": {
|
|
"type": "string",
|
|
"description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
|
|
}
|
|
},
|
|
"required": ["ticker"]
|
|
}
|
|
}
|
|
]
|
|
```
|
|
|
|
And then asked the model "What's the S&P 500 at today?", the model might produce
|
|
`tool_use` content blocks in the response like this:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_use",
|
|
"id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"name": "get_stock_price",
|
|
"input": { "ticker": "^GSPC" }
|
|
}
|
|
]
|
|
```
|
|
|
|
You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
|
|
input, and return the following back to the model in a subsequent `user`
|
|
message:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_result",
|
|
"tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"content": "259.75 USD"
|
|
}
|
|
]
|
|
```
|
|
|
|
Tools can be used for workflows that include running client-side tools and
|
|
functions, or more generally whenever you want the model to produce a particular
|
|
JSON structure of output.
|
|
|
|
See our [guide](https://docs.claude.com/en/docs/tool-use) for more details.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
betas: Optional header to specify the beta version(s) you want to use.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
@required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
|
|
def create(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[BetaJSONOutputFormatParam] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
stream: Literal[False] | Literal[True] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
) -> BetaMessage | Stream[BetaRawMessageStreamEvent]:
|
|
validate_output_format(output_format)
|
|
|
|
if not stream and not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
|
|
timeout = self._client._calculate_nonstreaming_timeout(
|
|
max_tokens, MODEL_NONSTREAMING_TOKENS.get(model, None)
|
|
)
|
|
|
|
if model in DEPRECATED_MODELS:
|
|
warnings.warn(
|
|
f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
|
|
DeprecationWarning,
|
|
stacklevel=3,
|
|
)
|
|
|
|
extra_headers = {
|
|
**strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else not_given}),
|
|
**(extra_headers or {}),
|
|
}
|
|
return self._post(
|
|
"/v1/messages?beta=true",
|
|
body=maybe_transform(
|
|
{
|
|
"max_tokens": max_tokens,
|
|
"messages": messages,
|
|
"model": model,
|
|
"container": container,
|
|
"context_management": context_management,
|
|
"mcp_servers": mcp_servers,
|
|
"metadata": metadata,
|
|
"output_config": output_config,
|
|
"output_format": output_format,
|
|
"service_tier": service_tier,
|
|
"stop_sequences": stop_sequences,
|
|
"stream": stream,
|
|
"system": system,
|
|
"temperature": temperature,
|
|
"thinking": thinking,
|
|
"tool_choice": tool_choice,
|
|
"tools": tools,
|
|
"top_k": top_k,
|
|
"top_p": top_p,
|
|
},
|
|
message_create_params.MessageCreateParamsStreaming
|
|
if stream
|
|
else message_create_params.MessageCreateParamsNonStreaming,
|
|
),
|
|
options=make_request_options(
|
|
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
),
|
|
cast_to=BetaMessage,
|
|
stream=stream or False,
|
|
stream_cls=Stream[BetaRawMessageStreamEvent],
|
|
)
|
|
|
|
def parse(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
stream: Literal[False] | Literal[True] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> ParsedBetaMessage[ResponseFormatT]:
|
|
if not stream and not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
|
|
timeout = self._client._calculate_nonstreaming_timeout(
|
|
max_tokens, MODEL_NONSTREAMING_TOKENS.get(model, None)
|
|
)
|
|
|
|
if model in DEPRECATED_MODELS:
|
|
warnings.warn(
|
|
f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
|
|
DeprecationWarning,
|
|
stacklevel=3,
|
|
)
|
|
|
|
betas = [beta for beta in betas] if is_given(betas) else []
|
|
|
|
if "structured-outputs-2025-11-13" not in betas:
|
|
# Ensure structured outputs beta is included for parse method
|
|
betas.append("structured-outputs-2025-11-13")
|
|
|
|
extra_headers = {
|
|
"X-Stainless-Helper": "beta.messages.parse",
|
|
**strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else NOT_GIVEN}),
|
|
**(extra_headers or {}),
|
|
}
|
|
|
|
transformed_output_format: Optional[message_create_params.OutputFormat] | NotGiven = NOT_GIVEN
|
|
|
|
if is_given(output_format) and output_format is not None:
|
|
adapted_type: TypeAdapter[ResponseFormatT] = TypeAdapter(output_format)
|
|
|
|
try:
|
|
schema = adapted_type.json_schema()
|
|
transformed_output_format = message_create_params.OutputFormat(
|
|
schema=transform_schema(schema), type="json_schema"
|
|
)
|
|
except pydantic.errors.PydanticSchemaGenerationError as e:
|
|
raise TypeError(
|
|
(
|
|
"Could not generate JSON schema for the given `output_format` type. "
|
|
"Use a type that works with `pydanitc.TypeAdapter`"
|
|
)
|
|
) from e
|
|
|
|
def parser(response: BetaMessage) -> ParsedBetaMessage[ResponseFormatT]:
|
|
return parse_response(
|
|
response=response,
|
|
output_format=cast(
|
|
ResponseFormatT,
|
|
output_format if is_given(output_format) and output_format is not None else NOT_GIVEN,
|
|
),
|
|
)
|
|
|
|
return self._post(
|
|
"/v1/messages?beta=true",
|
|
body=maybe_transform(
|
|
{
|
|
"max_tokens": max_tokens,
|
|
"messages": messages,
|
|
"model": model,
|
|
"container": container,
|
|
"context_management": context_management,
|
|
"mcp_servers": mcp_servers,
|
|
"metadata": metadata,
|
|
"output_config": output_config,
|
|
"output_format": transformed_output_format,
|
|
"service_tier": service_tier,
|
|
"stop_sequences": stop_sequences,
|
|
"stream": stream,
|
|
"system": system,
|
|
"temperature": temperature,
|
|
"thinking": thinking,
|
|
"tool_choice": tool_choice,
|
|
"tools": tools,
|
|
"top_k": top_k,
|
|
"top_p": top_p,
|
|
},
|
|
message_create_params.MessageCreateParamsNonStreaming,
|
|
),
|
|
options=make_request_options(
|
|
extra_headers=extra_headers,
|
|
extra_query=extra_query,
|
|
extra_body=extra_body,
|
|
timeout=timeout,
|
|
post_parser=parser,
|
|
),
|
|
cast_to=cast(Type[ParsedBetaMessage[ResponseFormatT]], BetaMessage),
|
|
stream=False,
|
|
)
|
|
|
|
@overload
|
|
def tool_runner(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
tools: Iterable[BetaRunnableTool],
|
|
compaction_control: CompactionControl | Omit = omit,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
max_iterations: int | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
stream: Literal[False] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> BetaToolRunner[ResponseFormatT]: ...
|
|
|
|
@overload
|
|
def tool_runner(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
tools: Iterable[BetaRunnableTool],
|
|
compaction_control: CompactionControl | Omit = omit,
|
|
stream: Literal[True],
|
|
max_iterations: int | Omit = omit,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> BetaStreamingToolRunner[ResponseFormatT]: ...
|
|
|
|
@overload
|
|
def tool_runner(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
tools: Iterable[BetaRunnableTool],
|
|
compaction_control: CompactionControl | Omit = omit,
|
|
stream: bool,
|
|
max_iterations: int | Omit = omit,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> BetaStreamingToolRunner[ResponseFormatT] | BetaToolRunner[ResponseFormatT]: ...
|
|
|
|
def tool_runner(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
tools: Iterable[BetaRunnableTool],
|
|
compaction_control: CompactionControl | Omit = omit,
|
|
max_iterations: int | Omit = omit,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
stream: bool | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> BetaStreamingToolRunner[ResponseFormatT] | BetaToolRunner[ResponseFormatT]:
|
|
"""Create a Message stream"""
|
|
if model in DEPRECATED_MODELS:
|
|
warnings.warn(
|
|
f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
|
|
DeprecationWarning,
|
|
stacklevel=3,
|
|
)
|
|
|
|
extra_headers = {
|
|
"X-Stainless-Helper": "BetaToolRunner",
|
|
**strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else NOT_GIVEN}),
|
|
**(extra_headers or {}),
|
|
}
|
|
|
|
params = cast(
|
|
message_create_params.ParseMessageCreateParamsBase[ResponseFormatT],
|
|
{
|
|
"max_tokens": max_tokens,
|
|
"messages": messages,
|
|
"model": model,
|
|
"container": container,
|
|
"context_management": context_management,
|
|
"mcp_servers": mcp_servers,
|
|
"metadata": metadata,
|
|
"output_config": output_config,
|
|
"output_format": output_format,
|
|
"service_tier": service_tier,
|
|
"stop_sequences": stop_sequences,
|
|
"system": system,
|
|
"temperature": temperature,
|
|
"thinking": thinking,
|
|
"tool_choice": tool_choice,
|
|
"tools": [tool.to_dict() for tool in tools],
|
|
"top_k": top_k,
|
|
"top_p": top_p,
|
|
},
|
|
)
|
|
|
|
if stream:
|
|
return BetaStreamingToolRunner[ResponseFormatT](
|
|
tools=tools,
|
|
params=params,
|
|
options={
|
|
"extra_headers": extra_headers,
|
|
"extra_query": extra_query,
|
|
"extra_body": extra_body,
|
|
"timeout": timeout,
|
|
},
|
|
client=cast("Anthropic", self._client),
|
|
max_iterations=max_iterations if is_given(max_iterations) else None,
|
|
compaction_control=compaction_control if is_given(compaction_control) else None,
|
|
)
|
|
return BetaToolRunner[ResponseFormatT](
|
|
tools=tools,
|
|
params=params,
|
|
options={
|
|
"extra_headers": extra_headers,
|
|
"extra_query": extra_query,
|
|
"extra_body": extra_body,
|
|
"timeout": timeout,
|
|
},
|
|
client=cast("Anthropic", self._client),
|
|
max_iterations=max_iterations if is_given(max_iterations) else None,
|
|
compaction_control=compaction_control if is_given(compaction_control) else None,
|
|
)
|
|
|
|
def stream(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> BetaMessageStreamManager[ResponseFormatT]:
|
|
if model in DEPRECATED_MODELS:
|
|
warnings.warn(
|
|
f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
|
|
DeprecationWarning,
|
|
stacklevel=3,
|
|
)
|
|
|
|
"""Create a Message stream"""
|
|
extra_headers = {
|
|
"X-Stainless-Helper-Method": "stream",
|
|
"X-Stainless-Stream-Helper": "beta.messages",
|
|
**strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else NOT_GIVEN}),
|
|
**(extra_headers or {}),
|
|
}
|
|
|
|
transformed_output_format: Optional[message_create_params.OutputFormat] | NotGiven = NOT_GIVEN
|
|
|
|
if is_given(output_format) and output_format is not None:
|
|
adapted_type: TypeAdapter[ResponseFormatT] = TypeAdapter(output_format)
|
|
|
|
try:
|
|
schema = adapted_type.json_schema()
|
|
transformed_output_format = message_create_params.OutputFormat(
|
|
schema=transform_schema(schema), type="json_schema"
|
|
)
|
|
except pydantic.errors.PydanticSchemaGenerationError as e:
|
|
raise TypeError(
|
|
(
|
|
"Could not generate JSON schema for the given `output_format` type. "
|
|
"Use a type that works with `pydanitc.TypeAdapter`"
|
|
)
|
|
) from e
|
|
|
|
make_request = partial(
|
|
self._post,
|
|
"/v1/messages?beta=true",
|
|
body=maybe_transform(
|
|
{
|
|
"max_tokens": max_tokens,
|
|
"messages": messages,
|
|
"model": model,
|
|
"metadata": metadata,
|
|
"output_config": output_config,
|
|
"output_format": transformed_output_format,
|
|
"container": container,
|
|
"context_management": context_management,
|
|
"mcp_servers": mcp_servers,
|
|
"service_tier": service_tier,
|
|
"stop_sequences": stop_sequences,
|
|
"system": system,
|
|
"temperature": temperature,
|
|
"thinking": thinking,
|
|
"top_k": top_k,
|
|
"top_p": top_p,
|
|
"tools": tools,
|
|
"tool_choice": tool_choice,
|
|
"stream": True,
|
|
},
|
|
message_create_params.MessageCreateParams,
|
|
),
|
|
options=make_request_options(
|
|
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
),
|
|
cast_to=BetaMessage,
|
|
stream=True,
|
|
stream_cls=Stream[BetaRawMessageStreamEvent],
|
|
)
|
|
return BetaMessageStreamManager(make_request, output_format=cast(ResponseFormatT, output_format))
|
|
|
|
def count_tokens(
|
|
self,
|
|
*,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[BetaJSONOutputFormatParam] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[message_count_tokens_params.Tool] | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
) -> BetaMessageTokensCount:
|
|
"""
|
|
Count the number of tokens in a Message.
|
|
|
|
The Token Count API can be used to count the number of tokens in a Message,
|
|
including tools, images, and documents, without creating it.
|
|
|
|
Learn more about token counting in our
|
|
[user guide](https://docs.claude.com/en/docs/build-with-claude/token-counting)
|
|
|
|
Args:
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation. Consecutive `user` or `assistant` turns
|
|
in your request will be combined into a single turn.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
See [input examples](https://docs.claude.com/en/api/messages-examples).
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.claude.com/en/docs/system-prompts), you can use the
|
|
top-level `system` parameter — there is no `"system"` role for input messages in
|
|
the Messages API.
|
|
|
|
There is a limit of 100,000 messages in a single request.
|
|
|
|
model: The model that will complete your prompt.\n\nSee
|
|
[models](https://docs.anthropic.com/en/docs/models-overview) for additional
|
|
details and options.
|
|
|
|
context_management: Context management configuration.
|
|
|
|
This allows you to control how Claude manages context across multiple requests,
|
|
such as whether to clear function results or not.
|
|
|
|
mcp_servers: MCP servers to be utilized in this request
|
|
|
|
output_config: Configuration options for the model's output. Controls aspects like how much
|
|
effort the model puts into its response.
|
|
|
|
output_format: A schema to specify Claude's output format in responses.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.claude.com/en/docs/system-prompts).
|
|
|
|
thinking: Configuration for enabling Claude's extended thinking.
|
|
|
|
When enabled, responses include `thinking` content blocks showing Claude's
|
|
thinking process before the final answer. Requires a minimum budget of 1,024
|
|
tokens and counts towards your `max_tokens` limit.
|
|
|
|
See
|
|
[extended thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
|
|
for details.
|
|
|
|
tool_choice: How the model should use the provided tools. The model can use a specific tool,
|
|
any available tool, decide by itself, or not use tools at all.
|
|
|
|
tools: Definitions of tools that the model may use.
|
|
|
|
If you include `tools` in your API request, the model may return `tool_use`
|
|
content blocks that represent the model's use of those tools. You can then run
|
|
those tools using the tool input generated by the model and then optionally
|
|
return results back to the model using `tool_result` content blocks.
|
|
|
|
There are two types of tools: **client tools** and **server tools**. The
|
|
behavior described below applies to client tools. For
|
|
[server tools](https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview#server-tools),
|
|
see their individual documentation as each has its own behavior (e.g., the
|
|
[web search tool](https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-search-tool)).
|
|
|
|
Each tool definition includes:
|
|
|
|
- `name`: Name of the tool.
|
|
- `description`: Optional, but strongly-recommended description of the tool.
|
|
- `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
|
|
tool `input` shape that the model will produce in `tool_use` output content
|
|
blocks.
|
|
|
|
For example, if you defined `tools` as:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"name": "get_stock_price",
|
|
"description": "Get the current stock price for a given ticker symbol.",
|
|
"input_schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"ticker": {
|
|
"type": "string",
|
|
"description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
|
|
}
|
|
},
|
|
"required": ["ticker"]
|
|
}
|
|
}
|
|
]
|
|
```
|
|
|
|
And then asked the model "What's the S&P 500 at today?", the model might produce
|
|
`tool_use` content blocks in the response like this:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_use",
|
|
"id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"name": "get_stock_price",
|
|
"input": { "ticker": "^GSPC" }
|
|
}
|
|
]
|
|
```
|
|
|
|
You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
|
|
input, and return the following back to the model in a subsequent `user`
|
|
message:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_result",
|
|
"tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"content": "259.75 USD"
|
|
}
|
|
]
|
|
```
|
|
|
|
Tools can be used for workflows that include running client-side tools and
|
|
functions, or more generally whenever you want the model to produce a particular
|
|
JSON structure of output.
|
|
|
|
See our [guide](https://docs.claude.com/en/docs/tool-use) for more details.
|
|
|
|
betas: Optional header to specify the beta version(s) you want to use.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
extra_headers = {
|
|
**strip_not_given(
|
|
{
|
|
"anthropic-beta": ",".join(chain((str(e) for e in betas), ["token-counting-2024-11-01"]))
|
|
if is_given(betas)
|
|
else not_given
|
|
}
|
|
),
|
|
**(extra_headers or {}),
|
|
}
|
|
extra_headers = {"anthropic-beta": "token-counting-2024-11-01", **(extra_headers or {})}
|
|
return self._post(
|
|
"/v1/messages/count_tokens?beta=true",
|
|
body=maybe_transform(
|
|
{
|
|
"messages": messages,
|
|
"model": model,
|
|
"context_management": context_management,
|
|
"mcp_servers": mcp_servers,
|
|
"output_config": output_config,
|
|
"output_format": output_format,
|
|
"system": system,
|
|
"thinking": thinking,
|
|
"tool_choice": tool_choice,
|
|
"tools": tools,
|
|
},
|
|
message_count_tokens_params.MessageCountTokensParams,
|
|
),
|
|
options=make_request_options(
|
|
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
),
|
|
cast_to=BetaMessageTokensCount,
|
|
)
|
|
|
|
|
|
class AsyncMessages(AsyncAPIResource):
|
|
@cached_property
|
|
def batches(self) -> AsyncBatches:
|
|
return AsyncBatches(self._client)
|
|
|
|
@cached_property
|
|
def with_raw_response(self) -> AsyncMessagesWithRawResponse:
|
|
"""
|
|
This property can be used as a prefix for any HTTP method call to return
|
|
the raw response object instead of the parsed content.
|
|
|
|
For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
|
|
"""
|
|
return AsyncMessagesWithRawResponse(self)
|
|
|
|
@cached_property
|
|
def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
|
|
"""
|
|
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
|
|
|
|
For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
|
|
"""
|
|
return AsyncMessagesWithStreamingResponse(self)
|
|
|
|
@overload
|
|
async def create(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[BetaJSONOutputFormatParam] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
stream: Literal[False] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
) -> BetaMessage:
|
|
"""
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for either single queries or stateless multi-turn
|
|
conversations.
|
|
|
|
Learn more about the Messages API in our
|
|
[user guide](https://docs.claude.com/en/docs/initial-setup)
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.claude.com/en/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation. Consecutive `user` or `assistant` turns
|
|
in your request will be combined into a single turn.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
See [input examples](https://docs.claude.com/en/api/messages-examples).
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.claude.com/en/docs/system-prompts), you can use the
|
|
top-level `system` parameter — there is no `"system"` role for input messages in
|
|
the Messages API.
|
|
|
|
There is a limit of 100,000 messages in a single request.
|
|
|
|
model: The model that will complete your prompt.\n\nSee
|
|
[models](https://docs.anthropic.com/en/docs/models-overview) for additional
|
|
details and options.
|
|
|
|
container: Container identifier for reuse across requests.
|
|
|
|
context_management: Context management configuration.
|
|
|
|
This allows you to control how Claude manages context across multiple requests,
|
|
such as whether to clear function results or not.
|
|
|
|
mcp_servers: MCP servers to be utilized in this request
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
output_config: Configuration options for the model's output. Controls aspects like how much
|
|
effort the model puts into its response.
|
|
|
|
output_format: A schema to specify Claude's output format in responses.
|
|
|
|
service_tier: Determines whether to use priority capacity (if available) or standard capacity
|
|
for this request.
|
|
|
|
Anthropic offers different levels of service for your API requests. See
|
|
[service-tiers](https://docs.claude.com/en/api/service-tiers) for details.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.claude.com/en/api/messages-streaming) for details.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.claude.com/en/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
thinking: Configuration for enabling Claude's extended thinking.
|
|
|
|
When enabled, responses include `thinking` content blocks showing Claude's
|
|
thinking process before the final answer. Requires a minimum budget of 1,024
|
|
tokens and counts towards your `max_tokens` limit.
|
|
|
|
See
|
|
[extended thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
|
|
for details.
|
|
|
|
tool_choice: How the model should use the provided tools. The model can use a specific tool,
|
|
any available tool, decide by itself, or not use tools at all.
|
|
|
|
tools: Definitions of tools that the model may use.
|
|
|
|
If you include `tools` in your API request, the model may return `tool_use`
|
|
content blocks that represent the model's use of those tools. You can then run
|
|
those tools using the tool input generated by the model and then optionally
|
|
return results back to the model using `tool_result` content blocks.
|
|
|
|
There are two types of tools: **client tools** and **server tools**. The
|
|
behavior described below applies to client tools. For
|
|
[server tools](https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview#server-tools),
|
|
see their individual documentation as each has its own behavior (e.g., the
|
|
[web search tool](https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-search-tool)).
|
|
|
|
Each tool definition includes:
|
|
|
|
- `name`: Name of the tool.
|
|
- `description`: Optional, but strongly-recommended description of the tool.
|
|
- `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
|
|
tool `input` shape that the model will produce in `tool_use` output content
|
|
blocks.
|
|
|
|
For example, if you defined `tools` as:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"name": "get_stock_price",
|
|
"description": "Get the current stock price for a given ticker symbol.",
|
|
"input_schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"ticker": {
|
|
"type": "string",
|
|
"description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
|
|
}
|
|
},
|
|
"required": ["ticker"]
|
|
}
|
|
}
|
|
]
|
|
```
|
|
|
|
And then asked the model "What's the S&P 500 at today?", the model might produce
|
|
`tool_use` content blocks in the response like this:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_use",
|
|
"id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"name": "get_stock_price",
|
|
"input": { "ticker": "^GSPC" }
|
|
}
|
|
]
|
|
```
|
|
|
|
You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
|
|
input, and return the following back to the model in a subsequent `user`
|
|
message:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_result",
|
|
"tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"content": "259.75 USD"
|
|
}
|
|
]
|
|
```
|
|
|
|
Tools can be used for workflows that include running client-side tools and
|
|
functions, or more generally whenever you want the model to produce a particular
|
|
JSON structure of output.
|
|
|
|
See our [guide](https://docs.claude.com/en/docs/tool-use) for more details.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
betas: Optional header to specify the beta version(s) you want to use.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
@overload
|
|
async def create(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
stream: Literal[True],
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[BetaJSONOutputFormatParam] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
) -> AsyncStream[BetaRawMessageStreamEvent]:
|
|
"""
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for either single queries or stateless multi-turn
|
|
conversations.
|
|
|
|
Learn more about the Messages API in our
|
|
[user guide](https://docs.claude.com/en/docs/initial-setup)
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.claude.com/en/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation. Consecutive `user` or `assistant` turns
|
|
in your request will be combined into a single turn.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
See [input examples](https://docs.claude.com/en/api/messages-examples).
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.claude.com/en/docs/system-prompts), you can use the
|
|
top-level `system` parameter — there is no `"system"` role for input messages in
|
|
the Messages API.
|
|
|
|
There is a limit of 100,000 messages in a single request.
|
|
|
|
model: The model that will complete your prompt.\n\nSee
|
|
[models](https://docs.anthropic.com/en/docs/models-overview) for additional
|
|
details and options.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.claude.com/en/api/messages-streaming) for details.
|
|
|
|
container: Container identifier for reuse across requests.
|
|
|
|
context_management: Context management configuration.
|
|
|
|
This allows you to control how Claude manages context across multiple requests,
|
|
such as whether to clear function results or not.
|
|
|
|
mcp_servers: MCP servers to be utilized in this request
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
output_config: Configuration options for the model's output. Controls aspects like how much
|
|
effort the model puts into its response.
|
|
|
|
output_format: A schema to specify Claude's output format in responses.
|
|
|
|
service_tier: Determines whether to use priority capacity (if available) or standard capacity
|
|
for this request.
|
|
|
|
Anthropic offers different levels of service for your API requests. See
|
|
[service-tiers](https://docs.claude.com/en/api/service-tiers) for details.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.claude.com/en/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
thinking: Configuration for enabling Claude's extended thinking.
|
|
|
|
When enabled, responses include `thinking` content blocks showing Claude's
|
|
thinking process before the final answer. Requires a minimum budget of 1,024
|
|
tokens and counts towards your `max_tokens` limit.
|
|
|
|
See
|
|
[extended thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
|
|
for details.
|
|
|
|
tool_choice: How the model should use the provided tools. The model can use a specific tool,
|
|
any available tool, decide by itself, or not use tools at all.
|
|
|
|
tools: Definitions of tools that the model may use.
|
|
|
|
If you include `tools` in your API request, the model may return `tool_use`
|
|
content blocks that represent the model's use of those tools. You can then run
|
|
those tools using the tool input generated by the model and then optionally
|
|
return results back to the model using `tool_result` content blocks.
|
|
|
|
There are two types of tools: **client tools** and **server tools**. The
|
|
behavior described below applies to client tools. For
|
|
[server tools](https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview#server-tools),
|
|
see their individual documentation as each has its own behavior (e.g., the
|
|
[web search tool](https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-search-tool)).
|
|
|
|
Each tool definition includes:
|
|
|
|
- `name`: Name of the tool.
|
|
- `description`: Optional, but strongly-recommended description of the tool.
|
|
- `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
|
|
tool `input` shape that the model will produce in `tool_use` output content
|
|
blocks.
|
|
|
|
For example, if you defined `tools` as:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"name": "get_stock_price",
|
|
"description": "Get the current stock price for a given ticker symbol.",
|
|
"input_schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"ticker": {
|
|
"type": "string",
|
|
"description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
|
|
}
|
|
},
|
|
"required": ["ticker"]
|
|
}
|
|
}
|
|
]
|
|
```
|
|
|
|
And then asked the model "What's the S&P 500 at today?", the model might produce
|
|
`tool_use` content blocks in the response like this:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_use",
|
|
"id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"name": "get_stock_price",
|
|
"input": { "ticker": "^GSPC" }
|
|
}
|
|
]
|
|
```
|
|
|
|
You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
|
|
input, and return the following back to the model in a subsequent `user`
|
|
message:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_result",
|
|
"tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"content": "259.75 USD"
|
|
}
|
|
]
|
|
```
|
|
|
|
Tools can be used for workflows that include running client-side tools and
|
|
functions, or more generally whenever you want the model to produce a particular
|
|
JSON structure of output.
|
|
|
|
See our [guide](https://docs.claude.com/en/docs/tool-use) for more details.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
betas: Optional header to specify the beta version(s) you want to use.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
@overload
|
|
async def create(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
stream: bool,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[BetaJSONOutputFormatParam] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
) -> BetaMessage | AsyncStream[BetaRawMessageStreamEvent]:
|
|
"""
|
|
Send a structured list of input messages with text and/or image content, and the
|
|
model will generate the next message in the conversation.
|
|
|
|
The Messages API can be used for either single queries or stateless multi-turn
|
|
conversations.
|
|
|
|
Learn more about the Messages API in our
|
|
[user guide](https://docs.claude.com/en/docs/initial-setup)
|
|
|
|
Args:
|
|
max_tokens: The maximum number of tokens to generate before stopping.
|
|
|
|
Note that our models may stop _before_ reaching this maximum. This parameter
|
|
only specifies the absolute maximum number of tokens to generate.
|
|
|
|
Different models have different maximum values for this parameter. See
|
|
[models](https://docs.claude.com/en/docs/models-overview) for details.
|
|
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation. Consecutive `user` or `assistant` turns
|
|
in your request will be combined into a single turn.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
See [input examples](https://docs.claude.com/en/api/messages-examples).
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.claude.com/en/docs/system-prompts), you can use the
|
|
top-level `system` parameter — there is no `"system"` role for input messages in
|
|
the Messages API.
|
|
|
|
There is a limit of 100,000 messages in a single request.
|
|
|
|
model: The model that will complete your prompt.\n\nSee
|
|
[models](https://docs.anthropic.com/en/docs/models-overview) for additional
|
|
details and options.
|
|
|
|
stream: Whether to incrementally stream the response using server-sent events.
|
|
|
|
See [streaming](https://docs.claude.com/en/api/messages-streaming) for details.
|
|
|
|
container: Container identifier for reuse across requests.
|
|
|
|
context_management: Context management configuration.
|
|
|
|
This allows you to control how Claude manages context across multiple requests,
|
|
such as whether to clear function results or not.
|
|
|
|
mcp_servers: MCP servers to be utilized in this request
|
|
|
|
metadata: An object describing metadata about the request.
|
|
|
|
output_config: Configuration options for the model's output. Controls aspects like how much
|
|
effort the model puts into its response.
|
|
|
|
output_format: A schema to specify Claude's output format in responses.
|
|
|
|
service_tier: Determines whether to use priority capacity (if available) or standard capacity
|
|
for this request.
|
|
|
|
Anthropic offers different levels of service for your API requests. See
|
|
[service-tiers](https://docs.claude.com/en/api/service-tiers) for details.
|
|
|
|
stop_sequences: Custom text sequences that will cause the model to stop generating.
|
|
|
|
Our models will normally stop when they have naturally completed their turn,
|
|
which will result in a response `stop_reason` of `"end_turn"`.
|
|
|
|
If you want the model to stop generating when it encounters custom strings of
|
|
text, you can use the `stop_sequences` parameter. If the model encounters one of
|
|
the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
|
|
and the response `stop_sequence` value will contain the matched stop sequence.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.claude.com/en/docs/system-prompts).
|
|
|
|
temperature: Amount of randomness injected into the response.
|
|
|
|
Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
|
|
for analytical / multiple choice, and closer to `1.0` for creative and
|
|
generative tasks.
|
|
|
|
Note that even with `temperature` of `0.0`, the results will not be fully
|
|
deterministic.
|
|
|
|
thinking: Configuration for enabling Claude's extended thinking.
|
|
|
|
When enabled, responses include `thinking` content blocks showing Claude's
|
|
thinking process before the final answer. Requires a minimum budget of 1,024
|
|
tokens and counts towards your `max_tokens` limit.
|
|
|
|
See
|
|
[extended thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
|
|
for details.
|
|
|
|
tool_choice: How the model should use the provided tools. The model can use a specific tool,
|
|
any available tool, decide by itself, or not use tools at all.
|
|
|
|
tools: Definitions of tools that the model may use.
|
|
|
|
If you include `tools` in your API request, the model may return `tool_use`
|
|
content blocks that represent the model's use of those tools. You can then run
|
|
those tools using the tool input generated by the model and then optionally
|
|
return results back to the model using `tool_result` content blocks.
|
|
|
|
There are two types of tools: **client tools** and **server tools**. The
|
|
behavior described below applies to client tools. For
|
|
[server tools](https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview#server-tools),
|
|
see their individual documentation as each has its own behavior (e.g., the
|
|
[web search tool](https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-search-tool)).
|
|
|
|
Each tool definition includes:
|
|
|
|
- `name`: Name of the tool.
|
|
- `description`: Optional, but strongly-recommended description of the tool.
|
|
- `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
|
|
tool `input` shape that the model will produce in `tool_use` output content
|
|
blocks.
|
|
|
|
For example, if you defined `tools` as:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"name": "get_stock_price",
|
|
"description": "Get the current stock price for a given ticker symbol.",
|
|
"input_schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"ticker": {
|
|
"type": "string",
|
|
"description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
|
|
}
|
|
},
|
|
"required": ["ticker"]
|
|
}
|
|
}
|
|
]
|
|
```
|
|
|
|
And then asked the model "What's the S&P 500 at today?", the model might produce
|
|
`tool_use` content blocks in the response like this:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_use",
|
|
"id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"name": "get_stock_price",
|
|
"input": { "ticker": "^GSPC" }
|
|
}
|
|
]
|
|
```
|
|
|
|
You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
|
|
input, and return the following back to the model in a subsequent `user`
|
|
message:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_result",
|
|
"tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"content": "259.75 USD"
|
|
}
|
|
]
|
|
```
|
|
|
|
Tools can be used for workflows that include running client-side tools and
|
|
functions, or more generally whenever you want the model to produce a particular
|
|
JSON structure of output.
|
|
|
|
See our [guide](https://docs.claude.com/en/docs/tool-use) for more details.
|
|
|
|
top_k: Only sample from the top K options for each subsequent token.
|
|
|
|
Used to remove "long tail" low probability responses.
|
|
[Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
top_p: Use nucleus sampling.
|
|
|
|
In nucleus sampling, we compute the cumulative distribution over all the options
|
|
for each subsequent token in decreasing probability order and cut it off once it
|
|
reaches a particular probability specified by `top_p`. You should either alter
|
|
`temperature` or `top_p`, but not both.
|
|
|
|
Recommended for advanced use cases only. You usually only need to use
|
|
`temperature`.
|
|
|
|
betas: Optional header to specify the beta version(s) you want to use.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
...
|
|
|
|
@required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
|
|
async def create(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[BetaJSONOutputFormatParam] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
stream: Literal[False] | Literal[True] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
) -> BetaMessage | AsyncStream[BetaRawMessageStreamEvent]:
|
|
validate_output_format(output_format)
|
|
|
|
if not stream and not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
|
|
timeout = self._client._calculate_nonstreaming_timeout(
|
|
max_tokens, MODEL_NONSTREAMING_TOKENS.get(model, None)
|
|
)
|
|
|
|
if model in DEPRECATED_MODELS:
|
|
warnings.warn(
|
|
f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
|
|
DeprecationWarning,
|
|
stacklevel=3,
|
|
)
|
|
|
|
extra_headers = {
|
|
**strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else not_given}),
|
|
**(extra_headers or {}),
|
|
}
|
|
return await self._post(
|
|
"/v1/messages?beta=true",
|
|
body=await async_maybe_transform(
|
|
{
|
|
"max_tokens": max_tokens,
|
|
"messages": messages,
|
|
"model": model,
|
|
"container": container,
|
|
"context_management": context_management,
|
|
"mcp_servers": mcp_servers,
|
|
"metadata": metadata,
|
|
"output_config": output_config,
|
|
"output_format": output_format,
|
|
"service_tier": service_tier,
|
|
"stop_sequences": stop_sequences,
|
|
"stream": stream,
|
|
"system": system,
|
|
"temperature": temperature,
|
|
"thinking": thinking,
|
|
"tool_choice": tool_choice,
|
|
"tools": tools,
|
|
"top_k": top_k,
|
|
"top_p": top_p,
|
|
},
|
|
message_create_params.MessageCreateParamsStreaming
|
|
if stream
|
|
else message_create_params.MessageCreateParamsNonStreaming,
|
|
),
|
|
options=make_request_options(
|
|
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
),
|
|
cast_to=BetaMessage,
|
|
stream=stream or False,
|
|
stream_cls=AsyncStream[BetaRawMessageStreamEvent],
|
|
)
|
|
|
|
async def parse(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
stream: Literal[False] | Literal[True] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> ParsedBetaMessage[ResponseFormatT]:
|
|
if not stream and not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
|
|
timeout = self._client._calculate_nonstreaming_timeout(
|
|
max_tokens, MODEL_NONSTREAMING_TOKENS.get(model, None)
|
|
)
|
|
|
|
if model in DEPRECATED_MODELS:
|
|
warnings.warn(
|
|
f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
|
|
DeprecationWarning,
|
|
stacklevel=3,
|
|
)
|
|
betas = [beta for beta in betas] if is_given(betas) else []
|
|
|
|
if "structured-outputs-2025-11-13" not in betas:
|
|
# Ensure structured outputs beta is included for parse method
|
|
betas.append("structured-outputs-2025-11-13")
|
|
|
|
extra_headers = {
|
|
"X-Stainless-Helper": "beta.messages.parse",
|
|
**strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else NOT_GIVEN}),
|
|
**(extra_headers or {}),
|
|
}
|
|
|
|
transformed_output_format: Optional[message_create_params.OutputFormat] | NotGiven = NOT_GIVEN
|
|
|
|
if is_given(output_format) and output_format is not None:
|
|
adapted_type: TypeAdapter[ResponseFormatT] = TypeAdapter(output_format)
|
|
|
|
try:
|
|
schema = adapted_type.json_schema()
|
|
transformed_output_format = message_create_params.OutputFormat(
|
|
schema=transform_schema(schema), type="json_schema"
|
|
)
|
|
except pydantic.errors.PydanticSchemaGenerationError as e:
|
|
raise TypeError(
|
|
(
|
|
"Could not generate JSON schema for the given `output_format` type. "
|
|
"Use a type that works with `pydanitc.TypeAdapter`"
|
|
)
|
|
) from e
|
|
|
|
def parser(response: BetaMessage) -> ParsedBetaMessage[ResponseFormatT]:
|
|
return parse_response(
|
|
response=response,
|
|
output_format=cast(
|
|
ResponseFormatT,
|
|
output_format if is_given(output_format) and output_format is not None else NOT_GIVEN,
|
|
),
|
|
)
|
|
|
|
return await self._post(
|
|
"/v1/messages?beta=true",
|
|
body=maybe_transform(
|
|
{
|
|
"max_tokens": max_tokens,
|
|
"messages": messages,
|
|
"model": model,
|
|
"container": container,
|
|
"context_management": context_management,
|
|
"mcp_servers": mcp_servers,
|
|
"output_config": output_config,
|
|
"metadata": metadata,
|
|
"output_format": transformed_output_format,
|
|
"service_tier": service_tier,
|
|
"stop_sequences": stop_sequences,
|
|
"stream": stream,
|
|
"system": system,
|
|
"temperature": temperature,
|
|
"thinking": thinking,
|
|
"tool_choice": tool_choice,
|
|
"tools": tools,
|
|
"top_k": top_k,
|
|
"top_p": top_p,
|
|
},
|
|
message_create_params.MessageCreateParamsNonStreaming,
|
|
),
|
|
options=make_request_options(
|
|
extra_headers=extra_headers,
|
|
extra_query=extra_query,
|
|
extra_body=extra_body,
|
|
timeout=timeout,
|
|
post_parser=parser,
|
|
),
|
|
cast_to=cast(Type[ParsedBetaMessage[ResponseFormatT]], BetaMessage),
|
|
stream=False,
|
|
)
|
|
|
|
@overload
|
|
def tool_runner(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
tools: Iterable[BetaAsyncRunnableTool],
|
|
compaction_control: CompactionControl | Omit = omit,
|
|
max_iterations: int | Omit = omit,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
stream: Literal[False] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> BetaAsyncToolRunner[ResponseFormatT]: ...
|
|
|
|
@overload
|
|
def tool_runner(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
tools: Iterable[BetaAsyncRunnableTool],
|
|
compaction_control: CompactionControl | Omit = omit,
|
|
stream: Literal[True],
|
|
max_iterations: int | Omit = omit,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> BetaAsyncStreamingToolRunner[ResponseFormatT]: ...
|
|
|
|
@overload
|
|
def tool_runner(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
tools: Iterable[BetaAsyncRunnableTool],
|
|
compaction_control: CompactionControl | Omit = omit,
|
|
stream: bool,
|
|
max_iterations: int | Omit = omit,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> BetaAsyncStreamingToolRunner[ResponseFormatT] | BetaAsyncToolRunner[ResponseFormatT]: ...
|
|
|
|
def tool_runner(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
tools: Iterable[BetaAsyncRunnableTool],
|
|
compaction_control: CompactionControl | Omit = omit,
|
|
max_iterations: int | Omit = omit,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
stream: Literal[True] | Literal[False] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> BetaAsyncToolRunner[ResponseFormatT] | BetaAsyncStreamingToolRunner[ResponseFormatT]:
|
|
"""Create a Message stream"""
|
|
if model in DEPRECATED_MODELS:
|
|
warnings.warn(
|
|
f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
|
|
DeprecationWarning,
|
|
stacklevel=3,
|
|
)
|
|
|
|
extra_headers = {
|
|
"X-Stainless-Helper": "BetaToolRunner",
|
|
**strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else NOT_GIVEN}),
|
|
**(extra_headers or {}),
|
|
}
|
|
|
|
params = cast(
|
|
message_create_params.ParseMessageCreateParamsBase[ResponseFormatT],
|
|
{
|
|
"max_tokens": max_tokens,
|
|
"messages": messages,
|
|
"model": model,
|
|
"container": container,
|
|
"context_management": context_management,
|
|
"mcp_servers": mcp_servers,
|
|
"metadata": metadata,
|
|
"output_config": output_config,
|
|
"output_format": output_format,
|
|
"service_tier": service_tier,
|
|
"stop_sequences": stop_sequences,
|
|
"system": system,
|
|
"temperature": temperature,
|
|
"thinking": thinking,
|
|
"tool_choice": tool_choice,
|
|
"tools": [tool.to_dict() for tool in tools],
|
|
"top_k": top_k,
|
|
"top_p": top_p,
|
|
},
|
|
)
|
|
|
|
if stream:
|
|
return BetaAsyncStreamingToolRunner[ResponseFormatT](
|
|
tools=tools,
|
|
params=params,
|
|
options={
|
|
"extra_headers": extra_headers,
|
|
"extra_query": extra_query,
|
|
"extra_body": extra_body,
|
|
"timeout": timeout,
|
|
},
|
|
client=cast("AsyncAnthropic", self._client),
|
|
max_iterations=max_iterations if is_given(max_iterations) else None,
|
|
compaction_control=compaction_control if is_given(compaction_control) else None,
|
|
)
|
|
return BetaAsyncToolRunner[ResponseFormatT](
|
|
tools=tools,
|
|
params=params,
|
|
options={
|
|
"extra_headers": extra_headers,
|
|
"extra_query": extra_query,
|
|
"extra_body": extra_body,
|
|
"timeout": timeout,
|
|
},
|
|
client=cast("AsyncAnthropic", self._client),
|
|
max_iterations=max_iterations if is_given(max_iterations) else None,
|
|
compaction_control=compaction_control if is_given(compaction_control) else None,
|
|
)
|
|
|
|
def stream(
|
|
self,
|
|
*,
|
|
max_tokens: int,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
metadata: BetaMetadataParam | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[type[ResponseFormatT]] | Omit = omit,
|
|
container: Optional[message_create_params.Container] | Omit = omit,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
service_tier: Literal["auto", "standard_only"] | Omit = omit,
|
|
stop_sequences: SequenceNotStr[str] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
temperature: float | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[BetaToolUnionParam] | Omit = omit,
|
|
top_k: int | Omit = omit,
|
|
top_p: float | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
|
|
) -> BetaAsyncMessageStreamManager[ResponseFormatT]:
|
|
if model in DEPRECATED_MODELS:
|
|
warnings.warn(
|
|
f"The model '{model}' is deprecated and will reach end-of-life on {DEPRECATED_MODELS[model]}.\nPlease migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.",
|
|
DeprecationWarning,
|
|
stacklevel=3,
|
|
)
|
|
|
|
extra_headers = {
|
|
"X-Stainless-Helper-Method": "stream",
|
|
"X-Stainless-Stream-Helper": "beta.messages",
|
|
**strip_not_given({"anthropic-beta": ",".join(str(e) for e in betas) if is_given(betas) else NOT_GIVEN}),
|
|
**(extra_headers or {}),
|
|
}
|
|
|
|
transformed_output_format: Optional[message_create_params.OutputFormat] | NotGiven = NOT_GIVEN
|
|
|
|
if is_given(output_format) and output_format is not None:
|
|
adapted_type: TypeAdapter[ResponseFormatT] = TypeAdapter(output_format)
|
|
|
|
try:
|
|
schema = adapted_type.json_schema()
|
|
transformed_output_format = message_create_params.OutputFormat(
|
|
schema=transform_schema(schema), type="json_schema"
|
|
)
|
|
except pydantic.errors.PydanticSchemaGenerationError as e:
|
|
raise TypeError(
|
|
(
|
|
"Could not generate JSON schema for the given `output_format` type. "
|
|
"Use a type that works with `pydanitc.TypeAdapter`"
|
|
)
|
|
) from e
|
|
request = self._post(
|
|
"/v1/messages",
|
|
body=maybe_transform(
|
|
{
|
|
"max_tokens": max_tokens,
|
|
"messages": messages,
|
|
"model": model,
|
|
"metadata": metadata,
|
|
"output_config": output_config,
|
|
"output_format": transformed_output_format,
|
|
"container": container,
|
|
"context_management": context_management,
|
|
"mcp_servers": mcp_servers,
|
|
"service_tier": service_tier,
|
|
"stop_sequences": stop_sequences,
|
|
"system": system,
|
|
"temperature": temperature,
|
|
"thinking": thinking,
|
|
"top_k": top_k,
|
|
"top_p": top_p,
|
|
"tools": tools,
|
|
"tool_choice": tool_choice,
|
|
"stream": True,
|
|
},
|
|
message_create_params.MessageCreateParams,
|
|
),
|
|
options=make_request_options(
|
|
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
),
|
|
cast_to=BetaMessage,
|
|
stream=True,
|
|
stream_cls=AsyncStream[BetaRawMessageStreamEvent],
|
|
)
|
|
return BetaAsyncMessageStreamManager(request, output_format=cast(ResponseFormatT, output_format))
|
|
|
|
async def count_tokens(
|
|
self,
|
|
*,
|
|
messages: Iterable[BetaMessageParam],
|
|
model: ModelParam,
|
|
context_management: Optional[BetaContextManagementConfigParam] | Omit = omit,
|
|
mcp_servers: Iterable[BetaRequestMCPServerURLDefinitionParam] | Omit = omit,
|
|
output_config: BetaOutputConfigParam | Omit = omit,
|
|
output_format: Optional[BetaJSONOutputFormatParam] | Omit = omit,
|
|
system: Union[str, Iterable[BetaTextBlockParam]] | Omit = omit,
|
|
thinking: BetaThinkingConfigParam | Omit = omit,
|
|
tool_choice: BetaToolChoiceParam | Omit = omit,
|
|
tools: Iterable[message_count_tokens_params.Tool] | Omit = omit,
|
|
betas: List[AnthropicBetaParam] | Omit = omit,
|
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
extra_headers: Headers | None = None,
|
|
extra_query: Query | None = None,
|
|
extra_body: Body | None = None,
|
|
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
) -> BetaMessageTokensCount:
|
|
"""
|
|
Count the number of tokens in a Message.
|
|
|
|
The Token Count API can be used to count the number of tokens in a Message,
|
|
including tools, images, and documents, without creating it.
|
|
|
|
Learn more about token counting in our
|
|
[user guide](https://docs.claude.com/en/docs/build-with-claude/token-counting)
|
|
|
|
Args:
|
|
messages: Input messages.
|
|
|
|
Our models are trained to operate on alternating `user` and `assistant`
|
|
conversational turns. When creating a new `Message`, you specify the prior
|
|
conversational turns with the `messages` parameter, and the model then generates
|
|
the next `Message` in the conversation. Consecutive `user` or `assistant` turns
|
|
in your request will be combined into a single turn.
|
|
|
|
Each input message must be an object with a `role` and `content`. You can
|
|
specify a single `user`-role message, or you can include multiple `user` and
|
|
`assistant` messages.
|
|
|
|
If the final message uses the `assistant` role, the response content will
|
|
continue immediately from the content in that message. This can be used to
|
|
constrain part of the model's response.
|
|
|
|
Example with a single `user` message:
|
|
|
|
```json
|
|
[{ "role": "user", "content": "Hello, Claude" }]
|
|
```
|
|
|
|
Example with multiple conversational turns:
|
|
|
|
```json
|
|
[
|
|
{ "role": "user", "content": "Hello there." },
|
|
{ "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
|
|
{ "role": "user", "content": "Can you explain LLMs in plain English?" }
|
|
]
|
|
```
|
|
|
|
Example with a partially-filled response from Claude:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"role": "user",
|
|
"content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
|
|
},
|
|
{ "role": "assistant", "content": "The best answer is (" }
|
|
]
|
|
```
|
|
|
|
Each input message `content` may be either a single `string` or an array of
|
|
content blocks, where each block has a specific `type`. Using a `string` for
|
|
`content` is shorthand for an array of one content block of type `"text"`. The
|
|
following input messages are equivalent:
|
|
|
|
```json
|
|
{ "role": "user", "content": "Hello, Claude" }
|
|
```
|
|
|
|
```json
|
|
{ "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
|
|
```
|
|
|
|
See [input examples](https://docs.claude.com/en/api/messages-examples).
|
|
|
|
Note that if you want to include a
|
|
[system prompt](https://docs.claude.com/en/docs/system-prompts), you can use the
|
|
top-level `system` parameter — there is no `"system"` role for input messages in
|
|
the Messages API.
|
|
|
|
There is a limit of 100,000 messages in a single request.
|
|
|
|
model: The model that will complete your prompt.\n\nSee
|
|
[models](https://docs.anthropic.com/en/docs/models-overview) for additional
|
|
details and options.
|
|
|
|
context_management: Context management configuration.
|
|
|
|
This allows you to control how Claude manages context across multiple requests,
|
|
such as whether to clear function results or not.
|
|
|
|
mcp_servers: MCP servers to be utilized in this request
|
|
|
|
output_config: Configuration options for the model's output. Controls aspects like how much
|
|
effort the model puts into its response.
|
|
|
|
output_format: A schema to specify Claude's output format in responses.
|
|
|
|
system: System prompt.
|
|
|
|
A system prompt is a way of providing context and instructions to Claude, such
|
|
as specifying a particular goal or role. See our
|
|
[guide to system prompts](https://docs.claude.com/en/docs/system-prompts).
|
|
|
|
thinking: Configuration for enabling Claude's extended thinking.
|
|
|
|
When enabled, responses include `thinking` content blocks showing Claude's
|
|
thinking process before the final answer. Requires a minimum budget of 1,024
|
|
tokens and counts towards your `max_tokens` limit.
|
|
|
|
See
|
|
[extended thinking](https://docs.claude.com/en/docs/build-with-claude/extended-thinking)
|
|
for details.
|
|
|
|
tool_choice: How the model should use the provided tools. The model can use a specific tool,
|
|
any available tool, decide by itself, or not use tools at all.
|
|
|
|
tools: Definitions of tools that the model may use.
|
|
|
|
If you include `tools` in your API request, the model may return `tool_use`
|
|
content blocks that represent the model's use of those tools. You can then run
|
|
those tools using the tool input generated by the model and then optionally
|
|
return results back to the model using `tool_result` content blocks.
|
|
|
|
There are two types of tools: **client tools** and **server tools**. The
|
|
behavior described below applies to client tools. For
|
|
[server tools](https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview#server-tools),
|
|
see their individual documentation as each has its own behavior (e.g., the
|
|
[web search tool](https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-search-tool)).
|
|
|
|
Each tool definition includes:
|
|
|
|
- `name`: Name of the tool.
|
|
- `description`: Optional, but strongly-recommended description of the tool.
|
|
- `input_schema`: [JSON schema](https://json-schema.org/draft/2020-12) for the
|
|
tool `input` shape that the model will produce in `tool_use` output content
|
|
blocks.
|
|
|
|
For example, if you defined `tools` as:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"name": "get_stock_price",
|
|
"description": "Get the current stock price for a given ticker symbol.",
|
|
"input_schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"ticker": {
|
|
"type": "string",
|
|
"description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
|
|
}
|
|
},
|
|
"required": ["ticker"]
|
|
}
|
|
}
|
|
]
|
|
```
|
|
|
|
And then asked the model "What's the S&P 500 at today?", the model might produce
|
|
`tool_use` content blocks in the response like this:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_use",
|
|
"id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"name": "get_stock_price",
|
|
"input": { "ticker": "^GSPC" }
|
|
}
|
|
]
|
|
```
|
|
|
|
You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
|
|
input, and return the following back to the model in a subsequent `user`
|
|
message:
|
|
|
|
```json
|
|
[
|
|
{
|
|
"type": "tool_result",
|
|
"tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
|
|
"content": "259.75 USD"
|
|
}
|
|
]
|
|
```
|
|
|
|
Tools can be used for workflows that include running client-side tools and
|
|
functions, or more generally whenever you want the model to produce a particular
|
|
JSON structure of output.
|
|
|
|
See our [guide](https://docs.claude.com/en/docs/tool-use) for more details.
|
|
|
|
betas: Optional header to specify the beta version(s) you want to use.
|
|
|
|
extra_headers: Send extra headers
|
|
|
|
extra_query: Add additional query parameters to the request
|
|
|
|
extra_body: Add additional JSON properties to the request
|
|
|
|
timeout: Override the client-level default timeout for this request, in seconds
|
|
"""
|
|
extra_headers = {
|
|
**strip_not_given(
|
|
{
|
|
"anthropic-beta": ",".join(chain((str(e) for e in betas), ["token-counting-2024-11-01"]))
|
|
if is_given(betas)
|
|
else not_given
|
|
}
|
|
),
|
|
**(extra_headers or {}),
|
|
}
|
|
extra_headers = {"anthropic-beta": "token-counting-2024-11-01", **(extra_headers or {})}
|
|
return await self._post(
|
|
"/v1/messages/count_tokens?beta=true",
|
|
body=await async_maybe_transform(
|
|
{
|
|
"messages": messages,
|
|
"model": model,
|
|
"context_management": context_management,
|
|
"mcp_servers": mcp_servers,
|
|
"output_config": output_config,
|
|
"output_format": output_format,
|
|
"system": system,
|
|
"thinking": thinking,
|
|
"tool_choice": tool_choice,
|
|
"tools": tools,
|
|
},
|
|
message_count_tokens_params.MessageCountTokensParams,
|
|
),
|
|
options=make_request_options(
|
|
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
),
|
|
cast_to=BetaMessageTokensCount,
|
|
)
|
|
|
|
|
|
class MessagesWithRawResponse:
|
|
def __init__(self, messages: Messages) -> None:
|
|
self._messages = messages
|
|
|
|
self.create = _legacy_response.to_raw_response_wrapper(
|
|
messages.create,
|
|
)
|
|
self.count_tokens = _legacy_response.to_raw_response_wrapper(
|
|
messages.count_tokens,
|
|
)
|
|
|
|
@cached_property
|
|
def batches(self) -> BatchesWithRawResponse:
|
|
return BatchesWithRawResponse(self._messages.batches)
|
|
|
|
|
|
class AsyncMessagesWithRawResponse:
|
|
def __init__(self, messages: AsyncMessages) -> None:
|
|
self._messages = messages
|
|
|
|
self.create = _legacy_response.async_to_raw_response_wrapper(
|
|
messages.create,
|
|
)
|
|
self.count_tokens = _legacy_response.async_to_raw_response_wrapper(
|
|
messages.count_tokens,
|
|
)
|
|
|
|
@cached_property
|
|
def batches(self) -> AsyncBatchesWithRawResponse:
|
|
return AsyncBatchesWithRawResponse(self._messages.batches)
|
|
|
|
|
|
class MessagesWithStreamingResponse:
|
|
def __init__(self, messages: Messages) -> None:
|
|
self._messages = messages
|
|
|
|
self.create = to_streamed_response_wrapper(
|
|
messages.create,
|
|
)
|
|
self.count_tokens = to_streamed_response_wrapper(
|
|
messages.count_tokens,
|
|
)
|
|
|
|
@cached_property
|
|
def batches(self) -> BatchesWithStreamingResponse:
|
|
return BatchesWithStreamingResponse(self._messages.batches)
|
|
|
|
|
|
class AsyncMessagesWithStreamingResponse:
|
|
def __init__(self, messages: AsyncMessages) -> None:
|
|
self._messages = messages
|
|
|
|
self.create = async_to_streamed_response_wrapper(
|
|
messages.create,
|
|
)
|
|
self.count_tokens = async_to_streamed_response_wrapper(
|
|
messages.count_tokens,
|
|
)
|
|
|
|
@cached_property
|
|
def batches(self) -> AsyncBatchesWithStreamingResponse:
|
|
return AsyncBatchesWithStreamingResponse(self._messages.batches)
|
|
|
|
|
|
def validate_output_format(output_format: object) -> None:
|
|
if inspect.isclass(output_format) and issubclass(output_format, pydantic.BaseModel):
|
|
raise TypeError(
|
|
"You tried to pass a `BaseModel` class to `beta.messages.create()`; You must use `beta.messages.parse()` instead"
|
|
)
|