
How to Use the Claude API with Python: Complete 2026 Guide


Quick answer: Install anthropic, set your ANTHROPIC_API_KEY environment variable, and call client.messages.create(). You can have a working call in under five minutes. This guide covers everything beyond the basics.


Installation and setup

pip install anthropic
export ANTHROPIC_API_KEY="sk-ant-..."

Basic call

import anthropic

client = anthropic.Anthropic()

message = client.messages.create(
    model="claude-sonnet-4",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Explain prompt caching in one paragraph."}
    ]
)

print(message.content[0].text)
print(f"Input tokens: {message.usage.input_tokens}")
print(f"Output tokens: {message.usage.output_tokens}")


System prompts

message = client.messages.create(
    model="claude-sonnet-4",
    max_tokens=1024,
    system="You are a senior Python engineer. Always return working code with type hints and docstrings.",
    messages=[
        {"role": "user", "content": "Write a function to chunk text into token-sized pieces."}
    ]
)


Streaming responses

with client.messages.stream(
    model="claude-sonnet-4",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Write a short story."}]
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)


Multi-turn conversations

conversation = []

def chat(user_message: str) -> str:
    conversation.append({"role": "user", "content": user_message})
    response = client.messages.create(
        model="claude-haiku-4",
        max_tokens=1024,
        messages=conversation
    )
    assistant_message = response.content[0].text
    conversation.append({"role": "assistant", "content": assistant_message})
    return assistant_message

print(chat("What's 2+2?"))
print(chat("What did I just ask you?"))  # Remembers context
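Because the full history is resent on every call, input tokens grow with every turn. One way to bound that is to trim old messages before sending; this is a sketch (the 20-message cap is an arbitrary assumption), which also keeps the history starting with a user message, as the API requires:

```python
def trim_history(conversation: list[dict], max_messages: int = 20) -> list[dict]:
    """Keep only the most recent messages, dropping any leading assistant
    turn so the history still starts with a user message."""
    if len(conversation) <= max_messages:
        return list(conversation)
    trimmed = conversation[-max_messages:]
    # The messages list must begin with a "user" role
    while trimmed and trimmed[0]["role"] != "user":
        trimmed = trimmed[1:]
    return trimmed
```

Call it on conversation just before each messages.create() if your chats run long.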


Tool use (function calling)

tools = [
    {
        "name": "get_weather",
        "description": "Get current weather for a location",
        "input_schema": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City and country"}
            },
            "required": ["location"]
        }
    }
]

response = client.messages.create(
    model="claude-sonnet-4",
    max_tokens=1024,
    tools=tools,
    messages=[{"role": "user", "content": "What's the weather in Tokyo?"}]
)

# Check if model wants to use a tool
if response.stop_reason == "tool_use":
    tool_use = next(b for b in response.content if b.type == "tool_use")
    print(f"Tool: {tool_use.name}, Input: {tool_use.input}")
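The snippet above only detects the tool request. To complete the loop, you run the tool yourself and send the result back in a tool_result block; the get_weather implementation here is a hypothetical stub, not a real weather API:

```python
# Hypothetical stub for the tool the model requested
def get_weather(location: str) -> str:
    # In production, call a real weather service here
    return f"22°C and sunny in {location}"

def tool_result_message(tool_use_id: str, result: str) -> dict:
    """Build the user message that returns a tool result to the model."""
    return {
        "role": "user",
        "content": [
            {"type": "tool_result", "tool_use_id": tool_use_id, "content": result}
        ],
    }

# result = get_weather(**tool_use.input)
# follow_up = client.messages.create(
#     model="claude-sonnet-4",
#     max_tokens=1024,
#     tools=tools,
#     messages=[
#         {"role": "user", "content": "What's the weather in Tokyo?"},
#         {"role": "assistant", "content": response.content},
#         tool_result_message(tool_use.id, result),
#     ],
# )
```

The follow-up call includes the assistant's tool_use content verbatim, then your tool_result; the model uses it to write the final answer.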


Vision (image analysis)

import base64

with open("chart.png", "rb") as f:
    image_data = base64.b64encode(f.read()).decode()

message = client.messages.create(
    model="claude-sonnet-4",
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {"type": "base64", "media_type": "image/png", "data": image_data}
                },
                {"type": "text", "text": "Describe the trends in this chart."}
            ]
        }
    ]
)
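The nested content block is easy to get wrong, so a small helper keeps it in one place (the function name is my own, not part of the SDK):

```python
import base64

def image_block(data: bytes, media_type: str = "image/png") -> dict:
    """Wrap raw image bytes in the base64 image block the messages API expects."""
    return {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": media_type,
            "data": base64.b64encode(data).decode(),
        },
    }

# with open("chart.png", "rb") as f:
#     content = [image_block(f.read()),
#                {"type": "text", "text": "Describe the trends in this chart."}]
```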


Prompt caching for cost reduction

# Cache a long document that appears in every request
with open("knowledge_base.txt") as f:
    long_doc = f.read()  # 50,000+ tokens

user_question = "What does the document say about refunds?"

response = client.messages.create(
    model="claude-sonnet-4",
    max_tokens=1024,
    system=[
        {"type": "text", "text": "You answer questions based on the provided document."},
        {"type": "text", "text": long_doc, "cache_control": {"type": "ephemeral"}}
    ],
    messages=[{"role": "user", "content": user_question}]
)

# Check cache usage: the first call writes the cache, subsequent
# calls within the cache lifetime read from it at a reduced rate
print(f"Cache write tokens: {response.usage.cache_creation_input_tokens}")
print(f"Cache read tokens: {response.usage.cache_read_input_tokens}")


Async calls for production throughput

import asyncio
import anthropic

async def process_document(client, doc: str) -> str:
    response = await client.messages.create(
        model="claude-haiku-4",
        max_tokens=500,
        messages=[{"role": "user", "content": f"Summarize: {doc}"}]
    )
    return response.content[0].text

async def main():
    client = anthropic.AsyncAnthropic()
    documents = ["First report text...", "Second report text..."]
    # Process all documents concurrently instead of one at a time
    tasks = [process_document(client, doc) for doc in documents]
    return await asyncio.gather(*tasks)

results = asyncio.run(main())
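gather launches everything at once, which can trip API rate limits on large batches. One way to cap concurrency is a semaphore wrapper; this is a sketch, and the default limit of 5 is an arbitrary assumption, not a documented quota:

```python
import asyncio

async def gather_limited(coros, limit: int = 5):
    """Run coroutines concurrently, but at most `limit` at any one time."""
    sem = asyncio.Semaphore(limit)

    async def guarded(coro):
        async with sem:
            return await coro

    # gather preserves input order regardless of completion order
    return await asyncio.gather(*(guarded(c) for c in coros))

# results = await gather_limited(
#     [process_document(client, doc) for doc in documents], limit=5
# )
```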


Model selection in 2026

For cost-performance guidance:

  • Claude Opus 4: Maximum quality, complex reasoning, $15/$75 per 1M tokens
  • Claude Sonnet 4: Best balance, most production use cases, $3/$15 per 1M tokens
  • Claude Haiku 4: High volume, cost-sensitive, $0.80/$4.00 per 1M tokens
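The prices above translate directly into a back-of-the-envelope cost estimator; the numbers are hardcoded from the table, so check current pricing before relying on them:

```python
# (input $/1M tokens, output $/1M tokens), copied from the table above
PRICES = {
    "claude-opus-4": (15.00, 75.00),
    "claude-sonnet-4": (3.00, 15.00),
    "claude-haiku-4": (0.80, 4.00),
}

def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    """Rough request cost in USD from the token counts in response.usage."""
    in_price, out_price = PRICES[model]
    return (input_tokens * in_price + output_tokens * out_price) / 1_000_000
```

Feed it message.usage.input_tokens and message.usage.output_tokens from any response to track spend per request.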

Compare Claude against other providers at LLMversus or calculate your exact costs with the cost calculator.
