Cost Tracking Example

Track token usage and USD costs across agent runs.

Basic Cost Tracking

Python

from pydantic_ai import Agent
from pydantic_ai.models.test import TestModel
from pydantic_ai_middleware import MiddlewareAgent, MiddlewareContext
from pydantic_ai_middleware.cost_tracking import create_cost_tracking_middleware


def on_cost(info):
    """Print a one-line token summary for the run, then USD costs if known.

    Reads ``run_count``, ``run_request_tokens`` and ``run_response_tokens``
    from ``info`` for the summary line. The two cost lines (run and total)
    are printed only when ``info.run_cost_usd`` is not ``None``.
    """
    run_label = f"Run #{info.run_count}: "
    token_usage = f"{info.run_request_tokens} in / {info.run_response_tokens} out"
    print(run_label + token_usage)

    # No per-run cost available: the token summary is all we can report.
    if info.run_cost_usd is None:
        return

    print(f"  Run cost:   ${info.run_cost_usd:.4f}")
    print(f"  Total cost: ${info.total_cost_usd:.4f}")


# Build the cost-tracking middleware and register `on_cost` as the
# `on_cost_update` callback.
# NOTE(review): presumably `model_name` selects the pricing used to compute
# USD costs — confirm against the library documentation.
cost_mw = create_cost_tracking_middleware(
    model_name="openai:gpt-4.1",
    on_cost_update=on_cost,
)

# Wrap a plain Agent (backed by TestModel) with the middleware list.
agent = MiddlewareAgent(
    agent=Agent(model=TestModel()),
    middleware=[cost_mw],
    context=MiddlewareContext(),
)

# Top-level `await` only works where an event loop is already running
# (e.g. `python -m asyncio` or a notebook). In a plain script, move this
# call into an `async def` and drive it with `asyncio.run(...)`.
result = await agent.run("What is the capital of France?")

Budget Enforcement

Python

from pydantic_ai import Agent
from pydantic_ai.models.test import TestModel
from pydantic_ai_middleware import MiddlewareAgent, MiddlewareContext
from pydantic_ai_middleware.cost_tracking import create_cost_tracking_middleware
from pydantic_ai_middleware.exceptions import BudgetExceededError

# Cost-tracking middleware configured with a USD budget limit.
# NOTE(review): presumably the limit is checked against the cost accumulated
# across runs — confirm against the library documentation.
cost_mw = create_cost_tracking_middleware(
    model_name="openai:gpt-4.1",
    budget_limit_usd=0.50,
)

# Wrap a plain Agent (backed by TestModel) with the middleware list.
agent = MiddlewareAgent(
    agent=Agent(model=TestModel()),
    middleware=[cost_mw],
    context=MiddlewareContext(),
)

# Prompts to run one after another until the budget is exhausted.
prompts = [
    "Summarize this report",
    "Translate to Spanish",
    "Generate test cases",
]

for prompt in prompts:
    try:
        # Top-level `await` only works where an event loop is already
        # running (e.g. `python -m asyncio` or a notebook); in a plain
        # script, put this loop inside an `async def`.
        result = await agent.run(prompt)
        print(f"OK: {prompt}")
    except BudgetExceededError as e:
        # Report the cost and budget carried by the exception, then stop
        # issuing the remaining prompts.
        print(f"Stopped: ${e.cost:.4f} >= ${e.budget:.4f} limit")
        break

Cost Tracking with Logging

Python

import logging
from pydantic_ai import Agent
from pydantic_ai.models.test import TestModel
from pydantic_ai_middleware import (
    AgentMiddleware,
    MiddlewareAgent,
    MiddlewareContext,
)
from pydantic_ai_middleware.cost_tracking import CostTrackingMiddleware

logger = logging.getLogger(__name__)


class LoggingMiddleware(AgentMiddleware[None]):
    """Middleware whose run hooks log the prompt and the output.

    Both hooks return their input unchanged, so this middleware only
    observes the run.
    """

    async def before_run(self, prompt, deps, ctx):
        """Log the first 80 elements of ``prompt`` and return it unchanged."""
        # Lazy %-style arguments: the message is only formatted if a
        # handler actually emits INFO records.
        logger.info("Starting run: %s", prompt[:80])
        return prompt

    async def after_run(self, prompt, output, deps, ctx):
        """Log ``output`` and return it unchanged."""
        logger.info("Finished run: %s", output)
        return output


# Construct the middleware class directly, with a budget limit and an inline
# callback that logs the running totals.
# NOTE(review): the callback formats `total_cost_usd` with `:.4f` without a
# None check, whereas the basic example's `on_cost` checks `run_cost_usd`
# for None before formatting costs — confirm the total can never be None.
cost_mw = CostTrackingMiddleware(
    model_name="anthropic:claude-sonnet-4-5-20250929",
    budget_limit_usd=5.0,
    on_cost_update=lambda info: logger.info(
        f"Cost: ${info.total_cost_usd:.4f} "
        f"({info.total_request_tokens} in / {info.total_response_tokens} out)"
    ),
)

# LoggingMiddleware is listed before the cost tracker.
# NOTE(review): presumably list order determines hook order — confirm.
agent = MiddlewareAgent(
    agent=Agent(model=TestModel()),
    middleware=[LoggingMiddleware(), cost_mw],
    context=MiddlewareContext(),
)

# Top-level `await` only works where an event loop is already running
# (e.g. `python -m asyncio` or a notebook). In a plain script, move this
# call into an `async def` and drive it with `asyncio.run(...)`.
result = await agent.run("Explain middleware patterns")

Async Callback with Database

Python

from pydantic_ai import Agent
from pydantic_ai.models.test import TestModel
from pydantic_ai_middleware import MiddlewareAgent, MiddlewareContext
from pydantic_ai_middleware.cost_tracking import (
    CostInfo,
    create_cost_tracking_middleware,
)


async def persist_cost(info: CostInfo) -> None:
    """Stand-in for a database write: print the per-run cost record.

    Reads the run counter, the request/response token counts and the run
    cost from ``info`` and prints them on a single line.
    """
    run_number = info.run_count
    tokens_in = info.run_request_tokens
    tokens_out = info.run_response_tokens
    run_cost = info.run_cost_usd

    # Replace with your actual database call
    record = (
        f"Saving: run={run_number}, "
        f"tokens_in={tokens_in}, "
        f"tokens_out={tokens_out}, "
        f"cost={run_cost}"
    )
    print(record)


# Register the coroutine function `persist_cost` as the cost callback.
# NOTE(review): presumably the middleware awaits async callbacks — confirm
# against the library documentation.
cost_mw = create_cost_tracking_middleware(
    model_name="openai:gpt-4.1",
    on_cost_update=persist_cost,
)

# Wrap a plain Agent (backed by TestModel) with the middleware list.
agent = MiddlewareAgent(
    agent=Agent(model=TestModel()),
    middleware=[cost_mw],
    context=MiddlewareContext(),
)

# Top-level `await` only works where an event loop is already running
# (e.g. `python -m asyncio` or a notebook). In a plain script, move this
# call into an `async def` and drive it with `asyncio.run(...)`.
result = await agent.run("Generate a summary")

Resetting Counters

Python

from pydantic_ai_middleware.cost_tracking import CostTrackingMiddleware

# Construct the middleware directly; no budget or callback is configured here.
cost_mw = CostTrackingMiddleware(model_name="openai:gpt-4.1")

# ... run agent multiple times ...

# Read the totals held on the middleware instance.
# NOTE(review): `total_cost` is formatted with `:.4f`, so it is assumed to be
# a number here — confirm it cannot be None.
print(f"Session cost: ${cost_mw.total_cost:.4f}")
print(f"Session runs: {cost_mw.run_count}")

# Start a new billing period
cost_mw.reset()
print(f"After reset: ${cost_mw.total_cost:.4f}")