Custom Providers

memv uses two protocols for external services: EmbeddingClient and LLMClient. Implement them to use any provider.

EmbeddingClient

Python
from memv.protocols import EmbeddingClient


class MyEmbedder:  # no subclassing needed; matching the method signatures is enough
    async def embed(self, text: str) -> list[float]:
        """Embed single text, return vector."""
        ...

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed multiple texts, return list of vectors."""
        ...

Dimension consistency

The vector dimensions must match embedding_dimensions in your config (default: 1536). If your model produces 768-dimensional vectors, set embedding_dimensions=768.
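
A startup probe makes a mismatch obvious before anything is written to the store (a minimal sketch; MyEmbedder is the stub above, and 768 is just the example dimension):

Python
import asyncio

embedder = MyEmbedder()
vector = asyncio.run(embedder.embed("dimension probe"))
assert len(vector) == 768, f"expected 768 dimensions, got {len(vector)}"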

Example: Cohere

Python
import cohere


class CohereEmbedder:
    def __init__(self, model: str = "embed-english-v3.0"):
        self.client = cohere.AsyncClient()
        self.model = model

    async def embed(self, text: str) -> list[float]:
        response = await self.client.embed(
            texts=[text],
            model=self.model,
            input_type="search_query",
        )
        return response.embeddings[0]

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        response = await self.client.embed(
            texts=texts,
            model=self.model,
            input_type="search_document",
        )
        return response.embeddings


memory = Memory(
    embedding_client=CohereEmbedder(),
    embedding_dimensions=1024,  # Cohere v3 outputs 1024
    # ...
)

LLMClient

Python
from memv.protocols import LLMClient
from typing import TypeVar

T = TypeVar("T")


class MyLLM:
    async def generate(self, prompt: str) -> str:
        """Generate unstructured text response."""
        ...

    async def generate_structured(self, prompt: str, response_model: type[T]) -> T:
        """Generate structured response matching Pydantic model."""
        ...

generate_structured must return an instance of the given Pydantic model. memv relies on this for episode generation and knowledge extraction, where it needs structured output.
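
For example, a knowledge-extraction call might look like this (a sketch; the Fact model and prompt are illustrative, not memv's internal schema):

Python
from pydantic import BaseModel


class Fact(BaseModel):
    subject: str
    predicate: str
    object: str


# llm is any LLMClient implementation; call from inside an async function
fact = await llm.generate_structured(
    "Extract one fact: 'Ada Lovelace wrote the first published algorithm.'",
    Fact,
)
assert isinstance(fact, Fact)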

Example: Anthropic (direct)

Python
import anthropic
from typing import TypeVar

T = TypeVar("T")


class AnthropicLLM:
    def __init__(self, model: str = "claude-sonnet-4-20250514"):
        self.client = anthropic.AsyncAnthropic()  # reads ANTHROPIC_API_KEY
        self.model = model

    async def generate(self, prompt: str) -> str:
        response = await self.client.messages.create(
            model=self.model,
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.content[0].text

    async def generate_structured(self, prompt: str, response_model: type[T]) -> T:
        schema = response_model.model_json_schema()
        response = await self.client.messages.create(
            model=self.model,
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}],
            tools=[{"name": "output", "description": "Output", "input_schema": schema}],
            tool_choice={"type": "tool", "name": "output"},
        )
        # tool_choice forces the tool call, so the first content block is the tool use
        data = response.content[0].input
        return response_model.model_validate(data)
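
Wiring it in mirrors the Cohere example (a sketch; the llm_client parameter name is assumed here by analogy with embedding_client):

Python
memory = Memory(
    llm_client=AnthropicLLM(),
    # ...
)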

Built-in Adapters

memv ships with two adapters that cover most use cases:

OpenAIEmbedAdapter

Python
from memv.embeddings import OpenAIEmbedAdapter

embedder = OpenAIEmbedAdapter(
    api_key=None,                          # Falls back to the OPENAI_API_KEY env var
    model="text-embedding-3-small",        # Default model; 1536 dimensions, matching the default embedding_dimensions
)

PydanticAIAdapter

Multi-provider LLM via PydanticAI. Supports OpenAI, Anthropic, Google, Groq, and more.

Python
from memv.llm import PydanticAIAdapter

llm = PydanticAIAdapter("openai:gpt-4o-mini")
llm = PydanticAIAdapter("anthropic:claude-3-5-sonnet-latest")
llm = PydanticAIAdapter("google-gla:gemini-2.5-flash")
llm = PydanticAIAdapter("groq:llama-3.3-70b-versatile")

See PydanticAI models for the full provider list.