The biggest limitation of most AI agents isn't intelligence—it's amnesia. Every conversation starts from scratch. Every interaction loses context. Your agent can write code, analyze data, and reason through complex problems, but it can't remember that the user prefers TypeScript, dislikes verbose output, or is working on a project called "Atlas."
This guide walks through building an AI agent with true persistent memory using Smara's API.
Consider a customer support agent. A user contacts support three times over a month about the same billing issue, and each session applies another quick fix.
Without persistent memory, the agent on Day 25 has no idea this is a recurring problem. With memory, it can say: "I see this billing issue happened before. Let me escalate to investigate the root cause rather than applying another temporary fix."
Before each LLM call, your agent queries Smara for relevant memories and injects them into the system prompt. After each conversation, your agent extracts and stores new facts.
Sign up at smara.io to get an API key, or self-host:
# Self-host option
docker run -d \
  -e DATABASE_URL="postgresql://user:pass@host:5432/smara" \
  -e VOYAGE_API_KEY="your-voyage-key" \
  -p 3010:3010 \
  ghcr.io/parallelromb/smara:latest
Install the Python SDK:
pip install smara
from smara import Smara

client = Smara(
    api_key="smara_your_api_key_here",
    base_url="https://api.smara.io"
)
# Store a simple fact
result = client.store(
    user_id="user-123",
    fact="Prefers Python over JavaScript for backend work",
    importance=0.7
)

print(result)
# {"action": "stored", "id": "mem-abc-123"}
# Core identity fact - 10-day half-life
client.store("user-123", "Is a senior backend engineer at Stripe", importance=1.0)
# Active project - 7-day half-life
client.store("user-123", "Currently building a payment reconciliation service", importance=0.7)
# Passing mention - 2-day half-life
client.store("user-123", "Had a meeting with the team today", importance=0.2)
# First store
client.store("user-123", "Lives in New York")
# {"action": "stored", "id": "mem-1"}
# Duplicate detection (cosine ≥ 0.985)
client.store("user-123", "Lives in New York City")
# {"action": "duplicate", "id": "mem-1"}
# Contradiction handling (cosine 0.94-0.985)
client.store("user-123", "Lives in San Francisco")
# {"action": "replaced", "id": "mem-2", "replaced_id": "mem-1"}
results = client.search(
    user_id="user-123",
    q="cloud infrastructure preferences",
    limit=5,
    namespace="infrastructure"
)

for r in results:
    print(f"[{r.score:.3f}] {r.fact}")
    print(f"  similarity: {r.similarity}, decay: {r.decay_score}")
import requests
import anthropic

response = requests.get(
    "https://api.smara.io/v1/users/user-123/context",
    headers={"Authorization": "Bearer smara_..."},
    params={"q": "coding preferences", "top_n": 5}
)
memory_context = response.json()["context"]

# Inject directly into the LLM call
client_anthropic = anthropic.Anthropic()
response = client_anthropic.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,  # required by the Anthropic Messages API
    system=f"""You are a helpful coding assistant.

Here is what you know about this user:
{memory_context}

Use this context to personalize your responses.""",
    messages=[{"role": "user", "content": "Help me set up a new API project"}]
)
When you have multiple agents, each one can maintain its own memory scope:
import requests

BASE = "https://api.smara.io"
HEADERS = {"Authorization": "Bearer smara_..."}

# Create an agent
agent = requests.post(f"{BASE}/v1/agents", headers=HEADERS, json={
    "name": "CodeAssist",
    "description": "A coding assistant with persistent memory",
    "owner_id": "developer-sri",
    "model": "claude-sonnet-4-20250514",
}).json()

# Store a memory scoped to this agent
requests.post(f"{BASE}/v1/agents/{agent['id']}/memories", headers=HEADERS, json={
    "user_id": "user-123",
    "fact": "Prefers functional programming patterns",
    "importance": 0.8
})

# Search only this agent's memories
results = requests.get(
    f"{BASE}/v1/agents/{agent['id']}/memories",
    headers=HEADERS,
    params={"user_id": "user-123", "q": "programming style"}
).json()
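Because scoping is per agent, a second agent starts with a blank slate for the same user. A quick check using the same endpoints (the SupportBot agent here is a hypothetical example, and this assumes agents never share scopes by default):

support = requests.post(f"{BASE}/v1/agents", headers=HEADERS, json={
    "name": "SupportBot",
    "description": "A support agent with its own memory scope",
    "owner_id": "developer-sri",
}).json()

# This search should come back empty: "Prefers functional programming
# patterns" was stored under CodeAssist, not SupportBot.
empty = requests.get(
    f"{BASE}/v1/agents/{support['id']}/memories",
    headers=HEADERS,
    params={"user_id": "user-123", "q": "programming style"}
).json()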
Flat memory storage misses relationships between facts. Smara's graph memory lets you create typed, weighted edges:
# Store related memories
mem1 = client.store("user-123", "Works at Stripe on payment systems")
mem2 = client.store("user-123", "Building a payment reconciliation service")
mem3 = client.store("user-123", "Needs to handle idempotency in payment retries")

# Connect them
requests.post(f"{BASE}/v1/graph/connect", headers=HEADERS, json={
    "from_memory_id": mem1["id"],
    "to_memory_id": mem2["id"],
    "relationship_type": "context_for",
    "weight": 0.9
})

# Traverse the graph
graph = requests.get(
    f"{BASE}/v1/graph/traverse/{mem1['id']}",
    headers=HEADERS,
    params={"depth": 3}
).json()

for node in graph["nodes"]:
    print(f"[depth {node['depth']}] {node['fact']}")
Namespaces partition a user's memories by domain, so a work query never surfaces personal facts:

# Store in different namespaces
client.store("user-123", "Prefers dark mode", namespace="preferences")
client.store("user-123", "Sprint 42 ends Friday", namespace="work")
client.store("user-123", "Allergic to shellfish", namespace="personal")
# Search within a specific namespace
work_results = client.search("user-123", q="current sprint", namespace="work")
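Namespace search composes well with simple query routing. A hypothetical router (the keyword lists are placeholders; in practice an LLM classifier would be more robust):

def pick_namespace(query: str) -> str:
    q = query.lower()
    if any(w in q for w in ("sprint", "deadline", "ticket")):
        return "work"
    if any(w in q for w in ("allerg", "diet", "health")):
        return "personal"
    return "preferences"

query = "any food allergies I should plan around?"
results = client.search("user-123", q=query, namespace=pick_namespace(query))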
Team memories let multiple users and agents share facts, with per-memory visibility control:

# Create a team
team = requests.post(f"{BASE}/v1/teams", headers=HEADERS, json={
    "name": "Engineering",
    "slug": "engineering",
    "user_id": "sri"
}).json()

# Store a team-visible memory
requests.post(f"{BASE}/v1/memories", headers=HEADERS, json={
    "user_id": "sri",
    "fact": "The API migration deadline is May 15",
    "team_id": team["id"],
    "visibility": "team",
    "importance": 0.9
})

# Any team member (or agent) can search team memories
results = requests.get(f"{BASE}/v1/memories/search", headers=HEADERS, params={
    "user_id": "agent-codeassist",
    "q": "migration deadline",
    "team_id": team["id"],
    "include_team": "true"
}).json()
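Visibility appears to be opt-in: leave it off and a memory stays private to its owner, even when teammates search with include_team (assuming private is the default, as the explicit visibility field above suggests):

# No visibility="team", so this should never appear in teammates' searches.
requests.post(f"{BASE}/v1/memories", headers=HEADERS, json={
    "user_id": "sri",
    "fact": "Drafting a proposal to replace the billing vendor",
    "importance": 0.6
})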
Here's a complete agent loop with persistent memory:
import anthropic
import requests

SMARA_KEY = "smara_..."
SMARA_BASE = "https://api.smara.io"
HEADERS = {"Authorization": f"Bearer {SMARA_KEY}"}

claude = anthropic.Anthropic()

def get_memory_context(user_id: str, message: str) -> str:
    """Retrieve relevant memories for the current conversation."""
    response = requests.get(
        f"{SMARA_BASE}/v1/users/{user_id}/context",
        headers=HEADERS,
        params={"q": message, "top_n": 10}
    )
    return response.json().get("context", "No previous context.")

def extract_and_store_facts(user_id: str, conversation: str):
    """Use Claude to extract storable facts from the conversation."""
    extraction = claude.messages.create(
        model="claude-haiku-4-20250414",
        max_tokens=1024,
        system="Extract discrete facts about the user. "
               "Return each on a new line. Rate importance 0.0-1.0 after |",
        messages=[{"role": "user", "content": conversation}]
    )
    for line in extraction.content[0].text.strip().split("\n"):
        if "|" not in line:
            continue
        fact, imp = line.rsplit("|", 1)
        try:
            importance = min(max(float(imp.strip()), 0.0), 1.0)
        except ValueError:
            continue  # skip lines where the rating isn't a number
        requests.post(f"{SMARA_BASE}/v1/memories", headers=HEADERS, json={
            "user_id": user_id,
            "fact": fact.strip(),
            "importance": importance,
            "source": "conversation-extraction"
        })

def agent_respond(user_id: str, user_message: str) -> str:
    """Main agent loop: retrieve, respond, store."""
    memory_context = get_memory_context(user_id, user_message)
    response = claude.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        system=f"""You are a helpful assistant with persistent memory.

Known facts about this user:
{memory_context}""",
        messages=[{"role": "user", "content": user_message}]
    )
    assistant_message = response.content[0].text

    # Persist anything new we learned in this exchange
    extract_and_store_facts(user_id, f"User: {user_message}\nAssistant: {assistant_message}")
    return assistant_message
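A short session shows the loop paying off: the second call is grounded in facts extracted and stored during the first.

print(agent_respond("user-123", "I'm Sri. I write Python and hate verbose output."))
print(agent_respond("user-123", "Scaffold a new API project for me."))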
Start building agents with persistent memory. Free tier: 100 memories, 1 agent.
Try Smara Free →