Building a Persona Context Engine: Character Memory That Evolves Over Sessions

What a persona engine needs

A character-based AI — a branded persona, a fictional companion, a customer-facing agent with consistent personality — needs to know its users across sessions. Not just what they said last time, but who they are: their preferences, their context, their relationship with the persona, and what they've shared over multiple interactions.

This is harder than plain conversation memory because different types of knowledge should decay at different rates. That a user prefers brief responses (stated explicitly, session 1) should be effectively permanent. That the user was anxious about a job interview (session 14) should fade as time passes. That the user's name is Alex should never be forgotten. A flat conversation log treats all of these equally — a context engine doesn't.

Step 1: Persona memory schema

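Define the memory categories and their decay parameters upfront. The snippets below assume an embed(text) helper, not shown here, that returns an embedding vector whose length matches the database's dim=768: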

import json
from datetime import datetime, timezone

import feather_db as fdb
from anthropic import Anthropic

# Feather DB handle used by every snippet below to store and recall memories.
# NOTE(review): dim=768 presumably has to match the length of the vectors
# returned by embed() — confirm for the embedding model you use.
db = fdb.DB.open("persona_memory.feather", dim=768)
# Anthropic API client shared by the extraction call and the persona reply call.
client = Anthropic()

# Per-category memory configuration, keyed by the category (entity) name.
#   half_life  - decay half-life in days
#   importance - base importance weight stored with each memory
#   desc       - human-readable description of what the category holds
MEMORY_SCHEMA = {
    # 10 years — practically permanent
    "core-identity": dict(
        half_life=3650,
        importance=2.5,
        desc="Name, location, core facts the user has explicitly stated",
    ),
    # 1 year — stable but can evolve
    "stated-preferences": dict(
        half_life=365,
        importance=2.0,
        desc="Explicitly stated preferences (response style, topics, tone)",
    ),
    # 3 months — may not be accurate
    "inferred-preferences": dict(
        half_life=90,
        importance=1.2,
        desc="Preferences inferred from behavior, not stated directly",
    ),
    # 6 months
    "relationship-state": dict(
        half_life=180,
        importance=1.8,
        desc="How the user feels about the persona, trust level, rapport",
    ),
    # 2 weeks — situational
    "current-context": dict(
        half_life=14,
        importance=1.0,
        desc="What the user is going through right now",
    ),
    # 1 week — ephemeral
    "session-notes": dict(
        half_life=7,
        importance=0.8,
        desc="Details from recent sessions that may not carry forward",
    ),
}

def add_persona_memory(user_id: str, persona_id: str, text: str,
                        category: str, source: str = "inferred"):
    """Add a memory for a specific user-persona relationship.

    The memory is embedded, stored in the ``{persona_id}::{user_id}``
    namespace, and tagged with its category's weights from ``MEMORY_SCHEMA``.

    Args:
        user_id: Identifier of the user the memory is about.
        persona_id: Identifier of the persona that owns the memory.
        text: The memory itself, as natural-language text.
        category: A key of ``MEMORY_SCHEMA``. Unknown values are stored as
            ``"session-notes"``.
        source: How the memory was obtained (e.g. ``"stated"`` or
            ``"inferred"``).

    Returns:
        The memory object returned by ``db.add``.
    """
    # An unknown category (for example a label invented by the extraction
    # model) used to be weighted as "session-notes" but stored under the
    # unknown label. Normalise it so entity, attribute and weights agree.
    if category not in MEMORY_SCHEMA:
        category = "session-notes"
    schema = MEMORY_SCHEMA[category]

    mem = db.add(embed(text), text=text,
                 namespace=f"{persona_id}::{user_id}",
                 entity=category)
    mem.meta.set_attribute("importance", schema["importance"])
    # Record the category's half-life with the memory; previously the schema
    # value was never written anywhere.
    # NOTE(review): this only stores it as metadata — confirm how the store
    # expects per-memory decay to be configured.
    mem.meta.set_attribute("half_life", schema["half_life"])
    mem.meta.set_attribute("source", source)
    mem.meta.set_attribute("category", category)
    # Timezone-aware UTC timestamp (datetime.utcnow() is deprecated and naive);
    # the stored ISO string now carries a "+00:00" offset.
    mem.meta.set_attribute("created_at",
                           datetime.now(timezone.utc).isoformat())
    return mem

Step 2: Write-back after each session

After each user session, extract and save structured memories from the conversation. (The session loop in Step 4 runs this write-back after every turn, on the most recent messages.)

# Prompt template for the memory-extraction call. It is filled in with
# str.format(conversation=...), which is why the literal braces of the JSON
# example are doubled ({{ }}). The category names listed here mirror the keys
# of MEMORY_SCHEMA.
EXTRACTION_PROMPT = """Analyze this conversation and extract memories to save.
For each memory, output a JSON line: {{"text": "...", "category": "...", "source": "stated|inferred"}}

Categories:
- core-identity: name, location, permanent facts explicitly stated
- stated-preferences: explicitly stated preferences about response style, topics
- inferred-preferences: preferences inferred from behavior
- relationship-state: how the user feels about this conversation/assistant
- current-context: what the user is going through right now
- session-notes: ephemeral details from this session

Conversation:
{conversation}

Output only JSON lines, one per memory. No other text."""

def extract_and_save_memories(user_id: str, persona_id: str,
                               conversation: list) -> list:
    """Extract memories from a conversation and save them.

    Sends the conversation to the model using ``EXTRACTION_PROMPT``, reads the
    reply as one JSON object per line, and stores every well-formed entry via
    ``add_persona_memory``.

    Args:
        user_id: Identifier of the user the conversation belongs to.
        persona_id: Identifier of the persona that took part in it.
        conversation: Message dicts with ``role`` and ``content`` keys.

    Returns:
        The saved memory objects, in the order the model produced them. Empty
        when the conversation is empty or nothing usable was extracted.
    """
    # Nothing to analyse — skip the model call entirely.
    if not conversation:
        return []

    conv_text = "\n".join(
        f"{m['role'].title()}: {m['content']}" for m in conversation
    )

    response = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=1000,
        messages=[{"role": "user",
                    "content": EXTRACTION_PROMPT.format(conversation=conv_text)}]
    )
    raw = response.content[0].text if response.content else ""

    saved = []
    for line in raw.strip().split("\n"):
        line = line.strip()
        if not line:
            continue
        try:
            mem_data = json.loads(line)
        except json.JSONDecodeError:
            # The prompt asks for JSON only, but model output is not
            # guaranteed to comply; skip anything that does not parse.
            continue
        # A line can be valid JSON without being an object (e.g. `null` or a
        # bare number); membership tests on those would raise TypeError.
        if not isinstance(mem_data, dict):
            continue
        text = mem_data.get("text")
        category = mem_data.get("category")
        if not isinstance(text, str) or not text.strip():
            continue
        if not isinstance(category, str):
            continue
        saved.append(
            add_persona_memory(
                user_id, persona_id,
                text=text,
                category=category,
                source=mem_data.get("source", "inferred"),
            )
        )

    return saved

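Step 3: Importance weighting by confidence

Not every memory is equally reliable. Scale the category's base importance by how the fact was learned, so that something the user stated outright outranks a guess: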

def add_with_confidence(user_id: str, persona_id: str, text: str,
                         category: str, confidence: str):
    """Add memory with importance scaled by confidence level.

    The memory is stored through ``add_persona_memory`` so it carries the same
    attributes as every other memory (notably ``category``, which the session
    loop groups by). Its importance is then replaced with the category's base
    importance multiplied by the confidence factor.

    Args:
        user_id: Identifier of the user the memory is about.
        persona_id: Identifier of the persona that owns the memory.
        text: The memory itself, as natural-language text.
        category: A key of ``MEMORY_SCHEMA``; unknown categories use a base
            importance of 1.0.
        confidence: One of ``"explicit"``, ``"confirmed"``, ``"inferred"`` or
            ``"speculative"``; any other value is weighted like
            ``"inferred"``.

    Returns:
        The memory object returned by ``add_persona_memory``.
    """
    confidence_multipliers = {
        "explicit": 1.0,    # User directly stated this
        "confirmed": 0.9,   # User confirmed when asked
        "inferred": 0.7,    # We inferred from behavior
        "speculative": 0.4, # We guessed from limited signal
    }
    multiplier = confidence_multipliers.get(confidence, 0.7)
    base_importance = MEMORY_SCHEMA.get(category, {}).get("importance", 1.0)
    final_importance = base_importance * multiplier

    # Facts the user stated or confirmed count as "stated"; everything else is
    # "inferred", matching the source vocabulary used by the extraction prompt.
    source = "stated" if confidence in ("explicit", "confirmed") else "inferred"

    mem = add_persona_memory(user_id, persona_id, text, category,
                             source=source)
    # Replace the schema's base importance with the confidence-scaled value.
    mem.meta.set_attribute("importance", final_importance)
    mem.meta.set_attribute("confidence", confidence)
    return mem

Step 4: The persona session loop

def _format_memories(memories) -> str:
    """Group recalled memories by category and render them as prompt text."""
    grouped = {}
    for mem in memories:
        category = mem.meta.get_attribute("category") or "session-notes"
        grouped.setdefault(category, []).append(mem.text)

    parts = []
    for category, items in grouped.items():
        # Fall back to the raw category name when it is not in the schema.
        heading = MEMORY_SCHEMA.get(category, {}).get("desc", category)
        parts.append(f"\n{heading}:\n")
        parts.extend(f"  - {item}\n" for item in items)
    return "".join(parts)


def run_persona_session(user_id: str, persona_id: str,
                         persona_name: str, user_query: str,
                         conversation_history: list) -> str:
    """Full persona session: recall, respond, write-back.

    Args:
        user_id: Identifier of the user sending the message.
        persona_id: Identifier of the persona; together with ``user_id`` it
            forms the memory namespace.
        persona_name: Display name the model is told to answer as.
        user_query: The user's new message.
        conversation_history: Prior messages as ``role``/``content`` dicts.
            The new user message and the reply are appended to this list in
            place, but only once the model call has succeeded.

    Returns:
        The persona's reply text.
    """
    # Recall relevant memories across all categories
    namespace = f"{persona_id}::{user_id}"
    chain = db.context_chain(
        embed(user_query),
        k=8,
        namespace=namespace,
        max_depth=2
    )

    memory_text = _format_memories(chain)
    known = memory_text if memory_text.strip() else (
        'No prior memories — this may be a first interaction.'
    )

    system_prompt = f"""You are {persona_name}, a consistent persona with memory.

What you know about this user:
{known}

Be consistent with what you know. Reference memories naturally when relevant.
Do not list memories — weave them into your response as a person would."""

    # Build the outgoing message list without touching the caller's history
    # yet: if the API call raises, the history must not be left ending in an
    # unanswered user turn.
    user_message = {"role": "user", "content": user_query}
    response = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=800,
        system=system_prompt,
        messages=[*conversation_history, user_message]
    )
    reply = response.content[0].text
    conversation_history.append(user_message)
    conversation_history.append({"role": "assistant", "content": reply})

    # Write-back after the turn. The last four messages (two turns) give the
    # extractor some context.
    # NOTE(review): consecutive calls therefore overlap by one turn, so the
    # same fact may be extracted twice — confirm whether that needs
    # de-duplication.
    extract_and_save_memories(user_id, persona_id, conversation_history[-4:])

    return reply

# Example usage
# run_persona_session appends to the history list in place, so keep one list
# per conversation and pass the same object on every turn.
history = []
user_id = "user-alex-42"
persona_id = "aria-assistant"

# Seed core identity if first session
# NOTE(review): as written this call is unconditional — guard it in a real
# application, otherwise every run presumably stores another copy.
add_persona_memory(user_id, persona_id,
    text="User's name is Alex.",
    category="core-identity",
    source="stated")

# One turn: recall memories, generate the reply, then write new memories back.
reply = run_persona_session(
    user_id, persona_id,
    persona_name="Aria",
    user_query="I'm stressed about the product launch next week.",
    conversation_history=history
)
print(reply)

Step 5: Stickiness for core personality vs session details

The stickiness mechanism (recall count increasing effective half_life) works naturally with this schema: core identity and stated preferences get recalled every session because they're always relevant, so their stickiness builds rapidly. Session notes are rarely recalled across sessions, so they decay at their natural rate. You don't need to manage this explicitly — the retrieval pattern itself creates the right stickiness gradient.

After 20 sessions with a user, their name and stated preferences will have been recalled 20+ times (stickiness ≈ 3.2). For a core-identity memory, that stretches the effective half_life well beyond its 3650-day (10-year) base — practically immortal. A one-time anxious comment from session 3 will have been recalled 0–1 times, decaying normally at its 14-day half_life. The persona remembers what it actually keeps using.

Install: pip install feather-db anthropic · GitHub: github.com/feather-store/feather