Building a Coding Assistant with Persistent Memory Using Feather DB

What a memory-backed coding assistant looks like

A standard coding assistant knows only what's in the current context window. You explain the project architecture every session, re-paste the same files, repeat the same constraints. A memory-backed assistant knows your project. It remembers that you chose PostgreSQL over MongoDB in January because of ACID requirements, that you've had three separate debugging sessions around the ORM's connection pool, and that the architecture decision to use event sourcing was made during a specific refactor.

This tutorial builds that assistant using Feather DB and the Anthropic Python SDK.

Step 1: Project context ingestion

Start by ingesting your project's static context: README, architecture docs, and past decision records. These get a high importance value at ingestion and a long half_life at query time — architecture decisions stay relevant for months.

import os
import feather_db as fdb
from anthropic import Anthropic

# Initialize Feather DB. `dim` must equal the length of the vectors returned
# by embed(); Voyage's "voyage-3" model produces 1024-dimensional embeddings,
# so the store is opened with dim=1024 rather than 768.
db = fdb.DB.open("project_memory.feather", dim=1024)
# Anthropic client used for both the assistant replies and memory distillation.
client = Anthropic()

_voyage_client = None  # created on first use, then shared by every embed() call


def embed(text: str) -> list:
    """Embed text using your preferred provider.

    Uses Voyage AI directly (it can also be reached via feather-serve).
    The Voyage client is built once and reused, instead of constructing a
    new client for every chunk or query that gets embedded.

    Args:
        text: The string to embed.

    Returns:
        The embedding for ``text`` (the first entry of ``result.embeddings``).
    """
    # NOTE(review): the vector length must match the `dim` the store was
    # opened with — confirm against the model's documented output size.
    global _voyage_client
    if _voyage_client is None:
        # Imported lazily so the rest of the module loads without voyageai.
        import voyageai
        _voyage_client = voyageai.Client()
    result = _voyage_client.embed([text], model="voyage-3")
    return result.embeddings[0]

def ingest_document(path: str, doc_type: str, importance: float = 1.0) -> list:
    """Chunk a text file into paragraphs and store each one as a memory.

    Args:
        path: File to read; also recorded on every chunk as ``source``.
        doc_type: Used as the ``entity`` of each stored memory.
        importance: Recorded on every chunk as the ``importance`` attribute.

    Returns:
        The objects returned by ``db.add``, one per ingested chunk, in the
        order the chunks appear in the file.
    """
    # Decode explicitly as UTF-8 so ingestion does not depend on the
    # platform's default locale encoding.
    with open(path, encoding="utf-8") as f:
        content = f.read()

    # Simple paragraph-level chunking: split on blank lines and drop
    # fragments of 50 characters or fewer.
    paragraphs = (piece.strip() for piece in content.split("\n\n"))
    chunks = [piece for piece in paragraphs if len(piece) > 50]

    mems = []
    for i, chunk in enumerate(chunks):
        vec = embed(chunk)
        mem = db.add(vec, text=chunk,
                     namespace="project-myapp",
                     entity=doc_type)
        mem.meta.set_attribute("source", path)
        mem.meta.set_attribute("chunk_index", i)
        mem.meta.set_attribute("importance", importance)
        mems.append(mem)
    return mems

# Load the static project documents with elevated importance; they are
# searched often, so stickiness builds.
arch_mems = ingest_document(path="ARCHITECTURE.md", doc_type="architecture",
                            importance=1.8)
readme_mems = ingest_document(path="README.md", doc_type="overview",
                              importance=1.2)

# Report how many chunks were stored across both documents.
print(f"Ingested {len(arch_mems) + len(readme_mems)} context chunks")

Step 2: Conversation memory with write-back

Each assistant turn generates new knowledge: decisions made, code patterns discussed, bugs discovered. Write these back to the memory store.

def save_conversation_memory(user_msg: str, assistant_msg: str,
                              memory_type: str = "conversation"):
    """Condense one user/assistant exchange into a sentence and store it.

    The exchange is sent to the model with a request for a single-sentence
    takeaway. Only the first 500 characters of the assistant message are
    included in that request.

    Args:
        user_msg: The user's message for this turn.
        assistant_msg: The assistant's reply for this turn.
        memory_type: Stored as the ``type`` attribute. ``"decision"`` selects
            the ``decisions`` entity and importance 1.5; anything else goes
            to ``conversations`` with importance 1.0.

    Returns:
        The object returned by ``db.add``, or ``None`` when the distilled
        sentence is 20 characters or fewer and is therefore skipped.
    """
    distill_prompt = f"""Extract the key technical fact, decision, or insight from this exchange.
Write one clear sentence. No preamble.

User: {user_msg}
Assistant: {assistant_msg[:500]}"""

    reply = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=100,
        messages=[{"role": "user", "content": distill_prompt}]
    )
    distilled = reply.content[0].text.strip()

    # Very short distillations are treated as trivial exchanges and dropped.
    if len(distilled) <= 20:
        return None

    is_decision = memory_type == "decision"
    vec = embed(distilled)
    mem = db.add(vec, text=distilled,
                 namespace="project-myapp",
                 entity="decisions" if is_decision else "conversations")
    mem.meta.set_attribute("type", memory_type)
    mem.meta.set_attribute("importance", 1.5 if is_decision else 1.0)
    return mem

Step 3: Linking related memories

Decisions are connected to their rationale and resulting code changes. Use Feather DB's edge types to wire these relationships.

def record_decision(decision_text: str, rationale_text: str,
                    related_code_desc: str = None):
    """Store a decision, its rationale, and optionally the resulting code change.

    The decision and rationale are stored as separate memories under the
    ``decisions`` entity and joined by a ``leads_to`` edge. When a code
    description is supplied it is stored under ``code-changes`` and joined
    to the decision by a ``causes`` edge.

    Args:
        decision_text: The decision itself.
        rationale_text: Why the decision was taken.
        related_code_desc: Optional description of the code change that
            followed; skipped when ``None`` or empty.

    Returns:
        The memory object created for the decision.
    """
    namespace = "project-myapp"

    # Both embeddings are computed before anything is written to the store.
    decision_vec = embed(decision_text)
    rationale_vec = embed(rationale_text)

    # The decision carries the highest importance used in this file.
    decision_mem = db.add(decision_vec, text=decision_text,
                          namespace=namespace, entity="decisions")
    decision_mem.meta.set_attribute("type", "architecture_decision")
    decision_mem.meta.set_attribute("importance", 2.0)

    rationale_mem = db.add(rationale_vec, text=rationale_text,
                           namespace=namespace, entity="decisions")
    rationale_mem.meta.set_attribute("type", "rationale")

    # Edge direction: decision -> rationale
    db.add_edge(decision_mem.id, rationale_mem.id, edge_type="leads_to")

    if not related_code_desc:
        return decision_mem

    # Edge direction: decision -> code change
    code_mem = db.add(embed(related_code_desc), text=related_code_desc,
                      namespace=namespace, entity="code-changes")
    db.add_edge(decision_mem.id, code_mem.id, edge_type="causes")
    return decision_mem

# Example: the datastore choice, with its rationale and the code change it caused
record_decision(
    decision_text="Chose PostgreSQL over MongoDB for the primary datastore.",
    rationale_text="ACID guarantees required for financial transaction records. MongoDB's eventual consistency model was incompatible with audit requirements.",
    related_code_desc="Migrated user_transactions table to use pg via psycopg2 with connection pooling.",
)

Step 4: Querying with context_chain

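context_chain() combines approximate nearest-neighbor (ANN) search with breadth-first (BFS) graph traversal: the search finds seed memories, and the traversal follows their edges. For a query about a decision, it returns the decision plus its rationale, related code changes, and superseded alternatives — the full context trail.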

def get_relevant_context(query: str, k: int = 8) -> str:
    """Build a bullet list of memories relevant to ``query``.

    Args:
        query: Free-text query to embed and look up.
        k: Passed through to ``db.context_chain`` as ``k``.

    Returns:
        One ``- [type] text`` line per returned memory, joined by newlines,
        or ``"No relevant context found."`` when nothing comes back. Memories
        without a ``type`` attribute are labelled ``memory``.
    """
    query_vec = embed(query)

    # Seeds come from vector search; edges are then followed up to 2 hops
    # from each seed. The 90-day half_life keeps architecture context
    # relevant for months.
    chain = db.context_chain(
        query_vec,
        k=k,
        namespace="project-myapp",
        max_depth=2,
        half_life=90
    )

    if not chain:
        return "No relevant context found."

    return "\n".join(
        f"- [{mem.meta.get_attribute('type', 'memory')}] {mem.text}"
        for mem in chain
    )

Step 5: The assistant loop

def coding_assistant(user_input: str, conversation_history: list) -> str:
    """Answer one user turn using retrieved project memory, then write back.

    Args:
        user_input: The user's message for this turn.
        conversation_history: Running list of ``{"role", "content"}`` dicts.
            It is extended in place with the user turn and the assistant
            reply, but only after the model call has succeeded, so a failed
            request does not leave a dangling user message behind.

    Returns:
        The assistant's reply text.
    """
    # Retrieve relevant context from memory
    context = get_relevant_context(user_input)

    # Build the system prompt with retrieved context
    system = f"""You are a coding assistant with memory of this project.

Relevant context from project memory:
{context}

Use this context to give accurate, consistent answers that align with past decisions.
If a past decision is relevant, reference it explicitly."""

    # Send history plus the new turn without mutating the caller's list yet.
    user_turn = {"role": "user", "content": user_input}
    response = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=2000,
        system=system,
        messages=[*conversation_history, user_turn]
    )
    assistant_response = response.content[0].text

    # The call succeeded: commit both turns to the shared history.
    conversation_history.append(user_turn)
    conversation_history.append({"role": "assistant", "content": assistant_response})

    # Write-back: save the distilled insight from this turn.
    # Keyword heuristic for "a decision was made"; each text is lower-cased
    # once rather than once per keyword.
    user_lc = user_input.lower()
    reply_lc = assistant_response.lower()
    is_decision = any(kw in user_lc or kw in reply_lc
                      for kw in ["decided", "chose", "architecture", "tradeoff", "going with"])
    save_conversation_memory(user_input, assistant_response,
                              memory_type="decision" if is_decision else "conversation")

    return assistant_response

# Run the assistant: read a line, answer it, repeat until "exit"/"quit".
history = []
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C ends the session cleanly instead of with a traceback.
        print()
        break
    if not user_input:
        # The Messages API rejects empty message content; just re-prompt.
        continue
    if user_input.lower() in ["exit", "quit"]:
        break
    response = coding_assistant(user_input, history)
    print(f"Assistant: {response}\n")

Step 6: Decay tuning for code contexts

Different types of code knowledge decay at different rates. Tune half_life (in days) per entity type:

# Decay half-life, in days, keyed by memory entity name.
# search_by_entity() falls back to 30 for entities that are not listed here.
HALF_LIVES = {
    "architecture": 365,   # architecture decisions stay relevant for a year or more
    "decisions": 180,      # explicit decisions: about 6 months
    "conversations": 30,   # general Q&A: 30 days
    "debugging": 14,       # debugging sessions: 2 weeks (bugs get fixed)
    "code-changes": 60,    # code change context: about 2 months
}

def search_by_entity(query: str, entity: str, k: int = 5):
    """Search one entity's memories using that entity's half-life.

    Args:
        query: Free-text query to embed.
        entity: Entity name to restrict the search to; also the key looked
            up in ``HALF_LIVES`` (30 is used when the entity is not listed).
        k: Passed through to ``db.search`` as ``k``.

    Returns:
        Whatever ``db.search`` returns for the query.
    """
    query_vec = embed(query)
    search_args = {
        "k": k,
        "namespace": "project-myapp",
        "entity": entity,
        "half_life": HALF_LIVES.get(entity, 30),
    }
    return db.search(query_vec, **search_args)

The result is an assistant that becomes more useful the longer you use it. Architecture decisions made six months ago surface when relevant. Debugging sessions from last week appear when you hit a similar issue. The conversation history isn't a flat log — it's a weighted, decaying knowledge graph that understands what mattered and for how long.

Install: pip install feather-db anthropic voyageai · GitHub: github.com/feather-store/feather