# Feather DB for Research Assistants: Memory That Accumulates Across Sessions

> A research assistant should know what you've read, what you've concluded, and which papers contradict each other — without repeating ingestion every session. Here's how to build one that accumulates knowledge with Feather DB.

- **Category**: Tutorial
- **Read time**: 8 min read
- **Date**: June 16, 2026
- **Author**: Feather DB (Engineering)
- **URL**: https://getfeather.store/theory/feather-db-research-assistant-memory

---

## The research assistant memory problem

Most research assistants fail at knowledge accumulation. You ingest 50 papers in session 1, ask questions, and get answers. In session 2, you have to re-ingest the same papers or start fresh. And even within a session, the assistant doesn't know which papers contradict each other, which papers a key claim relies on, or which open questions have accumulated evidence over time.

A Feather DB-backed research assistant solves this: papers are ingested once and persist. Edges link citation relationships and contradictions. Open research questions track evidence as it accumulates. And different paper types decay at different rates — foundational papers stay relevant indefinitely, preprints fade as they're superseded.

## Step 1: Paper ingestion with metadata

```python
import feather_db as fdb
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional

# Open the store file used by this tutorial. The functions below that do not
# take a `db` argument read this module-level handle directly.
# NOTE(review): dim=768 presumably has to equal the length of the vectors
# produced by the embedding function — confirm against the model in use.
db = fdb.DB.open("research_memory.feather", dim=768)

@dataclass
class Paper:
    """Metadata and extracted content for a single paper to be ingested."""
    # Title of the paper.
    title: str
    # Abstract text.
    abstract: str
    # Author names, one string per author.
    authors: List[str]
    # Publication year; ingestion compares it with the current year.
    year: int
    # Claims extracted from the paper, one string per claim.
    key_claims: List[str]
    # Where the paper appeared. Ingestion treats any venue containing "arxiv"
    # (case-insensitive) as a preprint.
    venue: str  # "NeurIPS 2024", "arxiv", etc.
    # Optional arXiv identifier; None when not provided.
    arxiv_id: Optional[str] = None

def ingest_paper(paper: Paper, namespace: str, embed_fn) -> dict:
    """Ingest a paper and return memory IDs for edge linking."""
    mem_ids = {}

    # Determine half_life by venue
    if "arxiv" in paper.venue.lower():
        half_life = 90   # preprints: 3 months
        importance = 0.9
    elif paper.year >= datetime.now().year - 1:
        half_life = 365  # recent conference papers: 1 year
        importance = 1.2
    else:
        half_life = 730  # older papers: 2 years+ (foundational)
        importance = 1.5 if paper.year  {cited_title[:50]}")

def link_contradiction(paper_a_title: str, paper_b_title: str,
                       claim_a: str, claim_b: str,
                       namespace: str, db: fdb.DB, embed_fn):
    """Link two contradicting claims with a pair of ``contradicts`` edges.

    Each claim is located by embedding ``"[<paper title>] <claim>"`` and taking
    the single nearest memory of type ``"claim"`` in ``namespace``. When both
    lookups succeed and resolve to two different memories, one edge is added in
    each direction and a confirmation line is printed.

    Nothing is linked (and nothing is printed) when either lookup returns no
    result, or when both lookups resolve to the same memory. The latter can
    happen because the match is a nearest-neighbour search, not an exact one;
    linking in that case would create a claim that "contradicts" itself.

    Args:
        paper_a_title: Title of the paper making ``claim_a``.
        paper_b_title: Title of the paper making ``claim_b``.
        claim_a: Text of the first claim.
        claim_b: Text of the second claim.
        namespace: Namespace to search for the claim memories.
        db: Database handle providing ``search`` and ``add_edge``.
        embed_fn: Callable mapping a string to an embedding vector.

    Returns:
        None.
    """
    def _nearest_claim(paper_title, claim_text):
        # Top-1 semantic match restricted to claim memories.
        hits = db.search(
            embed_fn(f"[{paper_title}] {claim_text}"), k=1, namespace=namespace,
            filter={"type": "claim"}
        )
        return hits[0] if hits else None

    mem_a = _nearest_claim(paper_a_title, claim_a)
    mem_b = _nearest_claim(paper_b_title, claim_b)

    if mem_a is None or mem_b is None:
        return

    # Both queries matched the same memory: skip rather than add a self-loop.
    if mem_a.id == mem_b.id:
        return

    # Contradiction is symmetric, so record it in both directions.
    db.add_edge(mem_a.id, mem_b.id, edge_type="contradicts")
    db.add_edge(mem_b.id, mem_a.id, edge_type="contradicts")
    print(f"Contradiction linked between {paper_a_title[:30]} and {paper_b_title[:30]}")

```

## Step 3: Research question tracking

```python
def add_research_question(question: str, namespace: str, embed_fn):
    """Store an open research question so evidence can be attached to it later.

    The question text is embedded with ``embed_fn`` and added to the
    module-level ``db`` under the ``"research-questions"`` entity. The new
    memory is returned after its attributes are initialised.
    """
    question_vec = embed_fn(question)
    question_mem = db.add(
        question_vec,
        text=question,
        namespace=namespace,
        entity="research-questions",
    )

    # Mark the memory as an open question with importance 2.0 and an
    # evidence tally starting at zero.
    initial_attributes = (
        ("type", "open_question"),
        ("importance", 2.0),
        ("evidence_count", 0),
    )
    for attr_name, attr_value in initial_attributes:
        question_mem.meta.set_attribute(attr_name, attr_value)

    return question_mem

def add_evidence(question_text: str, evidence_text: str, paper_title: str,
                 namespace: str, embed_fn, supports: bool = True):
    """Link evidence from a paper to a research question.

    The question is found by embedding ``question_text`` and taking the single
    nearest memory of type ``"open_question"`` in ``namespace`` (a
    nearest-neighbour match, not an exact text match). The evidence is then
    stored as a new memory and connected to that question with a
    ``"supports"`` or ``"contradicts"`` edge.

    Args:
        question_text: Text used to look up the open question.
        evidence_text: The finding to record as evidence.
        paper_title: Title of the paper the evidence comes from; prefixed to
            the stored text and saved as the ``paper_title`` attribute.
        namespace: Namespace to search and to store the evidence in.
        embed_fn: Callable mapping a string to an embedding vector.
        supports: True if the evidence supports the question, False if it
            contradicts it.

    Returns:
        The newly added evidence memory, or None when no open question was
        found (in which case nothing is stored).
    """
    # Find the question
    q_results = db.search(embed_fn(question_text), k=1, namespace=namespace,
                          filter={"type": "open_question"})
    if not q_results:
        return None
    question_mem = q_results[0]

    # Embed the evidence only once a target question is known, so the
    # not-found path does not pay for an embedding call it never uses.
    e_vec = embed_fn(evidence_text)
    evidence_mem = db.add(e_vec,
                          text=f"[{paper_title}] {evidence_text}",
                          namespace=namespace,
                          entity="evidence")
    evidence_mem.meta.set_attribute("type", "evidence")
    evidence_mem.meta.set_attribute("supports_question", supports)
    evidence_mem.meta.set_attribute("paper_title", paper_title)

    edge_type = "supports" if supports else "contradicts"
    db.add_edge(evidence_mem.id, question_mem.id, edge_type=edge_type)

    # Increment evidence count (a missing or falsy value counts as 0).
    # NOTE(review): this writes through the object returned by search();
    # confirm that such writes are persisted by the store.
    count = int(question_mem.meta.get_attribute("evidence_count") or 0)
    question_mem.meta.set_attribute("evidence_count", count + 1)

    return evidence_mem

```

## Step 4: context_chain for full paper context

```python
def research_query(query: str, namespace: str, embed_fn, k: int = 5, *,
                   max_depth: int = 2, half_life: int = 365):
    """Query the research memory and surface full citation context.

    Embeds ``query``, runs ``db.context_chain`` on the module-level ``db``, and
    groups the returned memories by their ``type`` attribute.

    Args:
        query: Natural-language question to search for.
        namespace: Namespace to search in.
        embed_fn: Callable mapping a string to an embedding vector.
        k: Passed to ``context_chain`` as ``k``.
        max_depth: Passed to ``context_chain`` as ``max_depth``. The default
            of 2 hops follows paper -> citation -> their citations.
        half_life: Passed to ``context_chain`` as ``half_life``. Defaults to
            365.

    Returns:
        A dict with the keys ``"papers"`` (type ``"abstract"``), ``"claims"``
        (``"claim"``), ``"evidence"`` (``"evidence"``) and ``"questions"``
        (``"open_question"``), each holding a list of memories in the order
        ``context_chain`` yielded them. Memories of any other type are dropped.
    """
    vec = embed_fn(query)

    # context_chain: find relevant papers + traverse citations + contradictions
    chain = db.context_chain(
        vec,
        k=k,
        namespace=namespace,
        max_depth=max_depth,
        half_life=half_life,
    )

    # Map each memory type to the result bucket it belongs in.
    bucket_for_type = {
        "abstract": "papers",
        "claim": "claims",
        "evidence": "evidence",
        "open_question": "questions",
    }
    grouped = {"papers": [], "claims": [], "evidence": [], "questions": []}
    for mem in chain:
        bucket = bucket_for_type.get(mem.meta.get_attribute("type"))
        if bucket is not None:
            grouped[bucket].append(mem)

    return grouped

# Usage: run one query and print a summary of what came back.
# NOTE: `embed` is not defined in this snippet; supply your own embedding
# function (a callable that turns a string into a vector).
results = research_query(
    query="Does attention mechanism benefit from sparse patterns?",
    namespace="phd-research",
    embed_fn=embed
)

# Counts per result group returned by research_query.
print(f"Found {len(results['papers'])} relevant papers")
print(f"Found {len(results['claims'])} supporting/contradicting claims")
print(f"Found {len(results['questions'])} related open questions")

# One entry per paper: title, year and score, then the first 150 characters
# of the stored text.
# NOTE(review): assumes 'paper_title' and 'year' attributes were set on the
# abstract memories at ingestion time — confirm against the ingestion step.
for paper_mem in results["papers"]:
    print(f"\n{paper_mem.meta.get_attribute('paper_title')} "
          f"({paper_mem.meta.get_attribute('year')}) "
          f"- score: {paper_mem.score:.3f}")
    print(f"  {paper_mem.text[:150]}...")

```

The result is a research assistant that gets smarter over time. Every paper you ingest becomes part of a knowledge graph. Every contradiction you identify is a permanent edge that will surface whenever either paper is retrieved. Every open question accumulates evidence as you link findings from newly ingested papers. And foundational papers — the ones you cite in every session — build stickiness through recall, making them effectively permanent fixtures in your knowledge base.

**Install:** `pip install feather-db` · **GitHub:** [github.com/feather-store/feather](https://github.com/feather-store/feather)

---

*This is the machine-readable mirror of the theory post at [getfeather.store/theory/feather-db-research-assistant-memory](https://getfeather.store/theory/feather-db-research-assistant-memory). For the full Feather DB documentation, see [getfeather.store/llms-full.txt](https://getfeather.store/llms-full.txt).*