Upgrading from LangChain ConversationBufferMemory to Feather DB
ConversationBufferMemory grows without bound, applies no decay, and has no graph structure. This migration guide provides a FeatherMemory drop-in class, a side-by-side comparison, and performance numbers showing what you gain.
Why ConversationBufferMemory fails at scale
LangChain's ConversationBufferMemory is fine for demos. In production, it hits three walls:
- Grows unbounded: Every message is appended to the buffer. After 50 exchanges, you're passing 10,000+ tokens to the LLM on every call. At 100 exchanges, you've likely blown the context window or are spending $0.20 per query on tokens that are mostly irrelevant to the current question.
- No decay: A factual error from turn 2 gets as much weight as a correction from turn 48. An outdated preference sits alongside a new one with equal authority. The buffer has no way to indicate that some information is more reliable or more recent.
- No graph: If the user says "remember what I said about the API", the buffer has no way to know that "the API" refers to the decision made in turn 15. Everything is flat text with no structure.
Feather DB solves all three: vector search retrieves only relevant context (not the full history), adaptive scoring handles decay and recency, and edges link related memories into a traversable graph.
The FeatherMemory drop-in class
We'll implement FeatherMemory as a BaseChatMemory subclass, making it a drop-in for ConversationBufferMemory in any LangChain chain:
from datetime import datetime
from typing import Any, Dict, List

import feather_db as fdb
from langchain.memory.chat_memory import BaseChatMemory
from langchain.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage
from langchain.schema.messages import get_buffer_string
class FeatherMemory(BaseChatMemory):
    """LangChain ``BaseChatMemory`` backed by Feather DB.

    Intended as a replacement for ``ConversationBufferMemory``: instead of
    replaying the full buffer, ``load_memory_variables`` embeds the current
    input and returns the results of ``db.context_chain`` for that vector.
    ``save_context`` stores each human/AI turn as its own memory and links
    the pair with a ``leads_to`` edge.
    """

    db: Any = None  # Feather DB handle, opened in __init__
    embed_fn: Any = None  # callable mapping a text to its embedding vector
    namespace: str = "default"
    k: int = 6  # number of memories to retrieve
    half_life: int = 30  # decay in days
    dim: int = 768  # embedding dimensionality passed to fdb.DB.open
    memory_key: str = "chat_history"
    return_messages: bool = True

    class Config:
        arbitrary_types_allowed = True

    def __init__(self, db_path: str, embed_fn, namespace: str = "default",
                 k: int = 6, half_life: int = 30, dim: int = 768, **kwargs):
        """Open the Feather DB file and remember the retrieval settings.

        Args:
            db_path: Path handed to ``fdb.DB.open``.
            embed_fn: Callable turning a string into an embedding vector.
            namespace: Namespace used for every read, write and ``clear``.
            k: Number of memories requested per lookup.
            half_life: Decay setting (in days) forwarded to the lookup.
            dim: Embedding dimensionality; must match what ``embed_fn``
                returns. Defaults to the previously hard-coded 768.
            **kwargs: Forwarded to ``BaseChatMemory`` (e.g. ``memory_key``,
                ``input_key``, ``output_key``, ``return_messages``).
        """
        super().__init__(**kwargs)
        self.db = fdb.DB.open(db_path, dim=dim)
        self.embed_fn = embed_fn
        self.namespace = namespace
        self.k = k
        self.half_life = half_life
        self.dim = dim

    @property
    def memory_variables(self) -> List[str]:
        """Names of the variables this memory injects into the chain."""
        return [self.memory_key]

    @staticmethod
    def _first_text(values: Dict[str, Any], preferred, fallbacks) -> str:
        """Return the first non-empty value among ``preferred`` and ``fallbacks``."""
        keys = [preferred] if preferred else []
        keys.extend(key for key in fallbacks if key != preferred)
        for key in keys:
            text = values.get(key)
            if text:
                return text
        return ""

    @staticmethod
    def _to_message(role: str, text: str) -> BaseMessage:
        """Rebuild a LangChain message from a stored role and text."""
        if role == "human":
            return HumanMessage(content=text)
        if role == "system":
            # Facts stored with role "system" must not be replayed as if the
            # assistant had said them.
            return SystemMessage(content=text)
        return AIMessage(content=text)

    def _store(self, text: str, role: str):
        """Embed ``text`` and add it as a conversation memory with ``role``."""
        vec = self.embed_fn(text)
        mem = self.db.add(vec, text=text,
                          namespace=self.namespace,
                          entity="conversation")
        mem.meta.set_attribute("role", role)
        mem.meta.set_attribute("importance", 1.0)
        mem.meta.set_attribute("created_at", datetime.utcnow().isoformat())
        return mem

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Load context before the LLM call.

        The query is read from ``self.input_key`` when set, otherwise from
        ``"input"`` or ``"question"``. Returns a one-entry dict keyed by
        ``memory_key``: a list of messages when ``return_messages`` is true,
        otherwise the buffer string built from them. With no query the result
        is empty (``[]`` or ``""`` respectively).
        """
        query = self._first_text(inputs, self.input_key, ("input", "question"))

        messages: List[BaseMessage] = []
        if query:
            vec = self.embed_fn(query)
            results = self.db.context_chain(
                vec,
                k=self.k,
                namespace=self.namespace,
                max_depth=1,
                half_life=self.half_life,
            )
            # Reconstruct as LangChain messages; a missing role is treated
            # as a human message.
            for mem in results:
                role = mem.meta.get_attribute("role") or "human"
                messages.append(self._to_message(role, mem.text))

        if self.return_messages:
            return {self.memory_key: messages}
        return {self.memory_key: get_buffer_string(messages)}

    def save_context(self, inputs: Dict[str, Any],
                     outputs: Dict[str, str]) -> None:
        """Save the human input and AI output after the LLM call.

        The human text is read like the query in ``load_memory_variables``.
        The AI text is read from ``self.output_key`` when set, otherwise from
        ``"output"``, ``"answer"`` or ``"response"`` (the key
        ``ConversationChain`` uses); if none match and ``outputs`` has exactly
        one entry, that entry is used. Empty texts are skipped.
        """
        human_text = self._first_text(inputs, self.input_key,
                                      ("input", "question"))
        ai_text = self._first_text(outputs, self.output_key,
                                   ("output", "answer", "response"))
        if not ai_text and len(outputs) == 1:
            # Single-output chains: take the only value whatever its key.
            ai_text = next(iter(outputs.values())) or ""

        human_mem = self._store(human_text, "human") if human_text else None
        if not ai_text:
            return

        ai_mem = self._store(ai_text, "ai")
        # Link: human message leads_to AI response
        if human_mem is not None:
            self.db.add_edge(human_mem.id, ai_mem.id, edge_type="leads_to")

    def clear(self) -> None:
        """Clear all memories in this namespace."""
        # Note: clears only this namespace's memories
        self.db.delete_namespace(self.namespace)
Side-by-side: ConversationBufferMemory vs FeatherMemory in a chain
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# BEFORE: ConversationBufferMemory
buffer_chain = ConversationChain(
    llm=llm,
    memory=ConversationBufferMemory(return_messages=True),
)

# AFTER: FeatherMemory — one-line swap
from langchain_openai import OpenAIEmbeddings

# FeatherMemory opens its store with dim=768, while text-embedding-3-small
# returns 1536-dimensional vectors unless told otherwise, so request 768
# explicitly to keep the vectors and the index in agreement.
oai_embed = OpenAIEmbeddings(model="text-embedding-3-small", dimensions=768)


def embed(text: str) -> list:
    """Embed a single text with the OpenAI embeddings client."""
    return oai_embed.embed_query(text)


feather_chain = ConversationChain(
    llm=llm,
    memory=FeatherMemory(
        db_path="conversation.feather",
        embed_fn=embed,
        namespace="user-session-1",
        k=6,
        half_life=30,
        # ConversationChain's default prompt expects the memory variable
        # "history"; FeatherMemory's default key is "chat_history", which
        # fails the chain's prompt-variable validation.
        memory_key="history",
    ),
)

# Both chains have the same interface
buffer_chain.predict(input="My name is Alex and I work on a fintech startup.")
feather_chain.predict(input="My name is Alex and I work on a fintech startup.")
Performance comparison
| Metric | ConversationBufferMemory | FeatherMemory |
|---|---|---|
| Tokens per LLM call (at turn 50) | ~8,000–15,000 | ~400–800 |
| Tokens per LLM call (at turn 200) | Context limit exceeded | ~400–800 (stable) |
| Memory retrieval latency | ~0 ms (in-memory list) | ~1–3 ms (ANN search), plus one embedding call per query |
| Relevance of retrieved context | All history (mostly irrelevant) | Semantically matched |
| Handles stale information | No | Yes (adaptive decay) |
| Persists across sessions | No (in-memory) | Yes (.feather file) |
| Cost at 1000 turns (gpt-4o-mini) | ~$1.20+ (growing) | ~$0.04 (stable) |
Advanced: boosting important memories
# Store a key fact as its own memory with a raised importance value
def save_important_fact(chain_memory: FeatherMemory, fact: str,
                        importance: float = 2.0):
    """Add ``fact`` to the memory's store as a high-importance entry.

    The fact is embedded with ``chain_memory.embed_fn`` and added to
    ``chain_memory.db`` under the memory's namespace with entity
    ``"key-facts"``. Its metadata gets role ``"system"``, the given
    ``importance`` and a UTC ``created_at`` timestamp.

    Returns the object returned by ``db.add``.
    """
    embedding = chain_memory.embed_fn(fact)
    record = chain_memory.db.add(
        embedding,
        text=fact,
        namespace=chain_memory.namespace,
        entity="key-facts",
    )
    attributes = (
        ("role", "system"),
        ("importance", importance),
        ("created_at", datetime.utcnow().isoformat()),
    )
    for key, value in attributes:
        record.meta.set_attribute(key, value)
    return record
# Usage: record the user's name and job as high-importance facts
memory = feather_chain.memory
for key_fact, weight in (
    ("User's name is Alex.", 2.5),
    ("Alex works at a fintech startup on a FastAPI backend.", 2.0),
):
    save_important_fact(memory, key_fact, importance=weight)
The migration cost is low: implement FeatherMemory once, swap it in wherever you use ConversationBufferMemory, and you get persistent cross-session memory, adaptive decay, graph structure, and a token cost that stays flat regardless of how long the conversation has been running. The trade-off is added latency per turn: 1–3 ms for the retrieval itself, plus the embedding calls made by embed_fn (one per lookup and one per saved message). In most applications both are small next to the latency of the LLM call itself.
Install: pip install feather-db langchain langchain-openai · GitHub: github.com/feather-store/feather