Production Document Q&A with Feather DB: Beyond Naive Chunking
Naive RAG — chunk text, embed, retrieve — breaks in production. Documents have structure, sections differ in importance, and user engagement reveals which content is actually useful. Here's how to build Q&A that accounts for all of it.
Why naive chunking fails in production
The simplest RAG pipeline: split every document into 512-token chunks, embed each chunk, store the vectors in a vector DB, and retrieve the top-k chunks for every query. It works in demos. In production, it fails in predictable ways:
- A footnote about a legal disclaimer gets the same importance as a key product specification in the main section
- Section headers — which carry the most navigational context — are either absorbed into adjacent chunks (losing their emphasis) or chunked alone (becoming useless short vectors)
- A chunk boundary cuts a numbered list in half, embedding an incomplete thought
- A user finds a chunk helpful and clicks "thumbs up" — but the system has no way to boost that chunk for future retrievals
Production doc Q&A needs adaptive chunking, metadata-rich ingestion, structural importance weights, cross-section linking, and feedback loops. Here's how to build it with Feather DB.
Step 1: Adaptive chunking strategy
import re
from dataclasses import dataclass
from typing import List
@dataclass
class Chunk:
    """A single piece of a document together with its structural context."""

    # The chunk's text content.
    text: str
    # Label of the section this chunk belongs to.
    section: str
    # Structural kind: "header", "body", "list", "footnote", or "code".
    chunk_type: str
    # Relative location within the document: 0.0 = document start, 1.0 = end.
    position: float
# Line classifiers used by adaptive_chunk; compiled once at import time.
_HEADER_RE = re.compile(r'^#{1,3}\s+')
_LIST_ITEM_RE = re.compile(r'^(?:[-*•]|\d+\.)\s+')
# "*" directly followed by text, or "[1]"-style markers. "**bold**" is excluded
# so emphasised paragraphs are not mistaken for footnotes.
_FOOTNOTE_RE = re.compile(r'^(?:\*(?!\*)|\[\d+\])')
_CODE_FENCE = "```"


def adaptive_chunk(doc_text: str, doc_title: str) -> List[Chunk]:
    """Split a Markdown-style document into structure-aware chunks.

    Each line is classified, in this order, as: inside a fenced code block,
    code fence, header (``#`` to ``###``), list item, footnote, paragraph
    text, or blank. Consecutive lines of a compatible kind are buffered and
    emitted as one chunk.

    Rules:
      * Headers always become their own chunk (regardless of length) and set
        the section label for the chunks that follow.
      * Everything from an opening fence up to and including the closing
        fence is one "code" chunk; line breaks and indentation are kept.
        An unterminated fence runs to the end of the document.
      * A paragraph directly followed by list items is merged into the list
        chunk, so a lead-in sentence stays with its list.
      * Each footnote line is its own chunk.
      * Paragraph buffers are flushed on a blank line or once they exceed
        300 words.
      * Buffered chunks of 30 characters or fewer are dropped.

    Args:
        doc_text: Full document text; lines are separated by ``"\\n"``.
        doc_title: Used as the section label until the first header is seen.

    Returns:
        Chunks in document order. ``position`` is the index of the chunk's
        first line divided by the total number of lines.
    """
    lines = doc_text.split("\n")
    total_lines = len(lines)  # always >= 1: str.split never returns []
    chunks: List[Chunk] = []
    current_section = doc_title
    buffer: List[str] = []
    buffer_type = "body"
    buffer_start = 0  # line index at which the current buffer began
    in_code = False

    def flush_buffer() -> None:
        """Emit the buffered lines as one chunk (if long enough) and reset."""
        if not buffer:
            return
        if buffer_type == "code":
            # Code is whitespace-sensitive: keep the original line structure.
            text = "\n".join(buffer)
        else:
            text = " ".join(" ".join(buffer).split())  # normalize whitespace
        if len(text) > 30:  # skip trivially short chunks
            chunks.append(Chunk(
                text=text,
                section=current_section,
                chunk_type=buffer_type,
                position=buffer_start / total_lines,
            ))
        buffer.clear()

    for i, line in enumerate(lines):
        stripped = line.strip()

        # Inside a fenced block nothing is re-classified: "# comment" is not
        # a header and "- x" is not a list item.
        if in_code:
            buffer.append(line.rstrip())
            if stripped.startswith(_CODE_FENCE):
                in_code = False
                flush_buffer()  # must run while buffer_type is still "code"
                buffer_type = "body"
            continue

        if stripped.startswith(_CODE_FENCE):
            # Opening fence: start a code chunk that runs to the closing fence.
            flush_buffer()
            buffer_type = "code"
            buffer_start = i
            buffer.append(stripped)
            in_code = True

        elif _HEADER_RE.match(stripped):
            # Markdown headers are always their own chunk.
            flush_buffer()
            current_section = re.sub(r'^#+\s+', '', stripped)
            chunks.append(Chunk(
                text=stripped,
                section=current_section,
                chunk_type="header",
                position=i / total_lines,
            ))
            buffer_type = "body"

        elif _LIST_ITEM_RE.match(stripped):
            # Tested before footnotes so "* item" bullets are treated as list
            # items rather than being captured by the footnote pattern.
            if buffer_type not in ("list", "body"):
                flush_buffer()
            if not buffer:
                buffer_start = i
            buffer_type = "list"
            buffer.append(stripped)

        elif _FOOTNOTE_RE.match(stripped):
            flush_buffer()
            buffer_type = "footnote"
            buffer_start = i
            buffer.append(stripped)

        elif stripped:
            # Regular paragraph text.
            if buffer_type != "body":
                flush_buffer()
                buffer_type = "body"
            if not buffer:
                buffer_start = i
            buffer.append(stripped)
            # Cap paragraph chunks at roughly 300 words.
            if len(" ".join(buffer).split()) > 300:
                flush_buffer()

        elif buffer:  # blank line = paragraph break
            flush_buffer()

    flush_buffer()
    return chunks
Step 2: Metadata-rich ingestion
import feather_db as fdb
from datetime import datetime
# Importance weight per chunk type; 1.0 is the baseline for standard body text.
IMPORTANCE_BY_TYPE = dict(
    header=1.8,    # headers carry high navigational value
    body=1.0,      # standard body text
    list=1.2,      # lists are often key facts or steps
    code=1.3,      # code examples are high-value
    footnote=0.5,  # footnotes are supplementary
)
def ingest_document(doc_text: str, source_url: str, doc_title: str,
                    namespace: str, db: fdb.DB, embed_fn):
    """Ingest a document with structure-aware chunking and metadata.

    The document is split with ``adaptive_chunk``; every chunk is embedded,
    stored through ``db.add`` and annotated with its source, section, chunk
    type, title, initial importance, position, ingestion time and a
    ``thumbs_up`` counter starting at 0. Consecutive chunks are connected
    with a ``leads_to`` edge.

    Args:
        doc_text: Raw document text.
        source_url: Stored verbatim as the ``source`` attribute.
        doc_title: Stored as ``doc_title``; its first 50 characters also
            form the entity label ``doc:<title>``.
        namespace: Passed through to ``db.add``.
        db: Feather DB handle.
        embed_fn: Callable that turns chunk text into the vector handed to
            ``db.add``.

    Returns:
        The records returned by ``db.add``, in document order.
    """
    # Local import: only `datetime` itself is imported at file level.
    from datetime import timezone

    chunks = adaptive_chunk(doc_text, doc_title)
    entity = f"doc:{doc_title[:50]}"
    # One timezone-aware timestamp for the whole document. datetime.utcnow()
    # is deprecated and returns a naive value; the ISO string now carries an
    # explicit "+00:00" offset.
    ingested_at = datetime.now(timezone.utc).isoformat()

    mems = []
    prev_mem = None
    for chunk in chunks:
        vec = embed_fn(chunk.text)
        # Chunk types missing from the table fall back to the 1.0 baseline.
        importance = IMPORTANCE_BY_TYPE.get(chunk.chunk_type, 1.0)
        mem = db.add(vec, text=chunk.text,
                     namespace=namespace,
                     entity=entity)
        mem.meta.set_attribute("source", source_url)
        mem.meta.set_attribute("section", chunk.section)
        mem.meta.set_attribute("chunk_type", chunk.chunk_type)
        mem.meta.set_attribute("doc_title", doc_title)
        mem.meta.set_attribute("importance", importance)
        mem.meta.set_attribute("position", chunk.position)
        mem.meta.set_attribute("ingested_at", ingested_at)
        mem.meta.set_attribute("thumbs_up", 0)
        # Link consecutive chunks: each chunk leads_to the next
        if prev_mem is not None:
            db.add_edge(prev_mem.id, mem.id, edge_type="leads_to")
        mems.append(mem)
        prev_mem = mem
    return mems
Step 3: Cross-section edge linking
def link_related_sections(mems: list, db: fdb.DB, embed_fn,
                          similarity_threshold: float = 0.82):
    """Add 'refines' edges between sections whose headers are similar.

    Only records whose ``chunk_type`` attribute is ``"header"`` are
    compared. Every unordered pair of headers is scored with
    ``fdb.cosine_similarity`` on the embeddings of their text, and a
    ``refines`` edge is added from the earlier header to the later one when
    the score is strictly greater than ``similarity_threshold``.

    Args:
        mems: Records to scan, e.g. the list returned by an ingestion run.
        db: Feather DB handle used to add edges.
        embed_fn: Callable that turns text into an embedding vector.
            Assumed to return the same vector for the same text, since each
            header is now embedded only once.
        similarity_threshold: Minimum (exclusive) similarity for linking.
    """
    headers = [m for m in mems
               if m.meta.get_attribute("chunk_type") == "header"]
    if len(headers) < 2:
        return  # no pairs to compare, so skip embedding entirely

    # Embed each header once up front instead of re-embedding both sides of
    # every pair inside the quadratic loop.
    vectors = [embed_fn(h.text) for h in headers]

    for i, (h1, v1) in enumerate(zip(headers, vectors)):
        for h2, v2 in zip(headers[i + 1:], vectors[i + 1:]):
            sim = fdb.cosine_similarity(v1, v2)
            if sim > similarity_threshold:
                db.add_edge(h1.id, h2.id, edge_type="refines")
Step 4: Hybrid search for precise retrieval
def search_documents(query: str, namespace: str, db: fdb.DB, embed_fn,
                     k: int = 6, doc_title: str = None):
    """Run a hybrid search, optionally restricted to a single document.

    Args:
        query: Search text; passed both verbatim and as an embedding.
        namespace: Namespace handed to ``db.hybrid_search``.
        db: Feather DB handle.
        embed_fn: Callable that turns the query into an embedding vector.
        k: Number of results requested.
        doc_title: When truthy, the search is scoped to the entity label
            ``doc:<first 50 characters of the title>``; otherwise no entity
            filter is passed.

    Returns:
        Whatever ``db.hybrid_search`` returns.
    """
    embedded_query = embed_fn(query)

    if doc_title:
        scope = f"doc:{doc_title[:50]}"
    else:
        scope = None

    # The raw text lets exact terms (version numbers, names) match; the
    # vector covers the semantic side.
    return db.hybrid_search(
        query_text=query,
        query_vec=embedded_query,
        k=k,
        namespace=namespace,
        entity=scope,
        half_life=180,  # documents stay relevant for months
    )
def search_with_context_chain(query: str, namespace: str, db: fdb.DB,
                              embed_fn, k: int = 4):
    """Search by embedding and let ``db.context_chain`` follow edges.

    Args:
        query: Search text; only its embedding is sent to the database.
        namespace: Namespace handed to ``db.context_chain``.
        db: Feather DB handle.
        embed_fn: Callable that turns the query into an embedding vector.
        k: Passed through as ``k``.

    Returns:
        Whatever ``db.context_chain`` returns.
    """
    chain_options = dict(
        k=k,
        namespace=namespace,
        max_depth=2,
        half_life=180,
    )
    return db.context_chain(embed_fn(query), **chain_options)
Step 5: Importance boost from user feedback
def record_thumbs_up(mem_id: int, db: fdb.DB):
    """Register a helpful vote for a stored record.

    Raises the record's ``importance`` attribute by 0.2 (never above 3.0),
    increments its ``thumbs_up`` attribute by one, and then calls
    ``db.update_recall`` for the record. A missing or falsy ``importance``
    is treated as 1.0 and a missing or falsy ``thumbs_up`` as 0.

    Args:
        mem_id: Identifier of the record to boost.
        db: Feather DB handle.
    """
    record = db.get(mem_id)
    importance = float(record.meta.get_attribute("importance") or 1.0)
    votes = int(record.meta.get_attribute("thumbs_up") or 0)

    # +0.2 per vote, clamped to the 3.0 ceiling.
    boosted = importance + 0.2
    if boosted > 3.0:
        boosted = 3.0

    record.meta.set_attribute("importance", boosted)
    record.meta.set_attribute("thumbs_up", votes + 1)

    # Also bump the recall count so the record resists decay.
    db.update_recall(mem_id)
def record_thumbs_down(mem_id: int, db: fdb.DB):
    """Register an unhelpful vote for a stored record.

    Lowers the record's ``importance`` attribute by 0.3, never below 0.1.
    A missing or falsy ``importance`` is treated as 1.0.

    Args:
        mem_id: Identifier of the record to demote.
        db: Feather DB handle.
    """
    record = db.get(mem_id)
    stored = record.meta.get_attribute("importance")
    lowered = float(stored or 1.0) - 0.3
    # Clamp at the 0.1 floor.
    record.meta.set_attribute("importance", max(lowered, 0.1))
Putting it together
# Initialize
# NOTE: `embed` is used below but is not defined anywhere in this walkthrough.
# Supply a callable that maps a string to an embedding vector, presumably of
# length 768 to match dim=768 (confirm against your embedding model).
db = fdb.DB.open("docs.feather", dim=768)

# Ingest a document
# Explicit encoding: without it open() uses the platform default, which can
# fail on or corrupt non-ASCII text on some systems.
with open("product_manual.md", encoding="utf-8") as f:
    content = f.read()

mems = ingest_document(
    doc_text=content,
    source_url="https://docs.example.com/manual",
    doc_title="Product Manual v2.3",
    namespace="tenant-acme",
    db=db,
    embed_fn=embed
)
link_related_sections(mems, db, embed)

# Search at query time
results = search_documents(
    query="How do I reset the authentication token?",
    namespace="tenant-acme",
    db=db,
    embed_fn=embed
)
for r in results:
    section = r.meta.get_attribute("section")
    ctype = r.meta.get_attribute("chunk_type")
    importance = r.meta.get_attribute("importance")
    print(f"[{ctype}] [{section}] score={r.score:.3f} importance={importance}")
    print(f" {r.text[:100]}...")

# User marks result as helpful
if results:
    record_thumbs_up(results[0].id, db)
The difference from naive chunking: your chunks reflect document structure, importance weights amplify high-value sections, cross-section edges let context_chain surface related content, hybrid search handles exact version/feature name queries, and thumbs-up feedback continuously improves retrieval quality. Each of these is a few lines of code on top of Feather DB — but together they're the difference between a demo and a production system.
Install: pip install feather-db · GitHub: github.com/feather-store/feather