# Production Document Q&A with Feather DB: Beyond Naive Chunking

> Naive RAG — chunk text, embed, retrieve — breaks in production. Documents have structure, sections have different importance, and user engagement signals which content is actually useful. Here's how to build Q&A that accounts for all of it.

- **Category**: Tutorial
- **Read time**: 10 min read
- **Date**: June 16, 2026
- **Author**: Feather DB (Engineering)
- **URL**: https://getfeather.store/theory/feather-db-document-qa-production

---

## Why naive chunking fails in production

The simplest RAG pipeline: split every document into 512-token chunks, embed each chunk, store in a vector DB, retrieve top-k for every query. It works in demos. In production, it fails in predictable ways:

- A footnote about a legal disclaimer gets the same importance as a key product specification in the main section
- Section headers — which carry the most navigational context — are either absorbed into adjacent chunks (losing their emphasis) or chunked alone (becoming useless short vectors)
- A chunk boundary cuts a numbered list in half, embedding an incomplete thought
- A user finds a chunk helpful and clicks "thumbs up" — but the system has no way to boost that chunk for future retrievals

Production doc Q&A needs adaptive chunking, metadata-rich ingestion, structural importance weights, cross-section linking, and feedback loops. Here's how to build it with Feather DB.
## Step 1: Adaptive chunking strategy

import re
from dataclasses import dataclass
from typing import List

# Compiled once at import time instead of once per input line.
_HEADER_RE = re.compile(r'^#{1,3}\s+')       # markdown h1-h3
_HEADER_MARK_RE = re.compile(r'^#+\s+')      # leading "#"s removed to get the title
# A footnote is "[1]" / "[^1]", or a "*" glued directly to text ("*Excludes tax").
# "* item" (bullet) and "**bold**" are excluded on purpose so that they reach
# the list / body rules instead of being swallowed here.
_FOOTNOTE_RE = re.compile(r'^(\*(?![\s*])|\[\^?\d+\])')
_LIST_RE = re.compile(r'^(?:[-*•]|\d+\.)\s+')
_FENCE = "```"
_MIN_CHUNK_CHARS = 30    # buffered chunks at or below this length are dropped
_MAX_BODY_WORDS = 300    # body paragraphs are cut once they exceed this


@dataclass
class Chunk:
    text: str
    section: str
    chunk_type: str  # "header", "body", "list", "footnote", "code"
    position: float  # 0.0 = document start, 1.0 = end


def adaptive_chunk(doc_text: str, doc_title: str) -> List[Chunk]:
    """Split a markdown document into structure-aware chunks.

    Lines are classified one at a time and grouped into a buffer that is
    emitted whenever the structure changes:

    * h1-h3 headers always become their own "header" chunk and set the
      section name used for the chunks that follow.
    * A fenced code block is kept together as one "code" chunk, from the
      opening fence to the closing fence. Nothing inside the fence is
      re-classified, so ``# comment`` lines are not mistaken for headers and
      blank lines do not split the block. Line breaks and indentation of the
      code are preserved.
    * Footnote lines each start a new "footnote" chunk.
    * Consecutive list items are grouped into a "list" chunk; a body
      paragraph directly in front of the list is merged into it.
    * Remaining text is "body", flushed on blank lines or once it grows past
      ``_MAX_BODY_WORDS`` words.

    Buffered chunks whose text is ``_MIN_CHUNK_CHARS`` characters or shorter
    are discarded; header chunks are never discarded.

    Args:
        doc_text: Full document text; lines are separated by "\\n".
        doc_title: Section name used until the first header is seen.

    Returns:
        Chunks in document order. ``position`` is the index of the chunk's
        first line divided by the number of lines, for every chunk type.
    """
    lines = doc_text.split("\n")
    total_lines = len(lines)  # str.split always yields >= 1 element, never 0
    chunks: List[Chunk] = []
    current_section = doc_title
    buffer: List[str] = []
    buffer_type = "body"
    buffer_start = 0  # line index of the first line currently in `buffer`
    in_code = False   # True between an opening and a closing fence

    def push(text: str, line_no: int) -> None:
        """Append a line, remembering where the buffer started."""
        nonlocal buffer_start
        if not buffer:
            buffer_start = line_no
        buffer.append(text)

    def flush() -> None:
        """Emit the buffer as one chunk (if long enough) and reset it."""
        if not buffer:
            return
        if buffer_type == "code":
            # Whitespace is significant in code, so keep it line by line.
            text = "\n".join(buffer)
        else:
            text = " ".join(" ".join(buffer).split())  # normalize whitespace
        if len(text) > _MIN_CHUNK_CHARS:
            chunks.append(Chunk(
                text=text,
                section=current_section,
                chunk_type=buffer_type,
                position=buffer_start / total_lines,
            ))
        buffer.clear()

    for i, line in enumerate(lines):
        stripped = line.strip()

        # Inside a fence every line belongs to the code block, whatever it
        # looks like, until the closing fence arrives.
        if in_code:
            push(line.rstrip(), i)
            if stripped.startswith(_FENCE):
                flush()
                in_code = False
                buffer_type = "body"
            continue

        if _HEADER_RE.match(stripped):
            # Markdown headers — always their own chunk
            flush()
            current_section = _HEADER_MARK_RE.sub("", stripped)
            chunks.append(Chunk(
                text=stripped,
                section=current_section,
                chunk_type="header",
                position=i / total_lines,
            ))
            buffer_type = "body"
        elif stripped.startswith(_FENCE):
            # Code blocks — keep together
            flush()
            buffer_type = "code"
            push(stripped, i)
            if stripped.count(_FENCE) >= 2:
                # The fence opens and closes on this single line.
                flush()
                buffer_type = "body"
            else:
                in_code = True
        elif _FOOTNOTE_RE.match(stripped):
            flush()
            buffer_type = "footnote"
            push(stripped, i)
        elif _LIST_RE.match(stripped):
            # List items — group consecutive items (and a leading paragraph)
            if buffer_type not in ("list", "body"):
                flush()
            buffer_type = "list"
            push(stripped, i)
        elif stripped:
            # Regular paragraph text
            if buffer_type != "body":
                flush()
                buffer_type = "body"
            push(stripped, i)
            if len(" ".join(buffer).split()) > _MAX_BODY_WORDS:
                flush()
        else:
            # Blank line = paragraph break
            flush()

    flush()  # also emits an unterminated code fence at end of input
    return chunks


## Step 2: Metadata-rich ingestion
import feather_db as fdb
from datetime import datetime, timezone

# Base importance assigned to a chunk according to its structural role.
IMPORTANCE_BY_TYPE = {
    "header": 1.8,    # headers = high navigational value
    "body": 1.0,      # standard body text
    "list": 1.2,      # lists are often key facts or steps
    "code": 1.3,      # code examples are high-value
    "footnote": 0.5,  # footnotes are supplementary
}


def ingest_document(doc_text: str, source_url: str, doc_title: str,
                    namespace: str, db: fdb.DB, embed_fn):
    """Ingest a document with structure-aware chunking and metadata.

    The text is split with ``adaptive_chunk``; each chunk is embedded with
    ``embed_fn``, stored through ``db.add`` and annotated with its source,
    section, chunk type, importance, position, ingestion time and a
    ``thumbs_up`` counter starting at 0. Consecutive chunks are connected
    with a "leads_to" edge.

    Args:
        doc_text: Raw document text.
        source_url: Stored as the "source" attribute on every chunk.
        doc_title: Stored as "doc_title"; its first 50 characters also form
            the entity key ``doc:<title>``.
        namespace: Namespace passed to ``db.add``.
        db: Database handle; ``add`` and ``add_edge`` are called on it.
        embed_fn: Callable taking the chunk text and returning the vector
            handed to ``db.add``.

    Returns:
        The objects returned by ``db.add``, in document order.
    """
    chunks = adaptive_chunk(doc_text, doc_title)
    # NOTE(review): titles sharing their first 50 characters map to the same
    # entity key; search_documents builds the key the same way.
    entity = f"doc:{doc_title[:50]}"
    # One timezone-aware timestamp for the whole document: datetime.utcnow()
    # is deprecated and returns a naive value, and calling it per chunk gave
    # chunks of a single ingest slightly different times.
    ingested_at = datetime.now(timezone.utc).isoformat()

    mems = []
    prev_mem = None
    for chunk in chunks:
        vec = embed_fn(chunk.text)
        importance = IMPORTANCE_BY_TYPE.get(chunk.chunk_type, 1.0)

        mem = db.add(vec, text=chunk.text, namespace=namespace, entity=entity)
        mem.meta.set_attribute("source", source_url)
        mem.meta.set_attribute("section", chunk.section)
        mem.meta.set_attribute("chunk_type", chunk.chunk_type)
        mem.meta.set_attribute("doc_title", doc_title)
        mem.meta.set_attribute("importance", importance)
        mem.meta.set_attribute("position", chunk.position)
        mem.meta.set_attribute("ingested_at", ingested_at)
        mem.meta.set_attribute("thumbs_up", 0)

        # Link consecutive chunks: each chunk leads_to the next
        if prev_mem is not None:
            db.add_edge(prev_mem.id, mem.id, edge_type="leads_to")

        mems.append(mem)
        prev_mem = mem

    return mems


## Step 3: Cross-section edge linking

def link_related_sections(mems: list, db: fdb.DB, embed_fn,
                          similarity_threshold: float = 0.82):
    """Add 'refines' edges between semantically similar section headers.

    Only chunks whose "chunk_type" attribute is "header" are compared. Every
    unordered pair of headers is scored with ``fdb.cosine_similarity``; pairs
    scoring strictly above ``similarity_threshold`` are linked from the
    earlier header to the later one.

    Args:
        mems: Stored chunks, e.g. the return value of ``ingest_document``.
        db: Database handle; ``add_edge`` is called on it.
        embed_fn: Callable turning header text into a vector.
        similarity_threshold: Minimum (exclusive) similarity for an edge.
    """
    headers = [m for m in mems if m.meta.get_attribute("chunk_type") == "header"]
    # Embed each header once up front; embedding inside the pair loop cost
    # two embed_fn calls per pair, i.e. O(n^2) calls for n headers.
    vectors = [embed_fn(h.text) for h in headers]

    for i, (h1, v1) in enumerate(zip(headers, vectors)):
        for h2, v2 in zip(headers[i + 1:], vectors[i + 1:]):
            if fdb.cosine_similarity(v1, v2) > similarity_threshold:
                db.add_edge(h1.id, h2.id, edge_type="refines")


## Step 4: Hybrid search for precise retrieval
def search_documents(query: str, namespace: str, db: fdb.DB, embed_fn,
                     k: int = 6, doc_title: str = None):
    """Hybrid search with optional document scope.

    Args:
        query: User query; passed both as text and, embedded, as a vector.
        namespace: Namespace to search in.
        db: Database handle; ``hybrid_search`` is called on it.
        embed_fn: Callable turning the query into a vector.
        k: Number of results requested.
        doc_title: When given, restricts the search to the entity
            ``doc:<first 50 characters of the title>``; None (the default)
            applies no entity filter.

    Returns:
        Whatever ``db.hybrid_search`` returns.
    """
    query_vec = embed_fn(query)
    # Must be built exactly like the entity key used at ingestion time.
    entity = f"doc:{doc_title[:50]}" if doc_title else None

    # Hybrid search: exact terms (version numbers, names) + semantic
    return db.hybrid_search(
        query_text=query,
        query_vec=query_vec,
        k=k,
        namespace=namespace,
        entity=entity,
        half_life=180,  # documents stay relevant for months
    )


def search_with_context_chain(query: str, namespace: str, db: fdb.DB,
                              embed_fn, k: int = 4):
    """Search and follow edges to surface full section context.

    Embeds ``query`` and returns the result of ``db.context_chain`` called
    with ``max_depth=2`` and ``half_life=180``.
    """
    query_vec = embed_fn(query)
    return db.context_chain(
        query_vec,
        k=k,
        namespace=namespace,
        max_depth=2,
        half_life=180,
    )


## Step 5: Importance boost from user feedback

def record_thumbs_up(mem_id: int, db: fdb.DB, *,
                     step: float = 0.2, cap: float = 3.0):
    """Boost importance when a user marks a result as helpful.

    Args:
        mem_id: Id of the stored chunk, looked up with ``db.get``.
        db: Database handle.
        step: Importance added per thumbs up (default 0.2).
        cap: Upper bound for the resulting importance (default 3.0).
    """
    mem = db.get(mem_id)
    # A missing (or zero) attribute falls back to the neutral defaults.
    current_importance = float(mem.meta.get_attribute("importance") or 1.0)
    current_thumbs = int(mem.meta.get_attribute("thumbs_up") or 0)

    mem.meta.set_attribute("importance", min(current_importance + step, cap))
    mem.meta.set_attribute("thumbs_up", current_thumbs + 1)
    # NOTE(review): nothing here writes `mem` back through `db`; confirm that
    # in-place changes to `mem.meta` are persisted by Feather DB.

    # Also increment recall count to resist decay
    db.update_recall(mem_id)


def record_thumbs_down(mem_id: int, db: fdb.DB, *,
                       step: float = 0.3, floor: float = 0.1):
    """Reduce importance when a user marks a result as unhelpful.

    Args:
        mem_id: Id of the stored chunk, looked up with ``db.get``.
        db: Database handle.
        step: Importance removed per thumbs down (default 0.3).
        floor: Lower bound for the resulting importance (default 0.1).
    """
    mem = db.get(mem_id)
    current_importance = float(mem.meta.get_attribute("importance") or 1.0)
    mem.meta.set_attribute("importance", max(current_importance - step, floor))


## Putting it together

# NOTE(review): `embed` is used below but is not defined anywhere in this
# tutorial. Supply your own callable mapping text to a vector; presumably its
# length has to match dim=768 below — confirm against your embedding model.

# Initialize
db = fdb.DB.open("docs.feather", dim=768)

# Ingest a document (explicit encoding so the read does not depend on the
# platform's default locale)
with open("product_manual.md", encoding="utf-8") as f:
    content = f.read()

mems = ingest_document(
    doc_text=content,
    source_url="https://docs.example.com/manual",
    doc_title="Product Manual v2.3",
    namespace="tenant-acme",
    db=db,
    embed_fn=embed,
)
link_related_sections(mems, db, embed)

# Search at query time
results = search_documents(
    query="How do I reset the authentication token?",
    namespace="tenant-acme",
    db=db,
    embed_fn=embed,
)

for r in results:
    section = r.meta.get_attribute("section")
    ctype = r.meta.get_attribute("chunk_type")
    importance = r.meta.get_attribute("importance")
    print(f"[{ctype}] [{section}] score={r.score:.3f} importance={importance}")
    print(f" {r.text[:100]}...")

# User marks result as helpful
if results:
    record_thumbs_up(results[0].id, db)

# The difference from naive chunking: your chunks reflect document structure,
# importance weights amplify high-value sections, cross-section edges let
# context_chain surface related content, hybrid search handles exact
# version/feature name queries, and thumbs-up feedback continuously improves
# retrieval quality. Each of these is a few lines of code on top of Feather DB
# — but together they're the difference between a demo and a production system.
#
# **Install:** `pip install feather-db` · **GitHub:** [github.com/feather-store/feather](https://github.com/feather-store/feather)
#
# ---
#
# *This is the machine-readable mirror of the theory post at [getfeather.store/theory/feather-db-document-qa-production](https://getfeather.store/theory/feather-db-document-qa-production). For the full Feather DB documentation, see [getfeather.store/llms-full.txt](https://getfeather.store/llms-full.txt).*