Production Document Q&A with Feather DB: Beyond Naive Chunking

Why naive chunking fails in production

The simplest RAG pipeline: split every document into 512-token chunks, embed each chunk, store in a vector DB, retrieve top-k for every query. It works in demos. In production, it fails in predictable ways:

- A footnote about a legal disclaimer gets the same importance as a key product specification in the main section
- Section headers — which carry the most navigational context — are either absorbed into adjacent chunks (losing their emphasis) or chunked alone (becoming useless short vectors)
- A chunk boundary cuts a numbered list in half, embedding an incomplete thought
- A user finds a chunk helpful and clicks "thumbs up" — but the system has no way to boost that chunk for future retrievals

Production doc Q&A needs adaptive chunking, metadata-rich ingestion, structural importance weights, cross-section linking, and feedback loops. Here's how to build it with Feather DB.

Step 1: Adaptive chunking strategy

import re
from dataclasses import dataclass
from typing import List

@dataclass
class Chunk:
    """One structure-aware slice of a document produced by ``adaptive_chunk``."""

    text: str  # chunk content; whitespace-normalized except for code blocks
    section: str  # text of the nearest preceding header, or the document title
    chunk_type: str  # "header", "body", "list", "footnote", "code"
    position: float  # starting line / total lines (0.0 = document start)


def adaptive_chunk(doc_text: str, doc_title: str) -> List[Chunk]:
    """Split a Markdown-style document into structure-aware chunks.

    Rules, applied line by line:

    * ``#``/``##``/``###`` headers always become their own "header" chunk and
      set the section name for the chunks that follow.
    * Everything between an opening and closing ``` fence is kept together as
      a single "code" chunk with its line breaks and indentation preserved.
    * Consecutive list items (``-``, ``*``, ``•`` or ``1.`` followed by
      whitespace) are grouped; a paragraph immediately preceding the list is
      kept with it as a lead-in.
    * Lines starting with ``*`` (not followed by whitespace) or ``[N]`` are
      treated as footnotes, one chunk per line.
    * Remaining text is grouped into "body" chunks, flushed on blank lines or
      once a buffer exceeds roughly 300 words.

    Buffered chunks of 30 characters or fewer are discarded.

    Args:
        doc_text: Raw document text.
        doc_title: Used as the section name until the first header is seen.

    Returns:
        Chunks in document order. ``position`` is the index of the chunk's
        first line divided by the total number of lines.
    """
    header_re = re.compile(r'^#{1,3}\s+')
    list_re = re.compile(r'^(?:[-*•]|\d+\.)\s+')
    footnote_re = re.compile(r'^(\*|\[\d+\])')

    lines = doc_text.split("\n")
    total_lines = len(lines)  # str.split always yields at least one element
    chunks: List[Chunk] = []
    current_section = doc_title
    buffer: List[str] = []
    buffer_type = "body"
    buffer_start = 0  # index of the line that opened the current buffer
    in_code = False  # True while between an opening and a closing fence

    def flush_buffer() -> None:
        """Emit the buffered lines as one chunk (if long enough) and reset."""
        if not buffer:
            return
        if buffer_type == "code":
            # Code is only meaningful with its layout intact.
            text = "\n".join(buffer)
        else:
            text = " ".join(" ".join(buffer).split())  # normalize whitespace
        if len(text) > 30:  # skip trivially short chunks
            chunks.append(Chunk(
                text=text,
                section=current_section,
                chunk_type=buffer_type,
                position=buffer_start / total_lines,
            ))
        buffer.clear()

    for i, line in enumerate(lines):
        stripped = line.strip()

        # Inside a fence nothing is interpreted: "# comment" is not a header,
        # "- x" is not a list item, and blank lines do not end the chunk.
        if in_code:
            buffer.append(line.rstrip())
            if stripped.startswith("```"):
                in_code = False
                flush_buffer()
                buffer_type = "body"
            continue

        # Opening fence: start a code chunk that runs to the closing fence
        # (or to the end of the document if the fence is never closed).
        if stripped.startswith("```"):
            flush_buffer()
            buffer_type = "code"
            buffer_start = i
            buffer.append(stripped)
            in_code = True

        # Markdown headers: always their own chunk.
        elif header_re.match(stripped):
            flush_buffer()
            current_section = re.sub(r'^#+\s+', '', stripped)
            chunks.append(Chunk(
                text=stripped,
                section=current_section,
                chunk_type="header",
                position=i / total_lines,
            ))
            buffer_type = "body"

        # List items are tested before footnotes so that "* item" bullets are
        # not swallowed by the footnote pattern, which also starts with "*".
        elif list_re.match(stripped):
            if buffer_type not in ("list", "body"):
                flush_buffer()
            if not buffer:
                buffer_start = i
            buffer_type = "list"
            buffer.append(stripped)

        # Footnotes: "*text" or "[1] text", one chunk per footnote line.
        elif footnote_re.match(stripped):
            flush_buffer()
            buffer_type = "footnote"
            buffer_start = i
            buffer.append(stripped)

        # Regular paragraph text.
        elif stripped:
            if buffer_type != "body":
                flush_buffer()
            if not buffer:
                buffer_start = i
            buffer_type = "body"
            buffer.append(stripped)
            # Cap body chunks at roughly 300 words.
            if len(" ".join(buffer).split()) > 300:
                flush_buffer()

        # Blank line: paragraph break.
        elif buffer:
            flush_buffer()

    flush_buffer()
    return chunks

Step 2: Metadata-rich ingestion

import feather_db as fdb
from datetime import datetime

# Initial importance weight per chunk type. ingest_document stores the matching
# value on each chunk and falls back to 1.0 for any type not listed here.
IMPORTANCE_BY_TYPE = {
    # Headers carry high navigational value.
    "header": 1.8,
    # Baseline for ordinary body text.
    "body": 1.0,
    # Lists tend to hold key facts or steps.
    "list": 1.2,
    # Code examples are treated as high-value.
    "code": 1.3,
    # Footnotes are supplementary.
    "footnote": 0.5,
}

def ingest_document(doc_text: str, source_url: str, doc_title: str,
                    namespace: str, db: fdb.DB, embed_fn):
    """Chunk a document, store every chunk with metadata, and link neighbours.

    Args:
        doc_text: Raw document text, split with ``adaptive_chunk``.
        source_url: Stored on every chunk as the "source" attribute.
        doc_title: Stored as "doc_title"; its first 50 characters also form
            the ``doc:<title>`` entity each chunk is added under.
        namespace: Namespace passed to ``db.add`` for every chunk.
        db: Feather DB handle; only ``add`` and ``add_edge`` are called.
        embed_fn: Callable mapping a chunk's text to the vector given to
            ``db.add``.

    Returns:
        The objects returned by ``db.add``, in document order.
    """
    # Local import: the surrounding file only imports `datetime` itself.
    from datetime import timezone

    entity = f"doc:{doc_title[:50]}"
    # A single timezone-aware timestamp for the whole run, so every chunk of
    # one ingestion carries the same, unambiguous "ingested_at" value
    # (datetime.utcnow() is naive and deprecated since Python 3.12).
    ingested_at = datetime.now(timezone.utc).isoformat()

    mems = []
    prev_mem = None

    for chunk in adaptive_chunk(doc_text, doc_title):
        mem = db.add(embed_fn(chunk.text), text=chunk.text,
                     namespace=namespace,
                     entity=entity)

        attributes = {
            "source": source_url,
            "section": chunk.section,
            "chunk_type": chunk.chunk_type,
            "doc_title": doc_title,
            # Chunk types missing from the table get the neutral weight 1.0.
            "importance": IMPORTANCE_BY_TYPE.get(chunk.chunk_type, 1.0),
            "position": chunk.position,
            "ingested_at": ingested_at,
            "thumbs_up": 0,
        }
        for key, value in attributes.items():
            mem.meta.set_attribute(key, value)

        # Link consecutive chunks: each chunk leads_to the next.
        if prev_mem is not None:
            db.add_edge(prev_mem.id, mem.id, edge_type="leads_to")

        mems.append(mem)
        prev_mem = mem

    return mems

Step 3: Cross-section edge linking

def link_related_sections(mems: list, db: fdb.DB, embed_fn,
                          similarity_threshold: float = 0.82):
    """Add 'refines' edges between semantically similar section headers.

    Only entries whose "chunk_type" attribute is "header" are compared. Every
    unordered pair of headers is scored with ``fdb.cosine_similarity`` and an
    edge from the earlier to the later header is added when the score is
    strictly greater than ``similarity_threshold``.

    Args:
        mems: Stored chunks, e.g. the list returned by ``ingest_document``.
        db: Feather DB handle; only ``add_edge`` is called.
        embed_fn: Callable mapping header text to a vector.
        similarity_threshold: Minimum (exclusive) similarity for an edge.
    """
    from itertools import combinations

    headers = [m for m in mems
               if m.meta.get_attribute("chunk_type") == "header"]
    if len(headers) < 2:
        return  # nothing to compare, so do not spend any embedding calls

    # Embed each header exactly once; embedding inside the pair loop would
    # cost O(n^2) embed_fn calls for n headers.
    vectors = [embed_fn(h.text) for h in headers]

    for (h1, v1), (h2, v2) in combinations(zip(headers, vectors), 2):
        if fdb.cosine_similarity(v1, v2) > similarity_threshold:
            db.add_edge(h1.id, h2.id, edge_type="refines")

Step 4: Hybrid search for precise retrieval

def search_documents(query: str, namespace: str, db: fdb.DB, embed_fn,
                     k: int = 6, doc_title: str = None):
    """Run a hybrid (text + vector) search, optionally scoped to one document.

    The query is embedded with ``embed_fn`` and passed, together with the raw
    query text, to ``db.hybrid_search``. When ``doc_title`` is truthy the
    search is restricted to the ``doc:<first 50 chars of title>`` entity;
    otherwise ``entity=None`` is passed.

    Returns whatever ``db.hybrid_search`` returns.
    """
    vector = embed_fn(query)

    if doc_title:
        scope = f"doc:{doc_title[:50]}"
    else:
        scope = None

    # The raw text lets exact terms (version numbers, names) match alongside
    # the semantic vector; half_life=180 is the decay setting chosen so that
    # documents stay relevant for months.
    return db.hybrid_search(
        query_text=query,
        query_vec=vector,
        k=k,
        namespace=namespace,
        entity=scope,
        half_life=180,
    )

def search_with_context_chain(query: str, namespace: str, db: fdb.DB,
                               embed_fn, k: int = 4):
    """Embed the query and return ``db.context_chain`` for it.

    The call uses ``max_depth=2`` and ``half_life=180`` within the given
    namespace; presumably the chain follows stored edges outward from the
    top-k hits so that surrounding section context is surfaced (confirm
    against the Feather DB documentation).
    """
    return db.context_chain(
        embed_fn(query),
        k=k,
        namespace=namespace,
        max_depth=2,
        half_life=180,
    )

Step 5: Importance boost from user feedback

def record_thumbs_up(mem_id: int, db: fdb.DB):
    """Raise a chunk's importance after a user marks it as helpful.

    Adds 0.2 to the stored "importance" (a missing or falsy value counts as
    1.0) with a ceiling of 3.0, increments the "thumbs_up" counter (missing
    counts as 0), then calls ``db.update_recall`` for the same id.
    """
    mem = db.get(mem_id)
    importance_before = float(mem.meta.get_attribute("importance") or 1.0)
    votes_before = int(mem.meta.get_attribute("thumbs_up") or 0)

    # +0.2 per vote, never above 3.0.
    boosted = importance_before + 0.2
    if boosted > 3.0:
        boosted = 3.0

    mem.meta.set_attribute("importance", boosted)
    mem.meta.set_attribute("thumbs_up", votes_before + 1)

    # Presumably bumps the recall count so the chunk resists time decay.
    db.update_recall(mem_id)

def record_thumbs_down(mem_id: int, db: fdb.DB):
    """Lower a chunk's importance after a user marks it as unhelpful.

    Subtracts 0.3 from the stored "importance" (a missing or falsy value
    counts as 1.0) with a floor of 0.1.
    """
    mem = db.get(mem_id)
    importance_before = float(mem.meta.get_attribute("importance") or 1.0)

    # -0.3 per vote, never below 0.1.
    lowered = importance_before - 0.3
    if lowered < 0.1:
        lowered = 0.1

    mem.meta.set_attribute("importance", lowered)

Putting it together

# Initialize
# NOTE: `embed` is not defined anywhere in this walkthrough; it is assumed to
# be your own text -> vector function, producing vectors that match dim=768.
db = fdb.DB.open("docs.feather", dim=768)

# Ingest a document (explicit encoding so the read does not depend on the
# platform's default locale)
with open("product_manual.md", encoding="utf-8") as f:
    content = f.read()

mems = ingest_document(
    doc_text=content,
    source_url="https://docs.example.com/manual",
    doc_title="Product Manual v2.3",
    namespace="tenant-acme",
    db=db,
    embed_fn=embed
)
link_related_sections(mems, db, embed)

# Search at query time
results = search_documents(
    query="How do I reset the authentication token?",
    namespace="tenant-acme",
    db=db,
    embed_fn=embed
)

# Show each hit with the structural metadata stored at ingestion
for r in results:
    section = r.meta.get_attribute("section")
    ctype = r.meta.get_attribute("chunk_type")
    importance = r.meta.get_attribute("importance")
    print(f"[{ctype}] [{section}] score={r.score:.3f} importance={importance}")
    print(f"  {r.text[:100]}...")

# User marks result as helpful
if results:
    record_thumbs_up(results[0].id, db)

The difference from naive chunking: your chunks reflect document structure, importance weights amplify high-value sections, cross-section edges let context_chain surface related content, hybrid search handles exact version/feature name queries, and thumbs-up feedback continuously improves retrieval quality. Each of these is a few lines of code on top of Feather DB — but together they're the difference between a demo and a production system.

Install: pip install feather-db · GitHub: github.com/feather-store/feather