uunnxx
diff --git a/‎examples/ai_rag_demo/__init__.py‎ b/‎examples/ai_rag_demo/__init__.py‎
diff --git a/‎examples/ai_rag_demo/api/__init__.py‎ b/‎examples/ai_rag_demo/api/__init__.py‎
diff --git a/‎examples/ai_rag_demo/api/bad_shortcut.py‎
Lines changed: 22 additions & 0 deletions b/‎examples/ai_rag_demo/api/bad_shortcut.py‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎examples/ai_rag_demo/api/endpoints.py‎
Lines changed: 27 additions & 0 deletions b/‎examples/ai_rag_demo/api/endpoints.py‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎examples/ai_rag_demo/architecture.puml‎
Lines changed: 18 additions & 0 deletions b/‎examples/ai_rag_demo/architecture.puml‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎examples/ai_rag_demo/llm/__init__.py‎ b/‎examples/ai_rag_demo/llm/__init__.py‎
diff --git a/‎examples/ai_rag_demo/llm/client.py‎
Lines changed: 27 additions & 0 deletions b/‎examples/ai_rag_demo/llm/client.py‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎examples/ai_rag_demo/models/__init__.py‎ b/‎examples/ai_rag_demo/models/__init__.py‎
diff --git a/‎examples/ai_rag_demo/models/document.py‎
Lines changed: 33 additions & 0 deletions b/‎examples/ai_rag_demo/models/document.py‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎examples/ai_rag_demo/models/query.py‎
Lines changed: 25 additions & 0 deletions b/‎examples/ai_rag_demo/models/query.py‎
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,22 @@
+"""BAD: API layer directly accessing retrieval layer, bypassing services.
+
+This is an intentional architecture violation for demonstration.
+The API layer should only talk to the services layer, never directly
+to retrieval or LLM.
+"""
+
+from retrieval.vector_store import VectorStore
+from retrieval.embedder import Embedder
+
+
+def quick_search(text: str) -> list[dict]:
+    """A 'shortcut' endpoint that bypasses the service layer.
+
+    This violates the layered architecture by reaching directly
+    into the retrieval layer from the API layer.
+    """
+    embedder = Embedder()
+    store = VectorStore()
+    embedding = embedder.embed(text)
+    results = store.search(embedding, top_k=3)
+    return [{"text": r.text, "score": r.score} for r in results]
@@ -0,0 +1,27 @@
+"""API endpoints for the RAG service (mock FastAPI-style)."""
+
+from models.document import Document
+from models.query import Query, RAGResponse
+from services.rag_service import RAGService
+
+
+# Single module-level service instance, created at import time and shared
+# by the endpoint functions in this module.
+_rag_service = RAGService()
+
+
+def ingest_document(doc_id: str, content: str, source: str) -> dict:
+    """POST /ingest - Ingest a document into the RAG pipeline."""
+    doc = Document(id=doc_id, content=content, source=source)
+    count = _rag_service.ingest(doc)
+    return {"status": "ok", "chunks_stored": count}
+
+
+def ask(question: str, top_k: int = 5) -> dict:
+    """POST /ask - Ask a question using RAG."""
+    query = Query(text=question, top_k=top_k)
+    response: RAGResponse = _rag_service.query(query)
+    return {
+        "answer": response.answer,
+        "sources": [{"text": s.text[:100], "score": s.score} for s in response.sources],
+        "model": response.model,
+        "tokens_used": response.tokens_used,
+    }
@@ -0,0 +1,18 @@
+@startuml
+  ' Components of the RAG demo.
+  component [api]
+  component [services]
+  component [models]
+  component [retrieval]
+  component [llm]
+  component [shared]
+
+  ' Declared dependencies: an arrow A --> B means A depends on B.
+  ' The only edge leaving [api] goes to [services].
+  ' NOTE(review): api/endpoints.py imports from models directly, but no
+  ' [api] --> [models] edge is declared here - confirm whether that is
+  ' intended.
+  [api] --> [services]
+  [services] --> [retrieval]
+  [services] --> [llm]
+  [services] --> [models]
+  [retrieval] --> [models]
+  [retrieval] --> [shared]
+  [llm] --> [models]
+  [llm] --> [shared]
+  [models] --> [shared]
+@enduml
@@ -0,0 +1,27 @@
+"""Mock LLM client."""
+
+from models.query import RetrievalResult
+from shared.config import Config
+
+
+class LLMClient:
+    """Mock LLM client that generates fake responses."""
+
+    def __init__(self):
+        self.model = Config.LLM_MODEL
+        self.max_tokens = Config.LLM_MAX_TOKENS
+
+    def generate(self, prompt: str, context: list[RetrievalResult]) -> tuple[str, int]:
+        """Generate a mock response based on the prompt and retrieved context.
+
+        Returns:
+            Tuple of (response_text, tokens_used).
+        """
+        context_text = "\n".join(f"- {r.text[:100]}" for r in context)
+        answer = (
+            f"Based on {len(context)} retrieved documents, "
+            f"here is the answer to '{prompt[:50]}...': "
+            f"[Mock LLM response using {self.model}]"
+        )
+        tokens_used = len(answer.split()) * 2  # rough mock
+        return answer, tokens_used
@@ -0,0 +1,33 @@
+"""Document models for the RAG pipeline."""
+
+from dataclasses import dataclass, field
+
+from shared.config import Config
+
+
+@dataclass
+class Chunk:
+    """A piece of document text with its metadata and embedding."""
+
+    # The chunk's text.
+    text: str
+    # Free-form metadata; defaults to a fresh empty dict per instance.
+    metadata: dict = field(default_factory=dict)
+    # Embedding vector; defaults to a fresh empty list per instance.
+    embedding: list[float] = field(default_factory=list)
+
+
+@dataclass
+class Document:
+    id: str
+    content: str
+    source: str
+    chunks: list[Chunk] = field(default_factory=list)
+
+    def split_into_chunks(self) -> list[Chunk]:
+        size = Config.CHUNK_SIZE
+        overlap = Config.CHUNK_OVERLAP
+        text = self.content
+        chunks = []
+        start = 0
+        while start < len(text):
+            end = start + size
+            chunks.append(Chunk(text=text[start:end], metadata={"source": self.source}))
+            start = end - overlap
+        self.chunks = chunks
+        return chunks
@@ -0,0 +1,25 @@
+"""Query and response models."""
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class Query:
+    """A question to run through the RAG pipeline."""
+
+    # The question text.
+    text: str
+    # Number of results requested; defaults to 5.
+    top_k: int = 5
+    # Optional filters; defaults to a fresh empty dict per instance.
+    # NOTE(review): the expected filter keys are not shown here - confirm
+    # against the retrieval code.
+    filters: dict = field(default_factory=dict)
+
+
+@dataclass
+class RetrievalResult:
+    """One retrieved piece of text with its score and origin."""
+
+    # The retrieved text.
+    text: str
+    # Score attached to this result.
+    # NOTE(review): scale and direction are not shown here - confirm.
+    score: float
+    # Origin of the text.
+    source: str
+
+
+@dataclass
+class RAGResponse:
+    """The answer to a query plus the information about how it was produced."""
+
+    # The generated answer text.
+    answer: str
+    # The retrieval results associated with the answer.
+    sources: list[RetrievalResult]
+    # Name of the model reported for this response.
+    model: str
+    # Token count reported for this response.
+    tokens_used: int