YouTip LogoYouTip

LangChain Project: Knowledge Q&A

This article builds a personal knowledge base Q&A system that can load Markdown files and PDF documents, and answer questions based on this content.

* * *

## System Design

* Document Loading: Supports Markdown, TXT, PDF and other formats
* Vector Retrieval: Chroma persistent storage with incremental update support
* Source Citation: Include source document and fragment location in answers
* Streaming Output: Display answers token by token

* * *

## Complete Code

## Instance

```python
# File Path: knowledge_qa.py
# pip install langchain langchain-deepseek langchain-chroma chromadb pypdf
from dotenv import load_dotenv
load_dotenv()

import os
from pathlib import Path
from langchain.tools import tool
from langchain.agents import create_agent
from langchain.chat_models import init_chat_model
from langchain.messages import HumanMessage
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader


class KnowledgeBase:
    """Personal Knowledge Base Manager"""

    def __init__(self, persist_dir: str = "./my_knowledge_db"):
        self.persist_dir = persist_dir
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
            separators=[" ", " ", "。", "!", "?", ". ", "! ", "? ", " "],
        )
        self.vector_store = None
        self._load_or_create()

    def _load_or_create(self):
        """Load existing vector store or create new one"""
        if os.path.exists(self.persist_dir) and os.listdir(self.persist_dir):
            self.vector_store = Chroma(
                persist_directory=self.persist_dir,
                embedding_function=self.embeddings,
            )
            print(f"Loaded vector store: {self.vector_store._collection.count()} document chunks")
        else:
            self.vector_store = Chroma(
                embedding_function=self.embeddings,
                persist_directory=self.persist_dir,
            )
            print("Created new vector store")

    def add_file(self, file_path: str) -> int:
        """Add file to knowledge base, return number of document chunks added"""
        loader = TextLoader(file_path, encoding="utf-8")
        docs = loader.load()
        # Add file source metadata
        for doc in docs:
            doc.metadata["source"] = Path(file_path).name
        chunks = self.text_splitter.split_documents(docs)
        self.vector_store.add_documents(chunks)
        print(f"Added {Path(file_path).name}: {len(chunks)} document chunks")
        return len(chunks)

    def add_text(self, text: str, source: str = "Manually Added") -> int:
        """Add text directly to knowledge base"""
        chunks = self.text_splitter.create_documents(
            [text], metadatas=[{"source": source}]
        )
        self.vector_store.add_documents(chunks)
        return len(chunks)

    def search(self, query: str, k: int = 3) -> list:
        """Search knowledge base"""
        return self.vector_store.similarity_search(query, k=k)

    def get_retriever(self):
        """Get retriever"""
        return self.vector_store.as_retriever(search_kwargs={"k": 3})
```

Continue with Agent section:

## Instance

```python
# ========== Create Knowledge Base and Add Sample Data ==========
kb = KnowledgeBase("./my_knowledge_db")

# Add some sample knowledge
kb.add_text(
    "Tutorial's Python3 Basic Tutorial includes the following chapters:"
    "1. Python Introduction and Environment Setup 2. Basic Data Types 3. Operators and Expressions "
    "4. Conditional Statements if-else 5. Loops for/while 6. Function Definition and Calling "
    "7. Modules and Packages 8. File Operations 9. Exception Handling 10. Object-Oriented Programming",
    source="Python3 Tutorial Outline"
)

kb.add_text(
    "To become an excellent Python developer, it is recommended to follow this learning path:"
    "Step 1: Master Python basic syntax (1-2 weeks);"
    "Step 2: Learn data structures and algorithm basics (2-3 weeks);"
    "Step 3: Choose a direction for in-depth learning (Web Development/Data Analysis/AI);"
    "Step 4: Complete 2-3 practical projects to consolidate knowledge.",
    source="Python Learning Path"
)

kb.add_text(
    "Tutorial's online programming environment supports Python, JavaScript, Java, C++ and many other languages."
    "Users don't need to install any software, they can write and run code directly in the browser."
    "The online environment also supports code highlighting, auto-completion and errorPrompt Function.",
    source="Online Programming Environment Description"
)

# ========== Create RAG Agent ==========
@tool
def search_knowledge(query: str) -> str:
    """Search for relevant information in the personal knowledge base.

    Use complete questions or key phrases when searching.

    Args:
        query: Search query or key phrase
    """
    docs = kb.search(query, k=3)
    if not docs:
        return "No relevant information found in knowledge base."
    results = []
    for i, doc in enumerate(docs, 1):
        source = doc.metadata.get("source", "Unknown Source")
        content = doc.page_content[:200]
        results.append(f"[{i}] Source: {source} {content}")
    return " --- ".join(results)


model = init_chat_model("deepseek:deepseek-v4-flash", temperature=0)
agent = create_agent(
    model=model,
    tools=[search_knowledge],
    system_prompt="""You are a personal knowledge base assistant.

## Rules
1. All questions must
```
← LangChain LangSmith | LangChain RAG Agent →