一、安装Ollama
1.下载Ollama并安装:https://ollama.com/download
2.拉取大模型:
#拉取用于嵌入外部数据的模型
ollama pull nomic-embed-text
#拉取推理模型
ollama pull llama3.1:8b # 约 4.7 GB,适合 8GB 显存
# 或
ollama pull qwen3:30b # 约 19 GB,需要较强硬件
# 或 其他模型
二、安装Python
1.下载Python3.11并安装:https://www.python.org/downloads/windows/
2.注意:安装时一定要把python添加到PATH环境变量中
三、创建虚拟环境
1.创建虚拟环境(第一次)
mkdir D:\Rag
cd D:\Rag
python -m venv rag_env
2.切换到虚拟环境
rag_env\Scripts\activate #rag_env应该与创建的环境名保持一致
3.下载依赖
pip install langchain-ollama langchain-chroma langchain-community langchain-text-splitters langchain-core pypdf
四、整理业务数据
1.把业务数据以.txt或.pdf或.md的文件形式放置到一个专门放置这类数据的目录下,例如:D:\Rag\my_knowledge
2.⭐️文本文件的字符编码必须为utf-8,否则会导致加载文件失败
五、编写python代码:创建向量数据库,加载向量数据,并连接Ollama服务进行聊天
# python3
# rag.py - 基于本地知识库的 RAG 问答系统
import os
import shutil
from langchain_ollama import OllamaEmbeddings, OllamaLLM
from langchain_chroma import Chroma
from langchain_community.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# ================= Configuration (adjust to your environment) =================
KNOWLEDGE_DIR = r"D:\Rag\my_knowledge" # folder that holds the business knowledge files
PERSIST_DIR = r"D:\Rag\chroma_db" # where the vector database is stored (absolute path)
OLLAMA_BASE_URL = "http://localhost:11434" # root API URL of the local Ollama service
EMBEDDING_MODEL = "nomic-embed-text" # model used to embed text into vectors
LLM_MODEL = "qwen3:30b" # model used to answer questions
# ================= 1. Load documents =================
def load_documents():
    """Load every supported document found under KNOWLEDGE_DIR.

    The directory is searched recursively for ``.txt``, ``.pdf`` and ``.md``
    files, in that order. Text and Markdown files are read as UTF-8.

    A failure while loading one file type is reported on stdout and does not
    prevent the remaining types from being loaded.

    Returns:
        A list with the documents of all file types that loaded successfully
        (empty when nothing could be loaded).
    """
    # (extension, DirectoryLoader options); list order is the load order.
    # Further extensions (e.g. .h, .cpp) can be appended here.
    loader_specs = [
        (".txt", {"loader_cls": TextLoader, "loader_kwargs": {'encoding': 'utf-8'}}),
        (".pdf", {"loader_cls": PyPDFLoader}),
        (".md", {"loader_cls": TextLoader, "loader_kwargs": {'encoding': 'utf-8'}}),
    ]

    # All loaders are constructed before any of them is asked to load.
    directory_loaders = [
        (suffix, DirectoryLoader(KNOWLEDGE_DIR, glob=f"**/*{suffix}", **options))
        for suffix, options in loader_specs
    ]

    collected = []
    for suffix, directory_loader in directory_loaders:
        try:
            loaded = directory_loader.load()
            collected.extend(loaded)
            print(f"加载 {suffix} 文件: {len(loaded)} 个文档")
        except Exception as err:
            print(f"加载 {suffix} 文件时出错: {err}。请保证文本文件字符编码为utf-8")
    return collected
# ================= 2. Split text into chunks =================
def split_documents(documents):
    """Split the loaded documents into overlapping text chunks.

    Chunks are at most 500 characters long and consecutive chunks overlap by
    50 characters. The chunk count is printed, followed by either a preview
    of the first chunk or a warning when no chunk was produced.

    Args:
        documents: The documents to split.

    Returns:
        The list of chunks produced by the splitter (possibly empty).
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    pieces = splitter.split_documents(documents)
    print(f"文档切分为 {len(pieces)} 个文本块")

    if not pieces:
        print("警告:切分后没有文本块!")
        return pieces

    # Show the beginning of the first chunk as a quick sanity check.
    preview = pieces[0].page_content[:150]
    print(f"示例块内容(前150字符): {preview}")
    return pieces
# ================= 3. Create or load the vector database =================
def create_vectorstore(chunks, force_rebuild=False):
    """Return a Chroma vector store persisted under PERSIST_DIR.

    Behaviour:
      * ``force_rebuild=True``: an existing PERSIST_DIR is deleted first, so
        the store is always re-created from ``chunks``.
      * PERSIST_DIR exists: the stored vectors are loaded and ``chunks`` are
        NOT embedded again. If the loaded store turns out to be empty (for
        example because an earlier run failed before any vector was written),
        it is populated from ``chunks`` instead of being returned empty.
      * PERSIST_DIR does not exist: a new store is created from ``chunks``.

    Args:
        chunks: Text chunks to embed when the store has to be (re)built.
        force_rebuild: Delete any existing store and rebuild it. Defaults to
            False, which keeps the previous load-if-present behaviour.

    Returns:
        The ready-to-query Chroma vector store.
    """
    embeddings = OllamaEmbeddings(
        base_url=OLLAMA_BASE_URL,
        model=EMBEDDING_MODEL,
    )

    if force_rebuild and os.path.exists(PERSIST_DIR):
        shutil.rmtree(PERSIST_DIR)
        print(f"已删除旧的向量库目录: {PERSIST_DIR}")

    if os.path.exists(PERSIST_DIR):
        vectorstore = Chroma(persist_directory=PERSIST_DIR, embedding_function=embeddings)
        action = "加载"
        # An existing but empty store would make every retrieval return
        # nothing, so fill it from the current chunks.
        if chunks and vectorstore._collection.count() == 0:
            vectorstore.add_documents(chunks)
            action = "创建"
    else:
        # New store; passing persist_directory makes Chroma write it to disk.
        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_directory=PERSIST_DIR,
        )
        action = "创建"

    # Sanity check on the number of stored vectors.
    # NOTE: `_collection` is a private attribute of the Chroma wrapper; switch
    # to a public counter if the library version in use provides one.
    actual_count = vectorstore._collection.count()
    print(f"向量数据库已{action},共有{actual_count} 条向量")
    if actual_count != len(chunks):
        print(f"警告:向量数据数量({actual_count})与文本块数({len(chunks)})不一致!")
        # A loaded store reflects the documents of an earlier run; tell the
        # user how to bring it back in sync.
        print(f"提示:如果知识库文件有更新,请删除目录 {PERSIST_DIR} 后重新运行以重建向量库")
    return vectorstore
# ================= 4. Build the question-answering chain =================
def build_qa_chain(vectorstore):
    """Assemble the retrieval-augmented question-answering chain.

    The chain takes the user's question as its input, retrieves the 4 most
    similar chunks from ``vectorstore``, places their text into the system
    prompt as context, sends the prompt to the Ollama model LLM_MODEL and
    parses the model output into a string.

    Args:
        vectorstore: Vector store used to retrieve context for a question.

    Returns:
        A runnable chain; call ``invoke(question)`` to get the answer text.
    """
    def format_docs(docs):
        # Retrieved chunks are separated by a blank line inside the prompt.
        return "\n\n".join([doc.page_content for doc in docs])

    model = OllamaLLM(base_url=OLLAMA_BASE_URL, model=LLM_MODEL, temperature=0.1)

    guidance = (
        "你是一个专业的问答助手。请根据以下提供的上下文信息来回答用户的问题。"
        "如果无法从上下文中找到答案,请直接说你不知道,不要编造答案。"
    )
    system_template = guidance + "\n\n" + "上下文信息:\n{context}"
    chat_prompt = ChatPromptTemplate.from_messages(
        [("system", system_template), ("human", "{input}")]
    )

    top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # The question feeds both branches: it is passed through unchanged as
    # "input" and used by the retriever to produce "context".
    chain_inputs = {
        "context": top_k_retriever | format_docs,
        "input": RunnablePassthrough(),
    }
    return chain_inputs | chat_prompt | model | StrOutputParser()
# ================= 5. Main program =================
def main():
    """Run the interactive local RAG question-answering session.

    Steps: load the documents, split them into chunks, create or load the
    vector database, build the QA chain, then answer questions read from
    stdin until the user types ``exit``, presses Ctrl+C, or stdin reaches
    end-of-file.

    The function returns early, after printing an error, when no documents
    were found or when splitting produced no chunks.
    """
    banner = "=" * 50
    print(banner)
    print("本地 RAG 问答系统启动")
    print(banner)

    print("加载文档...")
    docs = load_documents()
    if not docs:
        print("错误:未找到任何文档,请检查 KNOWLEDGE_DIR 路径")
        return

    print("切分文档...")
    chunks = split_documents(docs)
    if not chunks:
        print("错误:切分后无文本块")
        return

    print("处理向量数据库...")
    vectorstore = create_vectorstore(chunks)

    print("构建问答链...")
    qa_chain = build_qa_chain(vectorstore)

    print("\n知识库已就绪!输入问题开始提问(输入 'exit' 退出)\n")
    while True:
        try:
            query = input("问题: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl+C or a closed stdin ends the session cleanly instead of
            # terminating the program with a traceback.
            print()
            break

        if query.lower() == "exit":
            break
        if not query:
            continue

        # A failed request (e.g. Ollama unreachable) is reported and the
        # session continues with the next question.
        try:
            answer = qa_chain.invoke(query)
        except Exception as e:
            print(f"出错: {e}\n")
        else:
            print(f"回答: {answer}\n")


if __name__ == "__main__":
    main()

284

被折叠的 条评论
为什么被折叠?



