ngày 05-12-2024
# Install runtime dependencies (quiet, upgrade-if-needed): the LangChain stack,
# Google Gemini + HuggingFace integrations, FAISS (CPU) vector store,
# sentence-transformers embeddings, and the PDF/DOCX/text loaders.
!pip install -qU \python-dotenv \langchain \langchain_openai \langchain_community \langchain-huggingface \langchain-google-genai \streamlit \faiss-cpu \sentence-transformers \pypdf \docx2txt
# Load every .txt file from the source folder on Google Drive into
# LangChain Document objects.
from langchain_community.document_loaders import DirectoryLoader, TextLoader

source_dir = "/content/drive/MyDrive/Cam_nang_txt"
loader = DirectoryLoader(source_dir, glob="*.txt", loader_cls=TextLoader)
docs = loader.load()
from langchain_text_splitters.character import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Split the loaded documents into overlapping chunks so each piece fits
# comfortably in the embedding / LLM context window.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1200,
    chunk_overlap=200,
)
doc_chunks = text_splitter.split_documents(docs)

# Embed each chunk (default HuggingFace sentence-transformer model) and
# build an in-memory FAISS similarity index over them.
embeddings = HuggingFaceEmbeddings()
vectordb = FAISS.from_documents(doc_chunks, embeddings)
# Persist the FAISS vector store to the local "index" directory so it can
# be reloaded later without re-embedding the corpus.
vectordb.save_local("index")
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini 1.5 Pro as the answering LLM (requires GOOGLE_API_KEY in the env).
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

retriever = vectordb.as_retriever(
    search_kwargs={
        "k": 2,  # number of retrieved chunks per query
    }
)

# FIX: dropped the spurious `llm=llm` argument. ConversationBufferMemory is a
# raw message buffer and declares no `llm` field (that parameter belongs to
# ConversationSummaryMemory); on current LangChain versions passing it raises
# a pydantic validation error, and it was never used.
memory = ConversationBufferMemory(
    output_key="answer",       # which chain output to store as the AI turn
    memory_key="chat_history", # input key the chain reads history from
    return_messages=True,      # store Message objects, as chat models expect
)

chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="map_reduce",
    memory=memory,
    verbose=True,
)
# Interactive console chat loop; type "exit" to stop.
memory.clear()  # start every session with an empty history
while True:
    user_input = input("user: ")
    if user_input.lower() == "exit":
        break
    # FIX: use `chain.invoke(...)` instead of the deprecated `chain(...)`
    # __call__ form, and stop passing `chat_history` explicitly — the chain's
    # attached ConversationBufferMemory injects (and would overwrite) that key
    # on every call, so supplying it by hand was redundant.
    response = chain.invoke({"question": user_input})
    assistant_response = response["answer"]
    print("assistant: ", assistant_response)