# NOTE(review): removed stray export artifacts ("Spaces:", "Runtime error")
# that preceded the file header — they were scraping residue, not Python.
# -*- coding: utf-8 -*-
"""Chatbot_LLM_with_RAG Quyche_FINAL.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1kRGRGeOuF9JORajZPlEI2H0IrvcrgYr0
"""
import os | |
import textwrap | |
import chromadb | |
import langchain | |
import openai | |
from langchain.chains import RetrievalQA | |
from langchain.chat_models import ChatOpenAI | |
from langchain.document_loaders import TextLoader, UnstructuredPDFLoader, YoutubeLoader, PyPDFLoader | |
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings | |
from langchain.indexes import VectorstoreIndexCreator | |
from langchain.llms import OpenAI | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.vectorstores import Chroma | |
from langchain.llms import GPT4All | |
from pdf2image import convert_from_path | |
# !pip uninstall 'git+https://github.com/facebookresearch/detectron2.git@57bdb21249d5418c130d54e2ebdc94dda7a4c01a'
"""Download file pdf"""
# Download the source PDF (Colab-only shell commands, kept for reference).
# !gdown https://drive.google.com/uc?id=19_MlM7Cmw8z_j40dk80PQbITYNET3tL2
# !gdown https://drive.google.com/uc?id=1gdM3TfvyQPDXOuFjNS9n_DgD24ThDB84
# Local path of the regulations PDF used as the RAG knowledge base.
FILE_NAME="quyche_uit_plus_removed.pdf"
"""Load Data & Model""" | |
from getpass import getpass | |
OPENAI_API_KEY = "sk-proj-jFDUBtItWEzg2vE9ZZhaT3BlbkFJi3l93u3z3FuQItueKZQp" | |
# OPENAI_API_KEY = getpass() | |
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY | |
model = OpenAI(temperature=0, model_name="gpt-3.5-turbo") | |
# (pages) — render every PDF page to an image (dpi=88) for quick visual
# inspection in the notebook; not used by the QA pipeline itself.
images = convert_from_path(FILE_NAME, dpi=88)
# len(images)
# images[-1]
"""Use UnstructuredPDFLoader to load PDFs""" | |
# Use UnstructuredPDFLoader to load PDFs from the Internets | |
pdf_loader = UnstructuredPDFLoader(FILE_NAME) | |
pdf_pages = pdf_loader.load_and_split() | |
# Text Splitters | |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64) | |
texts = text_splitter.split_documents(pdf_pages) | |
# len(texts) | |
# texts[0] | |
# texts[-1] | |
"""Create Embeddings & Vectorstores""" | |
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2" | |
hf_embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME) | |
db = Chroma.from_documents(texts, hf_embeddings, persist_directory="db") | |
"""#Use a Chain""" | |
custom_prompt_template = """Sử dụng các thông tin sau đây để trả lời câu hỏi của người dùng. | |
Nếu bạn không biết câu trả lời, chỉ cần nói rằng bạn không biết, đừng cố bịa ra câu trả lời. | |
Tất cả câu trả lời của bạn đều phải trả lời bằng tiếng việt | |
Context: {context} | |
Question: {question} | |
""" | |
from langchain import PromptTemplate


def set_custom_prompt():
    """Build the PromptTemplate used for QA retrieval.

    Wraps the module-level Vietnamese template, exposing the ``context``
    and ``question`` slots the RetrievalQA chain fills in.
    """
    return PromptTemplate(
        template=custom_prompt_template,
        input_variables=['context', 'question'],
    )


prompt = set_custom_prompt()
# RetrievalQA "stuff" chain: retrieve the 3 most similar chunks from Chroma
# and stuff them into the prompt's {context} before querying the LLM.
chain = RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    chain_type_kwargs={'prompt': prompt}
)
"""#QA Chatbot""" | |
def print_response(response: str): | |
print("\n".join(textwrap.wrap(response, width=100))) | |
# query = "Các môn bổ túc kiến thức của khóa cao học ngành khoa học máy tính gồm những môn nào?" | |
# response = chain.run(query) | |
# print_response(response) | |
# from langchain.chat_models import ChatOpenAI | |
from langchain.schema import AIMessage, HumanMessage | |
# import openai | |
import gradio as gr | |
def predict(message, history):
    """Gradio chat callback: answer *message* via the RetrievalQA chain.

    *history* arrives as (human, ai) pairs and is converted into LangChain
    message objects for API compatibility; note the chain call below only
    uses the latest *message*, so prior turns do not influence the answer.
    """
    formatted_history = [
        msg
        for human_turn, ai_turn in history
        for msg in (HumanMessage(content=human_turn), AIMessage(content=ai_turn))
    ]
    formatted_history.append(HumanMessage(content=message))
    # gpt_response = llm(formatted_history)
    return chain.run(message)
# Launch a Gradio chat UI driven by `predict`; share=True exposes a public
# temporary URL (required to reach the app from outside Colab).
chatbot=gr.ChatInterface(predict)
chatbot.launch(share=True)