"""Gradio QA assistant for Mexico City's transit regulations.

Loads the regulation text from ``reglamento-cdmx.csv``, chunks it, embeds
the chunks with OpenAI embeddings into a FAISS index, and answers user
questions via a LangChain "refine" question-answering chain.

Requires the ``OPENAI_API_KEY`` environment variable.
"""

import os
import zipfile

import numpy as np
import openai
import pandas as pd
import gradio as gr
import PyPDF2
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)
from langchain.vectorstores.faiss import FAISS

openai.api_key = os.environ['OPENAI_API_KEY']

embeddings = OpenAIEmbeddings()

# Corpus: the CSV is expected to have a 'text' column with the regulation body.
df = pd.read_csv('reglamento-cdmx.csv')
text = df['text'].tolist()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
    length_function=len,
)

# BUG FIX: split_text() expects a single string, but `text` is a list of
# rows — passing the list raises at runtime. Join the rows first so the
# splitter can chunk the full corpus.
texts = text_splitter.split_text("\n\n".join(str(t) for t in text))

docsearch = FAISS.from_texts(texts, embeddings)

# The prompts and the QA chain are request-invariant, so build them once at
# import time instead of on every call to answer_question().
refine_prompt_template = (
    "The original question is as follows: {question}\n"
    "We have provided an existing answer: {existing_answer}\n"
    "You have the opportunity to refine the existing answer,"
    "only if needed, with the context below.\n"
    "------------\n"
    "{context_str}\n"
    "------------\n"
    "If that context is not helpful to answer the question, then omit it.\n"
    "Your answer should be correct, and concise.\n"
    "Shorten the answer if possible.\n"
    "Reply in the same language as the question.\n"
    "Answer:"
)
refine_prompt = PromptTemplate(
    input_variables=["question", "existing_answer", "context_str"],
    template=refine_prompt_template,
)

initial_qa_template = (
    "Context information is below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the question: {question}\n"
)
initial_qa_prompt = PromptTemplate(
    input_variables=["context_str", "question"],
    template=initial_qa_template,
)

chain = load_qa_chain(
    OpenAI(temperature=0),
    chain_type="refine",
    return_refine_steps=False,
    question_prompt=initial_qa_prompt,
    refine_prompt=refine_prompt,
)


def answer_question(query):
    """Answer *query* using the FAISS index over the regulation corpus.

    Retrieves the chunks most similar to the question and runs them
    through the refine QA chain.

    Args:
        query: The user's question, in any language.

    Returns:
        The chain's final answer text.
    """
    docs = docsearch.similarity_search(query)
    result = chain(
        {"input_documents": docs, "question": query},
        return_only_outputs=True,
    )
    return result['output_text']


demo = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.Textbox(
            label="Hola soy tu asesor personal de tránsito, ¿cuál es tu pregunta? / Hi, I am your CDMX transit law personal assistant, ask me anything about Mexico City's transit law in any language.",
            lines=3,
        ),
    ],
    outputs=[gr.Textbox(label="Respuesta / Answer: ")],
    title="Asesor de Reglamento de Tránsito CDMX",
    examples=[
        ["cuál es la multa por no llevar casco?"],
        ["qué pasa si no tengo licencia de conducir?"],
        ["What would happen if I drove under the influence of alcohol?"]
    ],
)

if __name__ == "__main__":
    demo.launch()