"""Gradio chat demo answering questions with a RAG (retrieval-augmented
generation) pipeline: retrieve context from a vector store, then generate
an answer with a chat-templated text-generation model."""

import gradio as gr
import transformers

# Distance threshold under which a retrieved document counts as relevant.
# NOTE(review): assumes similarity_search_with_score returns (doc, distance)
# pairs where LOWER is better — confirm against the vector store in use.
SCORE_THRESHOLD = 7

# Llama-style chat templates close the user turn with "[/INST]"; the model's
# answer follows this marker in the echoed generation.
ANSWER_MARKER = "[/INST]"


def ask(pipeline, query, temperature=0.7, top_k=50, top_p=0.95):
    """Answer *query* using context retrieved from the vector store.

    Parameters
    ----------
    pipeline : transformers.Pipeline
        Text-generation pipeline whose tokenizer supports chat templates.
    query : str
        The user's question.
    temperature, top_k, top_p : float / int / float
        Sampling hyperparameters forwarded to the pipeline.

    Returns
    -------
    tuple[str, list]
        The generated answer — or a fixed fallback message when no
        sufficiently relevant document was retrieved — together with the
        raw (document, score) pairs from the search.
    """
    # NOTE(review): `vectordb` is a module-level global that is not defined
    # anywhere in this file — presumably created in a fuller version of the
    # script (e.g. a Chroma/FAISS store); confirm before running standalone.
    docs = vectordb.similarity_search_with_score(query)

    # Keep only the page content of documents under the relevance threshold.
    context = [
        doc.to_json()["kwargs"]["page_content"]
        for doc, score in docs
        if score < SCORE_THRESHOLD
    ]

    # Guard clause: nothing relevant retrieved -> fixed fallback answer.
    if not context:
        return "I don't have any information to answer this question", docs

    messages = [{
        "role": "user",
        "content": (
            "Based on the following information: "
            + "\n".join(context)
            + "\n Answer in english to the question: "
            + query
        ),
    }]
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    outputs = pipeline(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
    )
    answer = outputs[0]["generated_text"]

    # The generation echoes the prompt; keep only the text after the last
    # "[/INST]" marker plus one separator character (len("[/INST]") + 1 == 8,
    # matching the original slice).  Fix: the original did not guard against
    # the marker being absent — rfind() returns -1 and `answer[-1 + 8:]`
    # silently dropped the first 7 characters of the answer.
    marker_pos = answer.rfind(ANSWER_MARKER)
    if marker_pos == -1:
        return answer, docs
    return answer[marker_pos + len(ANSWER_MARKER) + 1:], docs


pipeline = transformers.pipeline("text-generation", model="el-filatova/rag")

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Handle one chat turn: run the RAG pipeline, append the exchange,
        and clear the input box."""
        bot_message = ask(pipeline, message)[0]
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    demo.launch()