"""Gradio front end for an IT assistant backed by a Hugging Face chat model.

Two modes are offered:

* ``Chat`` sends the user's message straight to the chat model.
* ``Web-Search`` first retrieves the three most similar documents from a local
  FAISS index and asks the model to answer from that retrieved text.

The conversation history is a single module-level list, so it is shared by
every caller of this process.
"""

import gradio as gr
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.vectorstores import FAISS
from langchain_community.chat_models.huggingface import ChatHuggingFace
from langchain_community.llms import HuggingFaceEndpoint

model_name = "sentence-transformers/all-mpnet-base-v2"
embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)

# SECURITY NOTE: allow_dangerous_deserialization=True lets FAISS.load_local
# unpickle the stored index. Only load a "faiss_index" directory that was
# produced by a trusted source.
db = FAISS.load_local(
    "faiss_index",
    embedding_llm,
    allow_dangerous_deserialization=True,
)

# Set up Hugging Face model
llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/starchat2-15b-v0.1",
    task="text-generation",
    max_new_tokens=4096,
    temperature=0.6,
    top_p=0.9,
    top_k=40,
    repetition_penalty=1.2,
    do_sample=True,
)
chat_model = ChatHuggingFace(llm=llm)

# Number of most recent non-system messages kept in the history. An even
# value keeps human/assistant turns paired after trimming.
MAX_HISTORY_MESSAGES = 6

# Shared conversation history, seeded with a system instruction and one
# example exchange.
messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="Hi AI, how are you today?"),
    AIMessage(content="I'm great thank you. How can I help you?"),
]


def _trim_history() -> None:
    """Shrink ``messages`` in place, always keeping the system message(s).

    Only the dialogue (non-system) messages are truncated, to the most recent
    ``MAX_HISTORY_MESSAGES`` entries, so the system instruction is never lost.
    """
    system_part = [m for m in messages if isinstance(m, SystemMessage)]
    dialogue = [m for m in messages if not isinstance(m, SystemMessage)]
    messages[:] = system_part + dialogue[-MAX_HISTORY_MESSAGES:]


def _ask_model(prompt_text: str) -> str:
    """Send ``prompt_text`` to the chat model together with the history.

    The prompt and the reply are recorded in ``messages`` only after the model
    call has returned, so a failed call leaves the history untouched. The
    reply is stored as an ``AIMessage`` so it keeps its assistant role on
    later calls.

    Args:
        prompt_text: Text of the human turn to send to the model.

    Returns:
        The text content of the model's reply.
    """
    prompt = HumanMessage(content=prompt_text)
    response = chat_model.invoke(messages + [prompt])
    messages.extend([prompt, AIMessage(content=response.content)])
    _trim_history()
    return response.content


def handle_message(message: str, mode: str) -> str:
    """Dispatch a user message to the handler for the selected mode.

    Args:
        message: Text entered by the user.
        mode: ``"Chat"`` or ``"Web-Search"``; any other value (including no
            selection) yields a hint to pick a valid mode.

    Returns:
        The formatted exchange, or a short hint when the message is empty or
        the mode is not recognised.
    """
    # Check if message is empty (also covers a missing value).
    if not message or not message.strip():
        return "Enter a valid message."

    if mode == "Chat":
        return chat_mode(message)
    if mode == "Web-Search":
        return web_search(message)
    return "Select a valid mode."


def chat_mode(message: str) -> str:
    """Answer ``message`` with the chat model using the shared history.

    Args:
        message: Text entered by the user.

    Returns:
        A string showing the user's message followed by the model's reply.
    """
    answer = _ask_model(message)
    return f"You: {message}\n\nIT-Assistant: {answer}"


def web_search(message: str) -> str:
    """Answer ``message`` using documents retrieved from the FAISS index.

    The three most similar documents are joined into a context section and
    the model is instructed to answer from that context.

    Args:
        message: Text entered by the user; also used as the retrieval query.

    Returns:
        A string showing the user's message followed by the model's reply.
    """
    similar_docs = db.similarity_search(message, k=3)
    # Joining an empty result list yields "", matching the no-hit case.
    source_knowledge = "\n".join(doc.page_content for doc in similar_docs)

    augmented_prompt = (
        " If the answer to the next query is not contained in the Web Search, "
        "say 'No Answer Is Available' and then just give guidance for the query. \n"
        f"Query: {message} Web Search: {source_knowledge} "
    )

    answer = _ask_model(augmented_prompt)
    return f"You: {message}\n\nIT-Assistant: {answer}"


demo = gr.Interface(
    fn=handle_message,
    inputs=[
        "text",
        gr.Radio(
            ["Chat", "Web-Search"],
            label="mode",
            info="Choose a mode and enter your message, then click submit to interact.",
        ),
    ],
    outputs="text",
    title="IT Assistant",
)

demo.launch()