# Oddo_ChatBot / main.py — Gradio chat interface for a Mistral-7B-Instruct chatbot
# served through the Hugging Face Inference API.
import gradio as gr
from huggingface_hub import InferenceClient

# Default client; respond() builds a per-request client instead whenever the
# user supplies their own Hugging Face token in the UI.
client = InferenceClient(model='mistralai/Mistral-7B-Instruct-v0.2')
# Earlier FastAPI experiment (disabled): serve a Flan-T5 endpoint plus a static page.
# from fastapi import FastAPI
# from fastapi.staticfiles import StaticFiles
# from fastapi.responses import FileResponse
# from transformers import pipeline
#
# app = FastAPI()
# pipe_flan = pipeline("text2text-generation", model="google/flan-t5-small")
#
# @app.get("/infer_t5")
# def t5(input):
#     output = pipe_flan(input)
#     return {"output": output[0]["generated_text"]}
#
# app.mount("/", StaticFiles(directory="static", html=True), name="static")
#
# @app.get("/")
# def index() -> FileResponse:
#     return FileResponse(path="/app/static/index.html", media_type="text/html")
# Earlier RAG experiment (disabled): LangChain + Chroma vector store over Odoo docs.
# import os
# from langchain import HuggingFaceHub
# from langchain.vectorstores import Chroma
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
#
# mistral_llm = HuggingFaceHub(
#     repo_id='mistralai/Mistral-7B-Instruct-v0.2',
#     model_kwargs={'temperature': 0.1, 'max_length': 1024},
# )
#
# emb_model = "sentence-transformers/all-MiniLM-L6-v2"
# embeddings = HuggingFaceEmbeddings(
#     model_name=emb_model,
#     cache_folder=os.getenv('SENTENCE_TRANSFORMERS_HOME'),
# )
#
# # The vector store used to index the summaries
# vectorstore = Chroma(
#     collection_name="mm_rag_mistral",
#     embedding_function=embeddings,
#     persist_directory="odoo_vector_store",
# )
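# Hypothetical sketch (not part of the original app): if the Chroma store above
# were re-enabled, retrieved passages could be joined into the `context` string
# that the disabled RAG prompt in respond() expects. `build_context` is an
# assumed helper name, not existing code.
#
# def build_context(query: str, k: int = 3) -> str:
#     """Return the top-k matching passages joined into one context string."""
#     docs = vectorstore.similarity_search(query, k=k)
#     return "\n\n".join(doc.page_content for doc in docs)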
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token,
):
    # asimilarity_search(message)  # disabled retrieval hook (see sketch above)

    # Assemble the conversation in chat format. Note that the text_generation
    # call below only sends the system prompt plus the latest message, so this
    # list (and with it the chat history) is not used for generation yet.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Use the token entered in the UI when present; otherwise fall back to the
    # module-level client.
    active_client = (
        InferenceClient(model='mistralai/Mistral-7B-Instruct-v0.2', token=hf_token)
        if hf_token
        else client
    )

    # response = ""
    # context = f"Here is some context, use it: {context}. Here is the chat history: {history}. Answer this question:"
    output = active_client.text_generation(
        # system_message + context + message,  # RAG variant (disabled)
        system_message + message,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=50,
        repetition_penalty=1.1,
    )
    yield output
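# Sketch of an alternative handler (an assumption, not the app's current
# behavior): it sends the full `messages` history through the serverless
# chat-completion API and streams tokens back, so the bot keeps conversational
# memory and the UI updates as text arrives.
def respond_streaming(message, history, system_message, max_tokens, temperature, top_p, hf_token):
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    streaming_client = InferenceClient(
        model='mistralai/Mistral-7B-Instruct-v0.2', token=hf_token or None
    )
    response = ""
    # chat_completion(stream=True) yields chunks whose delta carries new text.
    for chunk in streaming_client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response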
# Chat UI; the additional inputs map one-to-one onto respond()'s trailing parameters.
app = gr.ChatInterface(
respond,
additional_inputs=[
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p (nucleus sampling)",
),
gr.Textbox(label="Hugging Face Token", placeholder="Enter your Hugging Face token here"),
],
css="footer{display:none !important}",
)
if __name__ == "__main__":
    app.queue()  # Enable the request queue so concurrent users are served in order
    app.launch()