import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient(model='mistralai/Mistral-7B-Instruct-v0.2')
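
# Hedged sketch (assumption, not part of the original flow): the client can also be
# created with an explicit token, e.g. from the HF_TOKEN environment variable or the
# "Hugging Face Token" textbox defined further below, instead of relying on ambient credentials.
# client = InferenceClient(
#     model='mistralai/Mistral-7B-Instruct-v0.2',
#     token=os.getenv('HF_TOKEN'),  # hypothetical wiring; would require `import os`
# )
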
# from fastapi import FastAPI
# from fastapi.staticfiles import StaticFiles
# from fastapi.responses import FileResponse
# from transformers import pipeline

# app = FastAPI()

# pipe_flan = pipeline("text2text-generation", model="google/flan-t5-small")

# @app.get("/infer_t5")
# def t5(input):
#     output = pipe_flan(input)
#     return {"output": output[0]["generated_text"]}

# app.mount("/", StaticFiles(directory="static", html=True), name="static")

# @app.get("/")
# def index() -> FileResponse:
#     return FileResponse(path="/app/static/index.html", media_type="text/html")
#from langchain import HuggingFaceHub
#import os
#from langchain.vectorstores import Chroma
#from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# mistral_llm = HuggingFaceHub(
#     repo_id='mistralai/Mistral-7B-Instruct-v0.2',
#     model_kwargs= {'temperature':0.1, 'max_length':1024},
#     )

# emb_model = "sentence-transformers/all-MiniLM-L6-v2"
# embeddings = HuggingFaceEmbeddings(
#     model_name=emb_model,
#     cache_folder=os.getenv('SENTENCE_TRANSFORMERS_HOME')
# )
# # The vectorstore to use to index the summaries
# vectorstore = Chroma(
#     collection_name="mm_rag_mistral",
#     embedding_function=embeddings,
#     persist_directory="odoo_vector_store",
# )
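
# Hedged sketch only: if the commented-out vectorstore above were enabled, retrieved
# passages could be folded into the prompt roughly like this. similarity_search and
# page_content are standard LangChain/Chroma APIs; retrieve_context is a hypothetical helper.
# def retrieve_context(query: str, k: int = 3) -> str:
#     docs = vectorstore.similarity_search(query, k=k)
#     return "\n\n".join(doc.page_content for doc in docs)
# The respond() function below could then prepend retrieve_context(message) to its prompt.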

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token,  # collected from the UI, but not yet passed to the module-level client
):
    # asimilarity_search(message)  # placeholder for a future retrieval step
    # Build an OpenAI-style message list from the system prompt and prior turns.
    # Note: this list is not passed to client.text_generation below, which takes a plain string prompt.
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    # response = ""
    # context = f"Here is some context, use it: {context}. Here is the chat history: {history}. Answer this question:"

    outputs = client.text_generation(
        # system_message + context + message,
        system_message + message,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=50,
        repetition_penalty=1.1,
    )
    yield outputs
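
# Hedged alternative (not the author's current path): the `messages` list built above
# could be sent through the chat-completion API instead of raw text_generation.
# Depending on the huggingface_hub version, streaming looks roughly like this:
# def respond_chat(message, history, system_message, max_tokens, temperature, top_p, hf_token):
#     messages = [{"role": "system", "content": system_message}]
#     for user_msg, bot_msg in history:
#         if user_msg:
#             messages.append({"role": "user", "content": user_msg})
#         if bot_msg:
#             messages.append({"role": "assistant", "content": bot_msg})
#     messages.append({"role": "user", "content": message})
#     response = ""
#     for chunk in client.chat_completion(
#         messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p, stream=True
#     ):
#         response += chunk.choices[0].delta.content or ""
#         yield response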


app = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",

        ),
        gr.Textbox(label="Hugging Face Token", placeholder="Enter your Hugging Face token here"),
    ],
    css="footer{display:none !important}",
)


# if __name__ == "__main__":
app.queue()  # enable the request queue
app.launch()