import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
import faiss
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np
from pypdf import PdfReader


title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"

description = """
🔎 [Mistral AI's Mistral 7B Instruct v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) in [GGUF format](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF), a 4-bit quantized version balancing quality and size, running on CPU. English only (other languages are supported, but with lower quality). Built with [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) and [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all). 
🔨 Running on CPU-Basic free hardware. Consider duplicating this Space to run without a queue. 
Mistral does not currently support a system prompt token (such as ```<<SYS>>```); if you need one, put your system prompt in the first message. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing). 
"""

"""
[Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
[Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
"""

model_path = "models"
model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"

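# Fetch the quantized GGUF weights from the Hugging Face Hub into ./models (reuses the local copy if already downloaded)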
hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)

print("Start the model init process")
model = GPT4All(model_name, model_path, allow_download=False, device="cpu")



# RAG pipeline (currently disabled): read the NGAP PDF, split it into chunks,
# embed them, and build a FAISS index for retrieval.

"""
reader = PdfReader("./resource/NGAP 01042024.pdf")
text = []
for page in reader.pages:
  # extracting text from each page
  text.append(page.extract_text())

text = ' '.join(text)

chunk_size = 2048
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    # model_name left unset: the library falls back to its default sentence-transformers model
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

def get_text_embedding(text):
    return embeddings.embed_query(text)


text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks], dtype="float32")  # FAISS expects float32

d = text_embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(text_embeddings)

#index = faiss.read_index("./resource/embeddings_ngap.faiss")
"""
print("Finish the model init process")

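# Flatten the (user, assistant) history into a plain-text prompt. Note: not currently called by respond().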
def format_chat_prompt(message, chat_history):
    prompt = ""
    for turn in chat_history:
        user_message, bot_message = turn
        prompt = f"{prompt}\nUser: {user_message}\nAssistant: {bot_message}"
    prompt = f"{prompt}\nUser: {message}\nAssistant:"
    return prompt

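# Running conversation log, seeded with a French system prompt for the Ameli assistant.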
context = [
    {
        "role": "system",
        "content": """Tu es un assistant virtuel au service des assurés pour l'assurance maladie en France.
        Réponds en français avec politesse et signe tes réponses par 'Votre assistant virtuel Ameli'.
        """,
    }
]

max_new_tokens = 2048

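# Gradio callback: generate a reply to the latest user message.
# Note: `context` and `chat_history` are accumulated, but only the latest message is
# passed to the model; earlier turns are not included in the prompt.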
def respond(message, chat_history):
    prompt = message
    context.append({'role': 'user', 'content': f"{prompt}"})

    #tokenized_chat = tokenizer.apply_chat_template(context, tokenize=True, add_generation_prompt=True, return_tensors="pt")
    #outputs = model.generate(tokenized_chat, max_new_tokens=1000, temperature = 0.0)
    #bot_message = tokenizer.decode(outputs[0]).split("<|assistant|>")[-1].replace("</s>","")

    bot_message = model.generate(prompt=prompt, temp=0.5, top_k=40, top_p=1, max_tokens=max_new_tokens, streaming=False)
    context.append({'role': 'assistant', 'content': f"{bot_message}"})

    chat_history.append((message, bot_message))
    return "", chat_history

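# UI: question box and submit button on the left, chat window and clear button on the right.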
with gr.Blocks() as demo:
    gr.Markdown("# Assistant virtuel Ameli")
    gr.Markdown("Mes réponses sont générées par IA. Elles peuvent être fausses ou imprécises.")
    with gr.Row():
        with gr.Column(scale=1):
            text = gr.Textbox(lines=5)
            #msg = gr.Textbox(label="Posez votre question")
            btn = gr.Button("Soumettre la question")

        with gr.Column(scale=2, min_width=50):
            chatbot = gr.Chatbot(height=700)
            clear = gr.ClearButton(components=[text, chatbot], value="Clear console")

    btn.click(respond, inputs=[text, chatbot], outputs=[text, chatbot])
    text.submit(respond, inputs=[text, chatbot], outputs=[text, chatbot])  # press Enter to submit

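# Allow at most 3 queued requests so the free CPU-Basic hardware is not overwhelmed.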
if __name__ == "__main__":
    demo.queue(max_size=3).launch()