import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
import faiss
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np
from pypdf import PdfReader
title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"
description = """
🔎 [Mistral AI's Mistral 7B Instruct v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) in [GGUF format](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF), using the 4-bit quantized (balanced quality) variant, running on CPU. English only (other languages work, but with lower quality). Built with [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) and [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all).
🔨 Running on CPU-Basic free hardware. Consider duplicating this Space to run without a queue.
Mistral currently has no dedicated system prompt token (such as ```<<SYS>>```); if you need a system prompt, put it in your first message. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing).
"""
"""
[Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
[Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
"""
model_path = "models"
model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
print("Start the model init process")
model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
# Optional RAG setup (currently disabled): read a PDF, split it into chunks,
# embed the chunks and build a FAISS index over them.
"""
reader = PdfReader("./resource/NGAP 01042024.pdf")
text = []
for page in reader.pages:
    # extracting text from each page
    text.append(page.extract_text())
text = ' '.join(text)
chunk_size = 2048
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
def get_text_embedding(text):
    return embeddings.embed_query(text)
# FAISS expects float32 vectors
text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks], dtype="float32")
d = text_embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(text_embeddings)
#index = faiss.read_index("./resource/embeddings_ngap.faiss")
"""
print("Finish the model init process")
def format_chat_prompt(message, chat_history):
    prompt = ""
    for turn in chat_history:
        user_message, bot_message = turn
        prompt = f"{prompt}\nUser: {user_message}\nAssistant: {bot_message}"
    prompt = f"{prompt}\nUser: {message}\nAssistant:"
    return prompt
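# Conversation log seeded with the French system prompt. It is appended to on
# every turn, but model.generate() below only receives the latest message, so
# in the current version this list is bookkeeping rather than model input.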
context = [
    {
        "role": "system",
        "content": """Tu es un assistant virtuel au service des assurés pour l'assurance maladie en France.
Réponds en français avec politesse et signe tes réponses par 'Votre assistant virtuel Ameli'.
""",
    }
]
max_new_tokens = 2048
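# Gradio callback: send the user message to the local GGUF model and append
# the (question, answer) pair to the chatbot history.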
def respond(message, chat_history):
    prompt = message
    context.append({'role': 'user', 'content': f"{prompt}"})
    #tokenized_chat = tokenizer.apply_chat_template(context, tokenize=True, add_generation_prompt=True, return_tensors="pt")
    #outputs = model.generate(tokenized_chat, max_new_tokens=1000, temperature = 0.0)
    #bot_message = tokenizer.decode(outputs[0]).split("<|assistant|>")[-1].replace("</s>","")
    bot_message = model.generate(prompt=prompt, temp=0.5, top_k=40, top_p=1, max_tokens=max_new_tokens, streaming=False)
    context.append({'role': 'assistant', 'content': f"{bot_message}"})
    chat_history.append((message, bot_message))
    return "", chat_history
with gr.Blocks() as demo:
    gr.Markdown("# Assistant virtuel Ameli")
    gr.Markdown("Mes réponses sont générées par IA. Elles peuvent être fausses ou imprécises.")
    with gr.Row():
        with gr.Column(scale=1):
            text = gr.Textbox(lines=5)
            #msg = gr.Textbox(label="Posez votre question")
            btn = gr.Button("Soumettre la question")
        with gr.Column(scale=2, min_width=50):
            chatbot = gr.Chatbot(height=700)  # just to fit the notebook
            clear = gr.ClearButton(components=[text, chatbot], value="Clear console")
    btn.click(respond, inputs=[text, chatbot], outputs=[text, chatbot])
    text.submit(respond, inputs=[text, chatbot], outputs=[text, chatbot])  # Press Enter to submit
if __name__ == "__main__":
demo.queue(max_size=3).launch() |