File size: 4,882 Bytes
dc54faf
 
 
1dbb9f0
c298b5c
 
7cc71c0
4c8a6f3
 
dc54faf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aaf7aaf
 
 
 
dc54faf
 
4c8a6f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc54faf
 
 
8bd2156
dc54faf
 
 
 
1dbb9f0
 
 
 
 
 
 
 
688d0cc
8ff604a
 
 
dc54faf
 
 
 
8ff604a
4c8a6f3
8ff604a
dc54faf
4c8a6f3
dc54faf
 
 
 
 
 
 
 
 
 
 
 
688d0cc
dc54faf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
688d0cc
 
 
 
dc54faf
 
 
 
 
 
688d0cc
dc54faf
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
import faiss
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np
from pypdf import PdfReader


title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"

description = """
🔎 [Mistral AI's Mistral 7B Instruct v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) [GGUF format model](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF) , 4-bit quantization balanced quality gguf version, running on CPU. English Only (Also support other languages but the quality's not good). Using [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all). 
🔨 Running on CPU-Basic free hardware. Suggest duplicating this space to run without a queue. 
Mistral does not support system prompt symbol (such as ```<<SYS>>```) now, input your system prompt in the first message if you need. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing). 
"""

# Reference links (module-level no-op string, kept for maintainers):
"""
[Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
[Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
"""

# Local directory and filename for the quantized GGUF weights.
model_path = "models"
model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"

# Download the weights from the Hub into ./models (no-op when already cached).
hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)

print("Start the model init process")
# BUG FIX: original read `model = model = GPT4All(...)` — a doubled assignment.
# allow_download=False because the file was just fetched explicitly above.
model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
# ---- Build the retrieval index over the reference PDF ----------------------
# BUG FIX: in the original file, `embeddings` was defined *after* it was
# first used by get_text_embedding(), raising NameError at import time.
# The embedding-model setup is hoisted here so the block is self-consistent.
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
# NOTE(review): no model_name is passed, so HuggingFaceEmbeddings falls back
# to its library default sentence-transformers model — confirm this is intended.
embeddings = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# creating a pdf reader object
reader = PdfReader("resourse/NGAP 01042024.pdf")
text = []
for page in reader.pages:
    # extracting text from page (iterate pages directly instead of
    # np.arange + int() index round-trip)
    text.append(page.extract_text())

text = ' '.join(text)

# Split the flat text into fixed-size character chunks for embedding.
chunk_size = 2048
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

def get_text_embedding(text):
    """Return the embedding vector (list of floats) for *text*."""
    return embeddings.embed_query(text)

# BUG FIX: faiss requires float32 input; a plain np.array of Python floats
# is float64 and IndexFlatL2.add() rejects it.
text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks], dtype=np.float32)

d = text_embeddings.shape[1]          # embedding dimensionality
index = faiss.IndexFlatL2(d)          # exact L2 nearest-neighbour index
index.add(text_embeddings)

print("Finish the model init process")

# Llama/Mistral instruction template and a French system prompt.
model.config["promptTemplate"] = "[INST] {0} [/INST]"
model.config["systemPrompt"] = "Tu es un assitant et tu dois répondre en français"
model._is_chat_session_activated = False

max_new_tokens = 2048

#index = faiss.load_local("resourse/embeddings_ngap.faiss")

def generater(message, history, temperature, top_p, top_k):
    """Stream a model reply for *message* given the chat *history*.

    Builds a Mistral-instruct prompt (``<s>[INST] ... [/INST] answer</s>``)
    from the past turns, then yields the progressively concatenated tokens
    so Gradio can render a streaming response.

    NOTE(review): ChatInterface supplies extra args from additional_inputs;
    the current UploadButton input does not match (temperature, top_p, top_k)
    — confirm the wiring.
    """
    prompt = "<s>"
    for user_message, assistant_message in history:
        prompt += model.config["promptTemplate"].format(user_message)
        # BUG FIX: original was `prompt += assistant_message "</s>"` —
        # a missing `+` made the whole file a SyntaxError.
        prompt += assistant_message + "</s>"

    prompt += model.config["promptTemplate"].format(message)

    outputs = []
    for token in model.generate(prompt=prompt, temp=temperature, top_k=top_k, top_p=top_p, max_tokens=max_new_tokens, streaming=True):
        outputs.append(token)
        yield "".join(outputs)

def vote(data: gr.LikeData):
    """Callback for chatbot like/dislike events.

    Placeholder: the feedback flag is read but nothing is stored yet,
    so the outcome is identical for both branches.
    """
    liked = data.liked
    if liked:
        return None
    return None

# Chat display widget; avatar image paths are relative to the app working dir.
chatbot = gr.Chatbot(avatar_images=('resourse/user-icon.png', 'resourse/chatbot-icon.png'),bubble_full_width = False)
# NOTE: the triple-quoted string below is a *disabled* set of sampling
# sliders kept for reference — it is a no-op expression, not live code.
"""
additional_inputs=[
    gr.Slider(
        label="temperature",
        value=0.5,
        minimum=0.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.",
    ),
    gr.Slider(
        label="top_p",
        value=1.0,
        minimum=0.0,
        maximum=1.0,
        step=0.01,
        interactive=True,
        info="0.1 means only the tokens comprising the top 10% probability mass are considered. Suggest set to 1 and use temperature. 1 means 100% and will disable it",
    ),
    gr.Slider(
        label="top_k",
        value=40,
        minimum=0,
        maximum=1000,
        step=1,
        interactive=True,
        info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit.",
    )
]
"""
# Live extra input: a single file-upload button.
# NOTE(review): generater() expects (temperature, top_p, top_k) as its
# additional inputs; a lone UploadButton will not match that signature when
# ChatInterface forwards it — confirm the intended wiring.
additional_inputs=[
gr.UploadButton(file_types=[".pdf",".csv",".doc"])
]
# ChatInterface ties the streaming generater() callback to the chatbot widget.
iface = gr.ChatInterface(
    fn = generater,
    title=title,
    description = description,
    chatbot=chatbot,
    additional_inputs=additional_inputs,

)

# Wrap the ChatInterface in a Blocks context so custom CSS and the
# like/dislike handler can be attached before launch.
with gr.Blocks(css="resourse/style/custom.css") as demo:
    # Register the (placeholder) vote callback on like/dislike events.
    chatbot.like(vote, None, None)
    iface.render()

if __name__ == "__main__":
    # Small queue to protect the CPU-only free-tier hardware from overload.
    demo.queue(max_size=3).launch()