import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
import faiss
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np
from pypdf import PdfReader
from gradio_pdf import PDF
from transformers import pipeline
title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"
description = """
🔎 [Mistral AI's Mistral 7B Instruct v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) in [GGUF format](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF), the 4-bit quantized version with balanced quality, running on CPU. English only (other languages are supported, but with lower quality). Built with [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) and [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all).
🔨 Running on CPU-Basic free hardware. We suggest duplicating this Space to run without a queue.
Mistral does not currently support a system prompt token (such as ```<<SYS>>```); if you need one, include your system prompt in the first message. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing).
"""
"""
[Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
[Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
"""
"""
model_path = "models"
model_name = "SmolLM-1.7B-Instruct.Q2_K.gguf"
hf_hub_download(repo_id="mradermacher/SmolLM-1.7B-Instruct-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = "croissantllm/CroissantLLMBase"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
print("Start the model init process")
"""model = model = GPT4All(model_name, model_path, allow_download = False, device="cpu")
model.config["promptTemplate"] = "[INST] {0} [/INST]"
model.config["systemPrompt"] = "Tu es un assitant et tu dois répondre en français"
model._is_chat_session_activated = False
max_new_tokens = 2048"""
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
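# No model_name is passed, so HuggingFaceEmbeddings uses its default
# sentence-transformers model; it must match the model used to build the FAISS index below.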
embeddings = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
print("Finish the model init process")
def get_text_embedding(text):
    return embeddings.embed_query(text)
# FAISS index of precomputed chunk embeddings, downloaded from this Space's resources
doc_path = hf_hub_download(repo_id="xavierbarbier/rag_ngap", filename="resource/embeddings_ngap.faiss", repo_type="space")
index = faiss.read_index(doc_path)
# Chunks
doc_path = hf_hub_download(repo_id="xavierbarbier/rag_ngap", filename="resource/NGAP 01042024.pdf", repo_type="space")
reader = PdfReader(doc_path)
text = []
for page in reader.pages:
    # extracting text from page
    text.append(page.extract_text())
text = ' '.join(text)
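# Split the concatenated text into fixed-size character chunks; 2048 characters
# per chunk is assumed to match the chunking used when the FAISS index was built.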
chunk_size = 2048
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
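# The index loaded above is assumed to hold one embedding per chunk, produced with
# the same embedding model. A minimal sketch of how such an index could be built
# (illustrative only; build_faiss_index is not part of this app and is never called here):
def build_faiss_index(chunk_texts):
    # Embed every chunk and store the vectors in a flat L2 index.
    vectors = np.array([get_text_embedding(c) for c in chunk_texts], dtype="float32")
    idx = faiss.IndexFlatL2(vectors.shape[1])
    idx.add(vectors)
    return idx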
def qa(question):
    question_embeddings = np.array([get_text_embedding(question)])
    D, I = index.search(question_embeddings, k=1)  # distance, index
    retrieved_chunk = [chunks[i] for i in I.tolist()[0]]
    prompt = f"""
Context information is below.
---------------------
{retrieved_chunk}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {question}
Answer:
"""
"""
max_new_tokens = 2048
outputs = model.generate(prompt=prompt, temp=0.5, top_k = 40, top_p = 1, max_tokens = max_new_tokens)"""
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
tokens = model.generate(**inputs, max_length=1000, do_sample=True, top_p=0.95, top_k=60, temperature=0.3)
return tokenizer.decode(tokens[0])
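# Example invocation (illustrative only; the question below is made up):
#   print(qa("Quelle est la cotation d'une consultation ?"))  # "What is the fee code for a consultation?"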
with gr.Blocks() as demo:
    question_input = gr.Textbox(label="Question")
    qa_button = gr.Button("Click to qa")
    prompt_output = gr.Textbox(label="prompt")
    qa_button.click(qa, question_input, prompt_output)

if __name__ == "__main__":
    demo.queue(max_size=3).launch()