import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
import faiss
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np
from pypdf import PdfReader
from gradio_pdf import PDF
from transformers import pipeline

title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"
description = """
🔎 [Mistral AI's Mistral 7B Instruct v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) [GGUF format model](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF), 4-bit quantized GGUF version balancing quality and size, running on CPU. English only (other languages are supported, but the quality is not as good). Uses [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) and [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all).

🔨 Running on CPU-Basic free hardware. Consider duplicating this Space to run without a queue. Mistral does not currently support a system prompt symbol (such as ```<>```); if you need a system prompt, include it in your first message. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing).
"""

"""
[Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
[Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
"""

# Previous GGUF model download, kept for reference:
"""
model_path = "models"
model_name = "SmolLM-1.7B-Instruct.Q2_K.gguf"
hf_hub_download(repo_id="mradermacher/SmolLM-1.7B-Instruct-GGUF", filename=model_name,
                local_dir=model_path, local_dir_use_symlinks=False)
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

print("Start the model init process")

# Generation model: Phi-3.5-mini-instruct loaded through transformers
model_name = "microsoft/Phi-3.5-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")

# Previous GPT4All setup, kept for reference:
"""
model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
model.config["promptTemplate"] = "[INST] {0} [/INST]"
model.config["systemPrompt"] = "Tu es un assistant et tu dois répondre en français"
model._is_chat_session_activated = False
max_new_tokens = 2048
"""

# Embedding model (no model_name given, so the library's default
# sentence-transformers model is used)
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

print("Finish the model init process")


def get_text_embedding(text):
    return embeddings.embed_query(text)


# FAISS index: prebuilt embeddings of the NGAP document, stored in the Space repo
doc_path = hf_hub_download(repo_id="xavierbarbier/rag_ngap",
                           filename="resource/embeddings_ngap.faiss", repo_type="space")
index = faiss.read_index(doc_path)

# Chunks: download the NGAP PDF, extract its text and split it into fixed-size chunks
doc_path = hf_hub_download(repo_id="xavierbarbier/rag_ngap",
                           filename="resource/NGAP 01042024.pdf", repo_type="space")

# creating a pdf reader object
reader = PdfReader(doc_path)

# extracting text from every page
text = ' '.join(page.extract_text() for page in reader.pages)

chunk_size = 2048
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]


def qa(question):
    # FAISS expects float32 query vectors
    question_embeddings = np.array([get_text_embedding(question)], dtype=np.float32)
    D, I = index.search(question_embeddings, k=1)  # distances, indices
    retrieved_chunks = [chunks[i] for i in I.tolist()[0]]
    context = "\n".join(retrieved_chunks)  # join chunks instead of injecting a Python list repr
    prompt = f"""
    Context information is below.
    ---------------------
    {context}
    ---------------------
    Given the context information and not prior knowledge, answer the query.
    Query: {question}
    Answer:
    """
    # Previous GPT4All generation call, kept for reference:
    """
    max_new_tokens = 2048
    outputs = model.generate(prompt=prompt, temp=0.5, top_k=40, top_p=1, max_tokens=max_new_tokens)
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # max_new_tokens bounds the generated answer only; max_length would also count the
    # prompt tokens and could leave no room for the answer
    tokens = model.generate(**inputs, max_new_tokens=1000, do_sample=True,
                            top_p=0.95, top_k=60, temperature=0.3)
    return tokenizer.decode(tokens[0])


with gr.Blocks() as demo:
    question_input = gr.Textbox(label="Question")
    qa_button = gr.Button("Click to qa")
    prompt_output = gr.Textbox(label="prompt")
    qa_button.click(qa, question_input, prompt_output)

if __name__ == "__main__":
    demo.queue(max_size=3).launch()
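
# The Space downloads a prebuilt FAISS index (resource/embeddings_ngap.faiss) instead of
# building it at startup. As a hedged sketch only: an index like it could be built offline
# from the same chunks with the snippet below. The output path, the variable names and the
# choice of IndexFlatL2 are assumptions for illustration, not taken from this Space.
"""
chunk_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks],
                            dtype=np.float32)                # embed every chunk (float32 for FAISS)
offline_index = faiss.IndexFlatL2(chunk_embeddings.shape[1]) # exact L2 index over the embedding dim
offline_index.add(chunk_embeddings)                          # add all chunk vectors
faiss.write_index(offline_index, "resource/embeddings_ngap.faiss")  # persist next to the PDF
"""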