import os

import gradio as gr
from langchain.llms import CTransformers
from transformers import AutoTokenizer

# Hugging Face repo with the quantized GGUF weights
MODEL_PATH = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"

# Some basic generation settings for the model
config = {
    "max_new_tokens": 1000,
    "context_length": 1000,
    "repetition_penalty": 1.1,
    "temperature": 0.5,
    "top_k": 50,
    "top_p": 0.9,
    "stream": True,
    "threads": os.cpu_count() // 2,
}

model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# We use LangChain's CTransformers LLM class to load our quantized model.
# If the repo contains several .gguf files, pass model_file=... to pick one.
llm = CTransformers(model=MODEL_PATH, config=config)

# Tokenizer for Mistral-7B-Instruct from Hugging Face (not needed by
# CTransformers for generation; useful e.g. to count prompt tokens)
tokenizer = AutoTokenizer.from_pretrained(model_name)


def greet(input_text):
    question = input_text
    # Mistral-Instruct prompt, kept in French because the app answers French
    # health-insurance questions. Translation: "The context is health
    # insurance in France. Question: ... Write a courteous reply email in
    # French to the question."
    prompt = f"""[INST] Le contexte est l'assurance maladie en France.
Question : {question}
Rédige un email courtois de réponse en français à la question. [/INST]"""
    answer = llm(prompt)
    # Strip the end-of-sequence token and the model's placeholder signatures
    answer = answer.replace("</s>", "").replace("[Votre nom]", "").replace("[nom]", "")
    return answer


iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
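# ------------------------------------------------------------------
# Optional streaming variant (a minimal sketch, not part of the app
# above). Note that "stream": True in the config does not make the
# LangChain call stream: llm(prompt) still returns the full string.
# One way to stream tokens into the Gradio textbox is to call the
# ctransformers model directly from a generator function. The
# model_file name below is an assumption; use whichever .gguf
# quantization you actually downloaded.

from ctransformers import AutoModelForCausalLM as CTAutoModel

llm_raw = CTAutoModel.from_pretrained(
    MODEL_PATH,
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",  # assumed file name
    model_type="mistral",
)


def greet_stream(input_text):
    prompt = f"""[INST] Le contexte est l'assurance maladie en France.
Question : {input_text}
Rédige un email courtois de réponse en français à la question. [/INST]"""
    partial = ""
    # ctransformers yields text chunks when called with stream=True;
    # Gradio sends each successive yield to the output textbox.
    for chunk in llm_raw(prompt, max_new_tokens=1000, stream=True):
        partial += chunk
        yield partial


# To try it, build the interface with fn=greet_stream instead of fn=greet.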