xavierbarbier committed
Commit cde44e0 · 1 Parent(s): c26fe23

Create app.py

Files changed (1)
  1. app.py +51 -0
app.py ADDED
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# 4-bit NF4 quantization with double quantization to cut the memory
# footprint of the 7B model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model_name = "mistralai/Mistral-7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,  # load_in_4bit is already set in the config
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)


def greet(input_text):
    question = input_text

    # Let the tokenizer's chat template render the [INST] ... [/INST]
    # prompt instead of formatting it by hand.
    messages = [
        {"role": "user", "content": question},
        {"role": "assistant", "content": "le contexte est l'assurance maladie en France"},
        {"role": "user", "content": "Rédige un email courtois de réponse en français à la question"},
    ]

    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
    model_inputs = encodeds.to(model.device)  # device_map="auto" decides placement

    generated_ids = model.generate(
        model_inputs,
        max_new_tokens=1000,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,  # Mistral has no pad token
    )
    decoded = tokenizer.batch_decode(generated_ids)

    # The decoded string contains both [INST] turns; keep only the text after
    # the second [/INST] and strip the EOS tag and name placeholders.
    answer = (
        decoded[0]
        .split("[/INST]")[2]
        .replace("</s>", "")
        .replace("[Votre nom]", "")
        .replace("[nom]", "")
    )

    return answer


iface = gr.Interface(fn=greet, inputs=["text"], outputs="text")
iface.launch()
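
Once the Space is running, the same endpoint can also be queried programmatically. Below is a minimal client sketch using gradio_client; the Space id is a placeholder, since this commit does not name the deployed Space.

from gradio_client import Client

# Hypothetical Space id -- replace with the actual one once deployed.
client = Client("xavierbarbier/<space-name>")

# The single positional argument maps to greet()'s input_text;
# "/predict" is the default endpoint name for a lone gr.Interface.
result = client.predict(
    "Comment renouveler ma carte Vitale ?",
    api_name="/predict",
)
print(result)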