dofbi committed on
Commit 9a04ccb
1 Parent(s): 67d74e3
Files changed (1): app.py (+17 -53)
app.py CHANGED
@@ -1,59 +1,23 @@
-# File: app.py
-import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-
-# Model configuration
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = AutoModelForCausalLM.from_pretrained(
-    "soynade-research/Oolel-v0.1",
-    torch_dtype=torch.bfloat16,
-    device_map="auto" if torch.cuda.is_available() else None
-)
-tokenizer = AutoTokenizer.from_pretrained("soynade-research/Oolel-v0.1")
+import gradio as gr
 
-def generate_response(messages, max_new_tokens=1024, temperature=0.1):
-    text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-    model_inputs = tokenizer([text], return_tensors="pt").to(device)
-    generated_ids = model.generate(
-        model_inputs.input_ids,
-        max_new_tokens=max_new_tokens,
-        temperature=temperature
-    )
-
-    generated_ids = [
-        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-    ]
-    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return response
+# Load the model
+model_name = "soynade-research/Oolel-v0.1"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
 
-# Gradio interface setup
-def chat_interface(message, history):
-    # Convert the Gradio history to the format expected by the model
-    formatted_history = [
-        {"role": "user" if idx % 2 == 0 else "assistant", "content": msg}
-        for idx, msg in enumerate(sum(history, []))
-    ]
-
-    # Append the new message
-    formatted_history.append({"role": "user", "content": message})
-
-    # Generate the response
-    response = generate_response(formatted_history)
-
-    return response
+# Function to generate a response
+def generate_response(user_input, max_new_tokens=150, temperature=0.7):
+    inputs = tokenizer(user_input, return_tensors="pt").to("cuda")
+    outputs = model.generate(inputs.input_ids, max_new_tokens=max_new_tokens, temperature=temperature)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-# Create the Gradio interface
-iface = gr.ChatInterface(
-    fn=chat_interface,
-    title="Chat with Oolel",
-    description="Chat with the Oolel model",
-    type="messages"
+# Gradio interface
+iface = gr.Interface(
+    fn=generate_response,
+    inputs=[gr.Textbox(label="User message"), gr.Slider(50, 500, value=150, label="Max number of tokens")],
+    outputs="text",
+    title="Oolel Chatbot"
 )
 
-if __name__ == "__main__":
-    iface.launch()
+iface.launch()
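
One caveat in the new version: the model is loaded with device_map="auto", which silently falls back to CPU when no GPU is present, yet the inputs are moved to "cuda" unconditionally, so generate_response crashes on a CPU-only machine. It also decodes the full output (the prompt is echoed back in the response) and passes temperature without do_sample=True, which makes transformers ignore it. A minimal device-safe sketch of the same function; the do_sample=True flag and the prompt-stripping are assumptions about the intended behavior, not part of this commit:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "soynade-research/Oolel-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

def generate_response(user_input, max_new_tokens=150, temperature=0.7):
    # Send inputs to wherever device_map="auto" placed the model,
    # instead of hardcoding "cuda"
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs.input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,  # assumption: without this, temperature is ignored
        temperature=temperature,
    )
    # Decode only the newly generated tokens, skipping the echoed prompt
    new_tokens = outputs[0][inputs.input_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)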
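
The rewrite also drops the chat template and the history handling, so the model now sees raw user text instead of a formatted conversation. If multi-turn chat is wanted back under the simplified loading style, a sketch along these lines could work; it reuses the tokenizer and generate_response defined above, and type="messages" is Gradio's dict-based history format:

import gradio as gr

def chat_fn(message, history):
    # With type="messages", history arrives as [{"role": ..., "content": ...}]
    messages = history + [{"role": "user", "content": message}]
    # Format the conversation with the model's chat template
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    return generate_response(text)

demo = gr.ChatInterface(fn=chat_fn, type="messages", title="Oolel Chatbot")
demo.launch()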