M4sterStudy committed on
Commit 14c1ab3 (verified)
Parent: 65e1e1f

Update app.py

Files changed (1)
  1. app.py +12 -7
app.py CHANGED
@@ -2,6 +2,7 @@ import os
 from huggingface_hub import login
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
+import torch
 
 # Authenticate using the token stored as a secret
 hf_token = os.getenv("HF_API_TOKEN")
@@ -10,17 +11,21 @@ login(hf_token)
 # Load the model and the tokenizer
 model_name = "DeepESP/gpt2-spanish"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")
 
 def chat_with_gpt2_spanish(input_text):
-    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
+    # Check whether the GPU is available
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print(f"Using device: {device}")
+
+    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).to(device)
     outputs = model.generate(
         **inputs,
-        max_length=100,  # Limit the length of the response
-        num_beams=1,  # Use a single beam for speed
-        temperature=0.7,  # Adjust the temperature for less repetitive responses
-        top_p=0.9,  # Use top-p (nucleus sampling) for variety
-        no_repeat_ngram_size=2,  # Avoid repeating n-grams
+        max_length=100,
+        num_beams=1,
+        temperature=0.7,
+        top_p=0.9,
+        no_repeat_ngram_size=2,
         early_stopping=True
     )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
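
Note: in the new version the model is moved to "cuda" unconditionally while the inputs follow a runtime device check, so on a CPU-only Space the two could end up on different devices. A minimal sketch of a device-consistent variant, not part of this commit, reusing the same model and function names and assuming sampling is intended (temperature and top_p only influence generation when do_sample=True), could look like this:

import os
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

login(os.getenv("HF_API_TOKEN"))

# Pick the device once and reuse it for both the model and the inputs
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "DeepESP/gpt2-spanish"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

def chat_with_gpt2_spanish(input_text):
    # Tokenize and move the tensors to the same device as the model
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).to(device)
    outputs = model.generate(
        **inputs,
        max_length=100,
        do_sample=True,  # required for temperature/top_p to take effect
        temperature=0.7,
        top_p=0.9,
        no_repeat_ngram_size=2,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)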