ZeusCabanas committed
Commit 05a057d · 1 Parent(s): b42d5ea
Files changed (1)
  1. app.py +46 -39
app.py CHANGED
@@ -1,50 +1,57 @@
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
-from huggingface_hub import InferenceClient
 from typing import List, Tuple, Dict
+import torch
 
-client = InferenceClient("AuriLab/gpt-bi-instruct-cesar")
+# Load model and tokenizer
+model_name = "AuriLab/gpt-bi-instruct-cesar"
+tokenizer_name = "AuriLab/gpt-bi"
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
 
-def format_messages(history: List[Tuple[str, str]], system_message: str, user_message: str) -> List[Dict[str, str]]:
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend([
-        {"role": "user" if i % 2 == 0 else "assistant", "content": msg}
-        for turn in history
-        for i, msg in enumerate(turn)
-        if msg
-    ])
-    messages.append({"role": "user", "content": user_message})
-    return messages
+def format_messages(history: List[Tuple[str, str]], system_message: str, user_message: str) -> str:
+    # Format conversation history into a single string
+    formatted_prompt = system_message + "\n\n"
+    for user, assistant in history:
+        if user:
+            formatted_prompt += f"User: {user}\n"
+        if assistant:
+            formatted_prompt += f"Assistant: {assistant}\n"
+    formatted_prompt += f"User: {user_message}\nAssistant:"
+    return formatted_prompt
 
-def respond(message: str, history: List[Tuple[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float) -> str:
-    messages = format_messages(history, system_message, message)
-    response = ""
+def respond(message: str, history: List[Tuple[str, str]]) -> str:
+    system_message = """You are a helpful assistant. Follow these rules:
+1. Provide diverse and varied responses
+2. Avoid repeating the same words or phrases
+3. Use synonyms and alternative expressions
+4. Be concise and direct"""
 
-    for msg in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=0.7,  # Increased for more variety
-        top_p=0.85,  # Adjusted for better balance
-    ):
-        token = msg.choices[0].delta.content
-        response += token
-        yield response
+    prompt = format_messages(history, system_message, message)
+    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
+
+    # Generate response
+    with torch.no_grad():
+        outputs = model.generate(
+            inputs["input_ids"],
+            max_new_tokens=200,
+            temperature=0.7,
+            top_p=0.85,
+            do_sample=True,
+            pad_token_id=tokenizer.pad_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+        )
+
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Extract only the assistant's response
+    response = response.split("Assistant:")[-1].strip()
+
+    return response
 
+# Create the Gradio interface with custom title
 demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(
-            value="""You are a helpful assistant. Follow these rules:
-1. Provide diverse and varied responses
-2. Avoid repeating the same words or phrases
-3. Use synonyms and alternative expressions
-4. Be concise and direct""",
-            label="System message"
-        ),
-        gr.Slider(minimum=1, maximum=256, value=200, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.7, maximum=1.2, value=1.0, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.85, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
+    fn=respond,
+    title="Demo GPT-BI instruct",
 )
 
 if __name__ == "__main__":
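
Note on the new generation path: GPT-2-family tokenizers frequently ship without a pad token, in which case tokenizer(prompt, padding=True, ...) raises an error and pad_token_id=tokenizer.pad_token_id passes None to generate. A minimal standalone sketch of a guard, assuming AuriLab/gpt-bi behaves like a GPT-2 tokenizer in this respect (not confirmed by this commit), which also passes the attention mask that app.py currently omits:

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("AuriLab/gpt-bi")
model = AutoModelForCausalLM.from_pretrained("AuriLab/gpt-bi-instruct-cesar")

# Assumption: like GPT-2, this tokenizer may define no pad token; reusing EOS
# keeps padding=True and pad_token_id=tokenizer.pad_token_id both valid.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

inputs = tokenizer("User: Hello!\nAssistant:", return_tensors="pt")
with torch.no_grad():
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],  # avoids the missing-mask warning
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,
        top_p=0.85,
        pad_token_id=tokenizer.pad_token_id,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))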
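
The switch from client.chat_completion(..., stream=True) to a blocking model.generate call also drops token streaming, so the UI now shows nothing until the full reply is ready. If streaming is wanted back, transformers offers TextIteratorStreamer; a sketch reusing the model, tokenizer, and format_messages defined in app.py above (respond_stream is a hypothetical name, not part of this commit):

from threading import Thread
from transformers import TextIteratorStreamer

def respond_stream(message, history):
    # Hypothetical drop-in for respond; system prompt shortened for brevity.
    prompt = format_messages(history, "You are a helpful assistant.", message)
    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        top_p=0.85,
        streamer=streamer,
    )
    # generate blocks, so run it in a worker thread and consume tokens here
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # gr.ChatInterface re-renders on each yield, like the old version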