askbyte committed
Commit 1d5b573 · verified · 1 parent: cec76bf

Create app.py

Files changed (1):
  1. app.py +32 -0
app.py ADDED
@@ -0,0 +1,32 @@
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from flask import Flask, request, jsonify
+ from threading import Thread
+
+ app = Flask(__name__)
+
+ # Load Zephyr-7B once at startup; fp16 on GPU, fp32 on CPU.
+ tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+ model = AutoModelForCausalLM.from_pretrained(
+     "HuggingFaceH4/zephyr-7b-beta",
+     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+     device_map="auto"
+ )
+
+ @app.route("/api/chat", methods=["POST"])
+ def chat():
+     data = request.get_json()
+     question = data.get("question", "")
+
+     # System prompt (Spanish): "You are BITER, an expert business mentor.
+     # You always answer in Spanish with brief, useful advice."
+     prompt = f"Eres BITER, un mentor experto en negocios. Siempre respondes en español con consejos breves y útiles.\nUsuario: {question}\nBITER:"
+
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+     outputs = model.generate(**inputs, max_new_tokens=200)
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     # Keep only the text generated after the last "BITER:" marker.
+     respuesta_final = response.split("BITER:")[-1].strip()
+
+     return jsonify({"choices": [{"message": {"content": respuesta_final}}]})
+
+ def run():
+     # Port 7860 is the default port exposed by Hugging Face Spaces.
+     app.run(host='0.0.0.0', port=7860)
+
+ # Start the Flask server in a background thread.
+ Thread(target=run).start()
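
For a quick smoke test of the endpoint, a minimal client sketch (assuming the server is reachable locally at http://localhost:7860, per app.run above; for a deployed Space, substitute its public URL and the example question is only illustrative):

import requests

# Example question (Spanish): "How do I validate my business idea?"
resp = requests.post(
    "http://localhost:7860/api/chat",
    json={"question": "¿Cómo valido mi idea de negocio?"},
    timeout=300,  # generating up to 200 new tokens can be slow on CPU
)
resp.raise_for_status()

# The endpoint returns an OpenAI-style payload:
# {"choices": [{"message": {"content": "..."}}]}
print(resp.json()["choices"][0]["message"]["content"])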