Kukedlc committed on
Commit 02bb326 (verified)
Parent(s): 4750cfb

Create app.py

Files changed (1)
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
+ import json
+ import subprocess
+ import requests
+ import time
+ import socket
+ import gradio as gr
+
+ # Check whether the server is accepting connections on the given port
+ def is_server_active(host, port):
+     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+         return s.connect_ex((host, port)) == 0
+
+ # Download the GGUF model, streaming it to disk (the file is several GB)
+ url = "https://huggingface.co/TheBloke/firefly-llama2-13B-chat-GGUF/resolve/main/firefly-llama2-13b-chat.Q4_K_M.gguf?download=true"
+ with requests.get(url, stream=True) as download:
+     download.raise_for_status()
+     with open("./model.gguf", mode="wb") as file:
+         for chunk in download.iter_content(chunk_size=1 << 20):
+             file.write(chunk)
+ print("Model downloaded")
+
+ # Launch the LLM server
+ command = ["python3", "-m", "llama_cpp.server", "--model", "./model.gguf", "--host", "0.0.0.0", "--port", "2600", "--n_threads", "2"]
+ server_process = subprocess.Popen(command)  # keep the process handle so it can be terminated later
+ print("Model server starting...")
+
+ # Wait until the server is accepting connections
+ while not is_server_active("localhost", 2600):
+     print("Waiting for server to start...")
+     time.sleep(5)
+ print("Model server is ready!")
+
+ def response(message, history):
+     url = "http://localhost:2600/v1/completions"
+     # "stream": True so the server sends SSE chunks, matching the parser below
+     body = {"prompt": "[INST]" + message + "[/INST]", "max_tokens": 1024, "echo": False, "stream": True}
+     response_text = ""
+
+     try:
+         # No timeout is set, so the request waits indefinitely for the model
+         with requests.post(url, json=body, stream=True) as stream_response:
+             for line in stream_response.iter_lines():
+                 if not line:  # skip the blank lines that separate SSE events
+                     continue
+                 text = line.decode("utf-8")
+                 print("Raw response:", text)  # print the raw chunk for debugging
+
+                 if text.startswith("data: "):
+                     text = text[len("data: "):]
+                 if text == "[DONE]":  # end-of-stream sentinel
+                     break
+                 if text.startswith("{") and "choices" in text:
+                     try:
+                         response_json = json.loads(text)
+                         part = response_json["choices"][0]["text"]
+                         print(part, end="", flush=True)
+                         response_text += part
+                         yield response_text  # stream the partial reply to the UI
+                     except json.JSONDecodeError as e:
+                         print("Error decoding JSON:", e)
+                         break
+                 elif text.strip():
+                     print("Non-JSON response:", text)
+                     break
+     except requests.exceptions.RequestException as e:
+         print(f"Request error: {e}")
+
+     yield response_text
+
+ def cleanup_server():
+     print("Closing server...")
+     server_process.terminate()  # stop the server process
+     server_process.wait()  # wait for the process to exit
+     print("Server closed.")
+
+ # Configure and launch the Gradio interface
+ gr_interface = gr.ChatInterface(
+     fn=response,
+     title="Firefly-Llama2-13B-Chat-GGUF Chatbot",
+     theme='syddharth/gray-minimal'
+ )
+
+ try:
+     gr_interface.launch(share=True)
+ finally:
+     cleanup_server()  # make sure the server is shut down when the app exits