Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,33 +1,180 @@
|
|
1 |
-
import
|
2 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
This is platform-dependent; works for Windows (cmd) in this example.
|
8 |
-
"""
|
9 |
-
return subprocess.Popen(['start', 'cmd', '/k', command], shell=True)
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
-
|
17 |
-
time.sleep(5)
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
run_process('python src\\reference_serve.py')
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
run_process('python src\\app.py')
|
32 |
|
33 |
-
|
|
|
|
|
|
|
|
1 |
+
import threading
|
2 |
+
import http.server
|
3 |
+
import socketserver
|
4 |
+
import os
|
5 |
+
import yaml
|
6 |
+
from flask import Flask, request, jsonify
|
7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
8 |
+
import torch
|
9 |
+
import gradio as gr
|
10 |
+
from utils.upload_file import UploadFile
|
11 |
+
from utils.chatbot import ChatBot
|
12 |
+
from utils.ui_settings import UISettings
|
13 |
+
from utils.load_config import LoadConfig
|
14 |
+
from pyprojroot import here
|
15 |
|
16 |
+
# Read the application settings from configs/app_config.yml (resolved against
# the project root by pyprojroot's ``here``).
with open(here("configs/app_config.yml")) as cfg:
    app_config = yaml.load(cfg, Loader=yaml.FullLoader)

# Port for the reference file server and the two directories it exposes.
PORT = app_config["serve"]["port"]
DIRECTORY1, DIRECTORY2 = (
    app_config["directories"][key]
    for key in ("data_directory", "data_directory_2")
)
|
23 |
+
|
24 |
+
# ================================
|
25 |
+
# Part 1: Reference Serve Code
|
26 |
+
# ================================
|
27 |
+
class MultiDirectoryHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
    """Serve files from ``DIRECTORY1`` and ``DIRECTORY2`` besides the default root.

    Routing, in order:

    * ``/<basename(DIRECTORY1)>/<rest>`` is rewritten to ``DIRECTORY1/<rest>``.
    * ``/<basename(DIRECTORY2)>/<rest>`` is rewritten to ``DIRECTORY2/<rest>``.
    * ``/<name>`` is served from ``DIRECTORY1/<name>`` or ``DIRECTORY2/<name>``,
      whichever exists first.
    * Everything else is left to ``SimpleHTTPRequestHandler.translate_path``.
    """

    @staticmethod
    def _is_plain_file_name(name):
        """Return True when *name* is a single path component that stays inside its directory."""
        return (
            bool(name)
            and name not in (os.curdir, os.pardir)
            and os.path.basename(name) == name
            and not os.path.splitdrive(name)[0]
        )

    def translate_path(self, path):
        """Translate the request target *path* into a local filesystem path.

        Args:
            path: Request target as received by the handler; it may still
                carry a query string, a fragment and percent-escapes.

        Returns:
            The path of a file found directly in one of the two data
            directories, or whatever the base class returns for the
            (possibly rewritten) path.
        """
        from urllib.parse import unquote

        # Route on the path component only; the base class strips the query
        # and fragment as well, so dropping them here loses nothing.
        url_path = path.split('?', 1)[0].split('#', 1)[0]
        parts = url_path.split('/', 2)
        if len(parts) > 1:
            # Decode the first segment so names containing spaces or other
            # escaped characters can match real directory and file names.
            first_directory = unquote(parts[1])
            if first_directory == os.path.basename(DIRECTORY1):
                path = os.path.join(DIRECTORY1, *parts[2:])
            elif first_directory == os.path.basename(DIRECTORY2):
                path = os.path.join(DIRECTORY2, *parts[2:])
            elif self._is_plain_file_name(first_directory):
                # Only a bare file name may be looked up directly; decoded
                # separators or ".." must never be joined onto the directory.
                for directory in (DIRECTORY1, DIRECTORY2):
                    candidate = os.path.join(directory, first_directory)
                    if os.path.isfile(candidate):
                        return candidate
        # No direct hit: let the base class normalise and resolve the path.
        return super().translate_path(path)
|
45 |
+
|
46 |
+
def start_reference_server():
    """Serve the reference documents over HTTP on ``PORT`` until the server stops.

    Binds a ``socketserver.TCPServer`` to host ``""`` and ``PORT`` using
    ``MultiDirectoryHTTPRequestHandler``; ``serve_forever`` blocks the caller.
    """
    address = ("", PORT)
    httpd = socketserver.TCPServer(address, MultiDirectoryHTTPRequestHandler)
    # The context manager closes the listening socket when serving ends.
    with httpd:
        print(f"Serving at port {PORT}")
        httpd.serve_forever()
|
50 |
+
|
51 |
+
# ================================
|
52 |
+
# Part 2: LLM Serve Code
|
53 |
+
# ================================
|
54 |
+
APPCFG = LoadConfig()

app = Flask(__name__)

# Tokenizer for the engine named in the project configuration.
tokenizer = AutoTokenizer.from_pretrained(
    APPCFG.llm_engine,
    token=APPCFG.gemma_token,
    device=APPCFG.device,
)

# Half-precision causal LM placed on the configured device.
# NOTE(review): the model id is hard-coded here while the tokenizer is loaded
# from APPCFG.llm_engine — confirm the two are meant to refer to the same model.
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path="BioMistral/BioMistral-7B",
    token=APPCFG.gemma_token,
    torch_dtype=torch.float16,
    device_map=APPCFG.device,
)

# Text-generation pipeline used by the /generate_text endpoint.
app_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
70 |
+
|
71 |
+
@app.route("/generate_text", methods=["POST"])
def generate_text():
    """Generate a completion for the prompt supplied in the JSON request body.

    JSON fields read from the body:
        prompt: Required. Passed to the tokenizer's ``apply_chat_template``
            (presumably a list of chat messages — confirm against the client).
        max_new_tokens: Defaults to 1000.
        do_sample: Defaults to True.
        temperature: Defaults to 0.1.
        top_k: Defaults to 50.
        top_p: Defaults to 0.95.

    Returns:
        ``{"response": <text>}`` where the text is the pipeline output with
        the leading prompt-length prefix removed, or ``{"error": <message>}``
        with HTTP status 400 when the body is not a JSON object or carries no
        prompt.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so bad requests get a clear 400 rather than an unhandled error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    prompt = data.get("prompt")
    if not prompt:
        return jsonify({"error": "Missing required field 'prompt'."}), 400

    generation_kwargs = {
        "max_new_tokens": data.get("max_new_tokens", 1000),
        "do_sample": data.get("do_sample", True),
        "temperature": data.get("temperature", 0.1),
        "top_k": data.get("top_k", 50),
        "top_p": data.get("top_p", 0.95),
    }

    tokenized_prompt = app_pipeline.tokenizer.apply_chat_template(
        prompt, tokenize=False, add_generation_prompt=True)
    outputs = app_pipeline(tokenized_prompt, **generation_kwargs)

    # Drop the first len(tokenized_prompt) characters so only the newly
    # generated continuation is returned.
    generated_text = outputs[0]["generated_text"]
    return jsonify({"response": generated_text[len(tokenized_prompt):]})
|
|
|
93 |
|
94 |
+
def start_llm_server():
    """Run the Flask app that exposes the LLM endpoint on port 8888, debug mode off."""
    app.run(port=8888, debug=False)
|
|
|
96 |
|
97 |
+
# ================================
|
98 |
+
# Part 3: Gradio Chatbot Code
|
99 |
+
# ================================
|
100 |
+
def start_gradio_app():
    """Build the "Med-App" Gradio interface and launch it.

    The page has four rows: the chat pane (next to an initially hidden
    reference column), the text input, the action buttons with the RAG source
    selector, and the sampling sliders. Uploads are handled by
    ``UploadFile.process_uploaded_files``; text submissions (Enter key or the
    submit button) are handled by ``ChatBot.respond``.
    """

    def bind_respond(register_event):
        """Wire ``ChatBot.respond`` to an event, then make the textbox interactive again."""
        register_event(
            fn=ChatBot.respond,
            inputs=[chatbot, input_txt, rag_with_dropdown, temperature_bar, top_k, top_p],
            outputs=[input_txt, chatbot, ref_output],
            queue=False,
        ).then(lambda: gr.Textbox(interactive=True), None, [input_txt], queue=False)

    with gr.Blocks() as demo:
        with gr.Tabs():
            with gr.TabItem("Med-App"):
                # Row 1: hidden reference column beside the chat window.
                with gr.Row():
                    with gr.Column(visible=False):
                        ref_output = gr.Markdown()
                    with gr.Column():
                        chatbot = gr.Chatbot(
                            [],
                            elem_id="chatbot",
                            bubble_full_width=False,
                            height=500,
                            avatar_images=("images/test.png", "images/Gemma-logo.png"),
                        )
                        # Like/dislike feedback on chatbot messages.
                        chatbot.like(UISettings.feedback, None, None)

                # Row 2: free-text input.
                with gr.Row():
                    input_txt = gr.Textbox(
                        lines=4,
                        scale=8,
                        placeholder="Enter text and press enter, or upload PDF files",
                    )

                # Row 3: actions and RAG source selection.
                with gr.Row():
                    text_submit_btn = gr.Button(value="Submit text")
                    # NOTE(review): no click handler is attached to this button
                    # in the visible code, so the reference column is never
                    # toggled — confirm whether a binding is missing.
                    gr.Button(value="References")
                    upload_btn = gr.UploadButton(
                        "📁 Upload PDF or doc files",
                        file_types=['.pdf', '.doc'],
                        file_count="multiple",
                    )
                    gr.ClearButton([input_txt, chatbot])
                    rag_with_dropdown = gr.Dropdown(
                        label="RAG with",
                        choices=["Preprocessed doc", "Upload doc: Process for RAG"],
                        value="Preprocessed doc",
                    )

                # Row 4: sampling controls forwarded to ChatBot.respond.
                with gr.Row():
                    temperature_bar = gr.Slider(
                        minimum=0.1,
                        maximum=1,
                        value=0.1,
                        step=0.1,
                        label="Temperature",
                        info="Increasing the temperature will make the model answer more creatively.",
                    )
                    top_k = gr.Slider(
                        minimum=0.0,
                        maximum=100.0,
                        step=1,
                        label="top_k",
                        value=50,
                        info="A lower value (e.g. 10) will result in more conservative answers.",
                    )
                    top_p = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        step=0.01,
                        label="top_p",
                        value=0.95,
                        info="A lower value will generate more focused and conservative text.",
                    )

                # Event wiring: file uploads first, then both text triggers.
                upload_btn.upload(
                    fn=UploadFile.process_uploaded_files,
                    inputs=[upload_btn, chatbot, rag_with_dropdown],
                    outputs=[input_txt, chatbot],
                    queue=False,
                )
                bind_respond(input_txt.submit)
                bind_respond(text_submit_btn.click)

    demo.launch()
|
163 |
+
|
164 |
+
# ================================
|
165 |
+
# Main: Running all services concurrently
|
166 |
+
# ================================
|
167 |
+
if __name__ == "__main__":
    # Run the reference file server, the LLM endpoint and the Gradio UI side
    # by side, one thread per service.
    services = (start_reference_server, start_llm_server, start_gradio_app)
    workers = [threading.Thread(target=service) for service in services]

    for worker in workers:
        worker.start()

    # Block the main thread until every service thread has returned.
    for worker in workers:
        worker.join()
|