titanhacker committed on
Commit 321ea15 · verified · 1 Parent(s): fa0aa74

Update app.py

Files changed (1)
  1. app.py +173 -26
app.py CHANGED
@@ -1,33 +1,180 @@
-import subprocess
-import time
-
-def run_process(command):
-    """
-    Function to run a command in a separate terminal window.
-    This is platform-dependent; works for Windows (cmd) in this example.
-    """
-    return subprocess.Popen(['start', 'cmd', '/k', command], shell=True)
-
-if __name__ == "__main__":
-    # Step 1: Run the data upload script
-    print("Running upload_data_manually.py...")
-    subprocess.call('python src\\upload_data_manually.py', shell=True)
-
-    # Give some time for the upload script to complete
-    time.sleep(5)
-
-    # Step 2: Run the reference server
-    print("Starting reference_serve.py in a new terminal...")
-    run_process('python src\\reference_serve.py')
-
-    # Step 3: Run the LLM service
-    time.sleep(2)
-    print("Starting llm_service.py in a new terminal...")
-    run_process('python src\\llm_service.py')
-
-    # Step 4: Run the app
-    time.sleep(2)
-    print("Starting app.py in a new terminal...")
-    run_process('python src\\app.py')
-
-    print("All services are running. Check the separate terminal windows.")
+import threading
+import http.server
+import socketserver
+import os
+import yaml
+from flask import Flask, request, jsonify
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import torch
+import gradio as gr
+from utils.upload_file import UploadFile
+from utils.chatbot import ChatBot
+from utils.ui_settings import UISettings
+from utils.load_config import LoadConfig
+from pyprojroot import here
+
+# Load the app config
+with open(here("configs/app_config.yml")) as cfg:
+    app_config = yaml.load(cfg, Loader=yaml.FullLoader)
+
+PORT = app_config["serve"]["port"]
+DIRECTORY1 = app_config["directories"]["data_directory"]
+DIRECTORY2 = app_config["directories"]["data_directory_2"]
+
+# ================================
+# Part 1: Reference Serve Code
+# ================================
+class MultiDirectoryHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
+    """Serve files from multiple directories."""
+    def translate_path(self, path):
+        parts = path.split('/', 2)
+        if len(parts) > 1:
+            first_directory = parts[1]
+            if first_directory == os.path.basename(DIRECTORY1):
+                path = os.path.join(DIRECTORY1, *parts[2:])
+            elif first_directory == os.path.basename(DIRECTORY2):
+                path = os.path.join(DIRECTORY2, *parts[2:])
+            else:
+                file_path1 = os.path.join(DIRECTORY1, first_directory)
+                file_path2 = os.path.join(DIRECTORY2, first_directory)
+                if os.path.isfile(file_path1):
+                    return file_path1
+                elif os.path.isfile(file_path2):
+                    return file_path2
+        return super().translate_path(path)
+
+def start_reference_server():
+    with socketserver.TCPServer(("", PORT), MultiDirectoryHTTPRequestHandler) as httpd:
+        print(f"Serving at port {PORT}")
+        httpd.serve_forever()
+
+# ================================
+# Part 2: LLM Serve Code
+# ================================
+APPCFG = LoadConfig()
+
+app = Flask(__name__)
+
+# Load the LLM and tokenizer
+tokenizer = AutoTokenizer.from_pretrained(
+    APPCFG.llm_engine, token=APPCFG.gemma_token, device=APPCFG.device)
+model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path="BioMistral/BioMistral-7B",
+                                             token=APPCFG.gemma_token,
+                                             torch_dtype=torch.float16,
+                                             device_map=APPCFG.device)
+app_pipeline = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer
+)
+
+@app.route("/generate_text", methods=["POST"])
+def generate_text():
+    data = request.json
+    prompt = data.get("prompt", "")
+    max_new_tokens = data.get("max_new_tokens", 1000)
+    do_sample = data.get("do_sample", True)
+    temperature = data.get("temperature", 0.1)
+    top_k = data.get("top_k", 50)
+    top_p = data.get("top_p", 0.95)
+
+    tokenized_prompt = app_pipeline.tokenizer.apply_chat_template(
+        prompt, tokenize=False, add_generation_prompt=True)
+    outputs = app_pipeline(
+        tokenized_prompt,
+        max_new_tokens=max_new_tokens,
+        do_sample=do_sample,
+        temperature=temperature,
+        top_k=top_k,
+        top_p=top_p
+    )
+
+    return jsonify({"response": outputs[0]["generated_text"][len(tokenized_prompt):]})
+
+def start_llm_server():
+    app.run(debug=False, port=8888)
+
+# ================================
+# Part 3: Gradio Chatbot Code
+# ================================
+def start_gradio_app():
+    with gr.Blocks() as demo:
+        with gr.Tabs():
+            with gr.TabItem("Med-App"):
+                # First row
+                with gr.Row() as row_one:
+                    with gr.Column(visible=False) as reference_bar:
+                        ref_output = gr.Markdown()
+                    with gr.Column() as chatbot_output:
+                        chatbot = gr.Chatbot(
+                            [], elem_id="chatbot", bubble_full_width=False, height=500,
+                            avatar_images=("images/test.png", "images/Gemma-logo.png")
+                        )
+                        chatbot.like(UISettings.feedback, None, None)
+
+                # Second row
+                with gr.Row():
+                    input_txt = gr.Textbox(
+                        lines=4, scale=8, placeholder="Enter text and press enter, or upload PDF files"
+                    )
+
+                # Third row
+                with gr.Row() as row_two:
+                    text_submit_btn = gr.Button(value="Submit text")
+                    btn_toggle_sidebar = gr.Button(value="References")
+                    upload_btn = gr.UploadButton(
+                        "📁 Upload PDF or doc files", file_types=['.pdf', '.doc'], file_count="multiple"
+                    )
+                    clear_button = gr.ClearButton([input_txt, chatbot])
+                    rag_with_dropdown = gr.Dropdown(
+                        label="RAG with", choices=["Preprocessed doc", "Upload doc: Process for RAG"], value="Preprocessed doc"
+                    )
+
+                # Fourth row
+                with gr.Row() as row_four:
+                    temperature_bar = gr.Slider(
+                        minimum=0.1, maximum=1, value=0.1, step=0.1, label="Temperature",
+                        info="Increasing the temperature will make the model answer more creatively."
+                    )
+                    top_k = gr.Slider(
+                        minimum=0.0, maximum=100.0, step=1, label="top_k", value=50,
+                        info="A lower value (e.g. 10) will result in more conservative answers."
+                    )
+                    top_p = gr.Slider(
+                        minimum=0.0, maximum=1.0, step=0.01, label="top_p", value=0.95,
+                        info="A lower value will generate more focused and conservative text."
+                    )
+
+                # Process uploaded files and text
+                file_msg = upload_btn.upload(
+                    fn=UploadFile.process_uploaded_files, inputs=[upload_btn, chatbot, rag_with_dropdown],
+                    outputs=[input_txt, chatbot], queue=False
+                )
+                txt_msg = input_txt.submit(
+                    fn=ChatBot.respond, inputs=[chatbot, input_txt, rag_with_dropdown, temperature_bar, top_k, top_p],
+                    outputs=[input_txt, chatbot, ref_output], queue=False
+                ).then(lambda: gr.Textbox(interactive=True), None, [input_txt], queue=False)
+                text_submit_btn.click(
+                    fn=ChatBot.respond, inputs=[chatbot, input_txt, rag_with_dropdown, temperature_bar, top_k, top_p],
+                    outputs=[input_txt, chatbot, ref_output], queue=False
+                ).then(lambda: gr.Textbox(interactive=True), None, [input_txt], queue=False)
+
+    demo.launch()
+
+# ================================
+# Main: Running all services concurrently
+# ================================
+if __name__ == "__main__":
+    # Start all services in separate threads
+    reference_server_thread = threading.Thread(target=start_reference_server)
+    llm_server_thread = threading.Thread(target=start_llm_server)
+    gradio_app_thread = threading.Thread(target=start_gradio_app)

+    reference_server_thread.start()
+    llm_server_thread.start()
+    gradio_app_thread.start()

+    # Keep the main thread alive
+    reference_server_thread.join()
+    llm_server_thread.join()
+    gradio_app_thread.join()
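
For reference, the rewritten app.py exposes the LLM behind a Flask POST endpoint at /generate_text on port 8888 (see start_llm_server). The sketch below is not part of the commit; it is a minimal hypothetical client that assumes the service is running locally and that "prompt" is sent as a chat-style message list, since the server passes it straight to apply_chat_template.

import requests

# Hypothetical client for the /generate_text endpoint defined above.
# Assumes the Flask LLM service from this commit is running locally on port 8888.
payload = {
    "prompt": [{"role": "user", "content": "What are the common symptoms of anemia?"}],
    "max_new_tokens": 256,
    "do_sample": True,
    "temperature": 0.1,
    "top_k": 50,
    "top_p": 0.95,
}

resp = requests.post("http://127.0.0.1:8888/generate_text", json=payload)
resp.raise_for_status()
# The server returns the generated text with the prompt portion stripped off.
print(resp.json()["response"])

The generation parameters mirror the defaults read inside generate_text(), so any of them can be omitted from the payload.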