Chris4K committed on
Commit 4278cab · verified · 1 Parent(s): 24976d3

Update app.py

Files changed (1):
  1. app.py +392 -159
app.py CHANGED
@@ -1,19 +1,318 @@
-  # main.py
-  from fastapi import FastAPI, Depends, HTTPException, BackgroundTasks
-  from fastapi.middleware.cors import CORSMiddleware
-  import gradio as gr
-  from services.chat_service import ChatService
-  from services.model_service import ModelService
-  from services.pdf_service import PDFService
-  from services.data_service import DataService
-  from services.faq_service import FAQService
-  from auth.auth_handler import get_api_key
-  from models.base_models import UserInput, SearchQuery
   import logging
   import asyncio

-  # Initialize FastAPI app
-  app = FastAPI()

   # Add CORS middleware
   app.add_middleware(
@@ -24,165 +323,99 @@ app.add_middleware(
       allow_headers=["*"],
   )

-  # Index URLs on app startup
   @app.on_event("startup")
-  async def startup():
-      print("donee.... ")
-
-  # Configure logging
-  logging.basicConfig(
-      level=logging.INFO,
-      format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-      handlers=[
-          logging.FileHandler('chatbot.log'),
-          logging.StreamHandler()
-      ]
-  )
-  logger = logging.getLogger(__name__)
-
-  # Initialize services
-  model_service = ModelService()
-  data_service = DataService(model_service)
-  pdf_service = PDFService(model_service)
-  faq_service = FAQService(model_service)
-  chat_service = ChatService(model_service, data_service, pdf_service, faq_service)
-
-  import math
-  from fastapi.responses import JSONResponse
-
-  # Helper function to sanitize data
-  def sanitize_response(data):
-      if isinstance(data, dict):
-          return {k: sanitize_response(v) for k, v in data.items()}
-      elif isinstance(data, list):
-          return [sanitize_response(item) for item in data]
-      elif isinstance(data, float) and (math.isnan(data) or math.isinf(data)):
-          return None  # Replace NaN/Infinity with None or another default value
-      return data
-
-  @app.post("/api/chat")
-  async def chat_endpoint(
-      background_tasks: BackgroundTasks,
-      user_input: UserInput,
-      api_key: str = Depends(get_api_key)
-  ):
       try:
-          # Call the chat service to get the data
-          response, updated_history, search_results = await chat_service.chat(
-              user_input.user_input,
-              user_input.chat_history
           )
-
-          # Build the response dictionary
-          response_data = {
-              "status": "success",
-              "response": response,
-              "chat_history": updated_history,
-              "search_results": search_results
-          }
-
-          # Sanitize the response to ensure JSON compliance
-          sanitized_data = sanitize_response(response_data)
-
-          # Return the sanitized response
-          return JSONResponse(content=sanitized_data)
-
       except Exception as e:
-          # Log and raise an error with details
-          logger.error(f"Error in chat endpoint: {e}")
-          raise HTTPException(status_code=500, detail="An internal server error occurred.")
-
-  @app.post("/api/search")
-  async def search_endpoint(
-      query: SearchQuery,
-      api_key: str = Depends(get_api_key)
-  ):
       try:
-          results = await data_service.search(query.query, query.top_k)
-          # Sanitize the response to ensure JSON compliance
-          sanitized_data = sanitize_response(results)
-
-          # Return the sanitized response
-          return JSONResponse(content=sanitized_data)
-
       except Exception as e:
-          logger.error(f"Error in search endpoint: {e}")
           raise HTTPException(status_code=500, detail=str(e))

-  @app.post("/api/faq/search")
-  async def faq_search_endpoint(
-      query: SearchQuery,
-      api_key: str = Depends(get_api_key)
-  ):
       try:
-          results = await faq_service.search_faqs(query.query, query.top_k)
-          return {"results": results}
-      except Exception as e:
-          logger.error(f"Error in FAQ search endpoint: {e}")
-          raise HTTPException(status_code=500, detail=str(e))
-
-  # Gradio interface
-  def create_gradio_interface():
-      with gr.Blocks(theme=gr.themes.Soft()) as demo:
-          gr.Markdown("# 🦙 * Chat Assistant\nFragen Sie nach Produkten, Rezepten und mehr!")
-
-          with gr.Row():
-              with gr.Column(scale=4):
-                  chat_display = gr.Chatbot(label="Chat-Verlauf", height=400)
-                  user_input = gr.Textbox(
-                      label="Ihre Nachricht",
-                      placeholder="Stellen Sie Ihre Frage...",
-                      lines=2
-                  )

-              with gr.Column(scale=2):
-                  with gr.Accordion("Zusätzliche Informationen", open=False):
-                      product_info = gr.JSON(label="Produktdetails")
-
-          with gr.Row():
-              submit_btn = gr.Button("Senden", variant="primary")
-              clear_btn = gr.Button("Chat löschen")

-          chat_history = gr.State([])
-
-          async def respond(message, history):
-              response, updated_history, search_results = await chat_service.chat(message, history)
-              # Convert updated_history to the required format
-              if isinstance(updated_history[0], dict):
-                  formatted_history = [(item['user_input'], item['response']) for item in updated_history]
-              elif isinstance(updated_history[0], tuple):
-                  formatted_history = [(item[0], item[1]) for item in updated_history]
-              else:
-                  raise TypeError("Unexpected structure for updated_history")
-              #formatted_history = [(item['user_input'], item['response']) for item in updated_history]
-              return formatted_history, updated_history, search_results
-
-          submit_btn.click(
-              respond,
-              inputs=[user_input, chat_history],
-              outputs=[chat_display, chat_history, product_info]
-          )
-
-          clear_btn.click(
-              lambda: ([], [], None),
-              outputs=[chat_display, chat_history, product_info]
-          )
-
-      demo.queue()
-      return demo

   if __name__ == "__main__":
       import uvicorn
-
-      # Create and launch Gradio interface
-      demo = create_gradio_interface()
-      demo.launch(server_name="0.0.0.0", server_port=8080)
-
-      # Start FastAPI server
-      #uvicorn.run(app, host="0.0.0.0", port=8000)
 
+  import torch
+  from transformers import AutoModelForCausalLM, AutoTokenizer
+  from typing import List, Tuple, Optional, Dict, Any, Union, AsyncGenerator
+  from dataclasses import dataclass
+  from enum import Enum
+  from llama_cpp import Llama  # used by _load_quantized_model for the GGUF PRM
   import logging
+
+  from huggingface_hub import hf_hub_download
+
+  prm_model_path = hf_hub_download(
+      repo_id="tensorblock/Llama3.1-8B-PRM-Mistral-Data-GGUF",
+      filename="Llama3.1-8B-PRM-Mistral-Data-Q4_K_M.gguf"
+  )
+
+  class GenerationStrategy(str, Enum):
+      DEFAULT = "default"
+      MAJORITY_VOTING = "majority_voting"
+      BEST_OF_N = "best_of_n"
+      BEAM_SEARCH = "beam_search"
+      DVTS = "dvts"
+
+  @dataclass
+  class GenerationConfig:
+      num_samples: int = 5
+      depth: int = 3
+      breadth: int = 2
+      max_history_turns: int = 3
+      max_new_tokens: int = 50
+      temperature: float = 0.7
+      top_p: float = 0.9
+      strategy: GenerationStrategy = GenerationStrategy.DEFAULT
+
+  class LlamaGenerator:
+      def __init__(
+          self,
+          llama_model_name: str,
+          prm_model_path: str,
+          device: str = None,
+          default_generation_config: Optional[GenerationConfig] = None
+      ):
+          """Initialize the LlamaGenerator with the specified models."""
+          self.logger = logging.getLogger(__name__)
+          self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+          self.default_config = default_generation_config or GenerationConfig()
+
+          self.logger.info(f"Initializing LlamaGenerator on device: {self.device}")
+
+          try:
+              self._initialize_models(llama_model_name, prm_model_path)
+          except Exception as e:
+              self.logger.error(f"Failed to initialize models: {str(e)}")
+              raise
+
+      def _initialize_models(self, llama_model_name: str, prm_model_path: str):
+          """Initialize models with error handling and logging."""
+          # Initialize LLaMA model and tokenizer
+          self.llama_tokenizer = AutoTokenizer.from_pretrained(
+              llama_model_name,
+              padding_side='left',
+              trust_remote_code=True
+          )
+          if self.llama_tokenizer.pad_token is None:
+              self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token
+
+          self.llama_model = AutoModelForCausalLM.from_pretrained(
+              llama_model_name,
+              device_map="auto",
+              trust_remote_code=True
+          )
+
+          # Initialize PRM model
+          self.prm_model = self._load_quantized_model(prm_model_path)
+
+          # Token streaming is only enabled if the loaded model exposes a streamer;
+          # plain transformers models do not, so generate_stream guards on this flag.
+          self.supports_streaming = hasattr(self.llama_model, "streamer")
+
+      async def generate_stream(
+          self,
+          prompt: str,
+          config: Optional[GenerationConfig] = None
+      ) -> AsyncGenerator[str, None]:
+          """Stream tokens as they're generated."""
+          if not self.supports_streaming:
+              raise NotImplementedError("This model doesn't support streaming")
+
+          config = config or self.default_config
+          input_ids = self.llama_tokenizer(prompt, return_tensors="pt").input_ids.to(self.device)
+
+          async for token in self.llama_model.streamer(input_ids, **self._get_generation_kwargs(config)):
+              yield self.llama_tokenizer.decode([token])
+
+      def _get_generation_kwargs(self, config: GenerationConfig) -> Dict[str, Any]:
+          """Get generation kwargs based on config."""
+          return {
+              "max_new_tokens": config.max_new_tokens,
+              "temperature": config.temperature,
+              "top_p": config.top_p,
+              "do_sample": config.temperature > 0,
+          }
+
+      def _load_quantized_model(self, model_path: str) -> Llama:
+          """Load a quantized GGUF model using llama-cpp-python.
+
+          Args:
+              model_path (str): Path to the GGUF model file
+
+          Returns:
+              Llama: Loaded model instance
+          """
+          try:
+              # Offload all layers to GPU if CUDA is available
+              n_gpu_layers = -1 if torch.cuda.is_available() else 0
+
+              # Load the model
+              model = Llama(
+                  model_path=model_path,
+                  n_ctx=2048,                 # Context window
+                  n_batch=512,                # Batch size for prompt processing
+                  n_gpu_layers=n_gpu_layers,  # Number of layers to offload to GPU
+                  verbose=False
+              )
+
+              self.logger.info(f"Successfully loaded GGUF model from {model_path}")
+              return model
+
+          except Exception as e:
+              self.logger.error(f"Failed to load GGUF model: {str(e)}")
+              raise
+
+      def _score_with_prm(self, text: str) -> float:
+          """Score text using the PRM model.
+
+          Args:
+              text (str): Text to score
+
+          Returns:
+              float: Model score (mean token logprob; higher is better)
+          """
+          try:
+              # llama-cpp-python exposes an OpenAI-style completion call; echo the
+              # prompt with per-token logprobs and use their mean as a proxy score.
+              result = self.prm_model(text, max_tokens=1, echo=True, logprobs=1)
+              token_logprobs = result["choices"][0]["logprobs"]["token_logprobs"]
+              valid = [lp for lp in token_logprobs if lp is not None]
+              return sum(valid) / len(valid) if valid else float('-inf')
+
+          except Exception as e:
+              self.logger.error(f"Error scoring text with PRM: {str(e)}")
+              return float('-inf')  # Return very low score on error
+
+      def _construct_prompt(
+          self,
+          context: str,
+          user_input: str,
+          chat_history: List[Tuple[str, str]],
+          max_history_turns: int = 3
+      ) -> str:
+          """Construct a formatted Llama 3 chat prompt from the input components."""
+          system_message = f"Please assist based on the following context: {context}"
+          prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
+
+          for user_msg, assistant_msg in chat_history[-max_history_turns:]:
+              prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
+              prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"
+
+          prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_input}<|eot_id|>"
+          prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
+          return prompt
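
Editor's note: a minimal, illustrative sketch of the string `_construct_prompt` builds for a one-turn history; only the template tokens come from the method above, the values are made up.

    # Illustrative only: hypothetical call against an already-constructed generator.
    prompt = generator._construct_prompt(
        context="the product catalogue",
        user_input="What does item 123 cost?",
        chat_history=[("Hi", "Hello! How can I help?")],
    )
    # prompt is the concatenation of:
    #   <|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nPlease assist based on the following context: the product catalogue<|eot_id|>
    #   <|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|>
    #   <|start_header_id|>assistant<|end_header_id|>\n\nHello! How can I help?<|eot_id|>
    #   <|start_header_id|>user<|end_header_id|>\n\nWhat does item 123 cost?<|eot_id|>
    #   <|start_header_id|>assistant<|end_header_id|>\n\n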
+
+      def generate(
+          self,
+          prompt: str,
+          model_kwargs: Dict[str, Any],
+          strategy: str = "default",
+          num_samples: int = 5,
+          depth: int = 3,
+          breadth: int = 2
+      ) -> str:
+          """Generate a response using the specified strategy.
+
+          Args:
+              prompt (str): The input prompt
+              model_kwargs (dict): Additional arguments for model.generate()
+              strategy (str): Generation strategy ('default', 'majority_voting', 'best_of_n', 'beam_search', 'dvts')
+              num_samples (int): Number of samples for applicable strategies
+              depth (int): Depth for DVTS strategy
+              breadth (int): Breadth for DVTS strategy
+
+          Returns:
+              str: Generated response
+          """
+          if strategy == "default":
+              input_ids = self.llama_tokenizer(prompt, return_tensors="pt").input_ids.to(self.device)
+              output = self.llama_model.generate(input_ids, **model_kwargs)
+              return self.llama_tokenizer.decode(output[0], skip_special_tokens=True)
+
+          elif strategy == "majority_voting":
+              # Sample several completions and return the most frequent one
+              outputs = []
+              for _ in range(num_samples):
+                  input_ids = self.llama_tokenizer(prompt, return_tensors="pt").input_ids.to(self.device)
+                  output = self.llama_model.generate(input_ids, **model_kwargs)
+                  outputs.append(self.llama_tokenizer.decode(output[0], skip_special_tokens=True))
+              return max(set(outputs), key=outputs.count)
+
+          elif strategy == "best_of_n":
+              # Sample several completions and return the one the PRM scores highest
+              scored_outputs = []
+              for _ in range(num_samples):
+                  input_ids = self.llama_tokenizer(prompt, return_tensors="pt").input_ids.to(self.device)
+                  output = self.llama_model.generate(input_ids, **model_kwargs)
+                  response = self.llama_tokenizer.decode(output[0], skip_special_tokens=True)
+                  score = self._score_with_prm(response)
+                  scored_outputs.append((response, score))
+              return max(scored_outputs, key=lambda x: x[1])[0]
+
+          elif strategy == "beam_search":
+              input_ids = self.llama_tokenizer(prompt, return_tensors="pt").input_ids.to(self.device)
+              outputs = self.llama_model.generate(
+                  input_ids,
+                  num_beams=num_samples,
+                  num_return_sequences=num_samples,
+                  **model_kwargs
+              )
+              # Return the top beam so the declared str return type holds
+              return self.llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+          elif strategy == "dvts":
+              # Diverse verifier tree search: sample `breadth` candidates, then
+              # repeatedly extend the best-scoring ones for `depth` rounds
+              results = []
+              for _ in range(breadth):
+                  input_ids = self.llama_tokenizer(prompt, return_tensors="pt").input_ids.to(self.device)
+                  output = self.llama_model.generate(input_ids, **model_kwargs)
+                  response = self.llama_tokenizer.decode(output[0], skip_special_tokens=True)
+                  score = self._score_with_prm(response)
+                  results.append((response, score))
+
+              for _ in range(depth - 1):
+                  best_responses = sorted(results, key=lambda x: x[1], reverse=True)[:breadth]
+                  for response, _ in best_responses:
+                      input_ids = self.llama_tokenizer(response, return_tensors="pt").input_ids.to(self.device)
+                      output = self.llama_model.generate(input_ids, **model_kwargs)
+                      extended_response = self.llama_tokenizer.decode(output[0], skip_special_tokens=True)
+                      score = self._score_with_prm(extended_response)
+                      results.append((extended_response, score))
+              return max(results, key=lambda x: x[1])[0]
+
+          else:
+              raise ValueError(f"Unknown strategy: {strategy}")
+
+      def generate_with_context(
+          self,
+          context: str,
+          user_input: str,
+          chat_history: List[Tuple[str, str]],
+          model_kwargs: Dict[str, Any],
+          max_history_turns: int = 3,
+          strategy: str = "default",
+          num_samples: int = 5,
+          depth: int = 3,
+          breadth: int = 2
+      ) -> str:
+          """Generate a response using context and chat history.
+
+          Args:
+              context (str): Context for the conversation
+              user_input (str): Current user input
+              chat_history (List[Tuple[str, str]]): List of (user, assistant) message pairs
+              model_kwargs (dict): Additional arguments for model.generate()
+              max_history_turns (int): Maximum number of history turns to include
+              strategy (str): Generation strategy
+              num_samples (int): Number of samples for applicable strategies
+              depth (int): Depth for DVTS strategy
+              breadth (int): Breadth for DVTS strategy
+
+          Returns:
+              str: Generated response
+          """
+          prompt = self._construct_prompt(
+              context,
+              user_input,
+              chat_history,
+              max_history_turns
+          )
+          return self.generate(
+              prompt,
+              model_kwargs,
+              strategy,
+              num_samples,
+              depth,
+              breadth
+          )
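
Editor's note: a hypothetical sketch of driving the class directly. The model name is the one used in the API startup below and prm_model_path comes from the hf_hub_download call above; everything else is illustrative, not a prescribed usage.

    # Hypothetical usage sketch, assuming enough GPU/CPU memory for both models.
    config = GenerationConfig(strategy=GenerationStrategy.BEST_OF_N, num_samples=4, max_new_tokens=64)
    generator = LlamaGenerator(
        llama_model_name="meta-llama/Llama-3.2-1B-Instruct",
        prm_model_path=prm_model_path,
        default_generation_config=config,
    )
    answer = generator.generate_with_context(
        context="You answer questions about the product catalogue.",
        user_input="Which generation strategies does this service support?",
        chat_history=[],
        model_kwargs=generator._get_generation_kwargs(config),
        strategy=config.strategy,
        num_samples=config.num_samples,
    )
    print(answer)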
+
+  ######################
+  #########
+  #################
+  from fastapi import FastAPI, HTTPException, BackgroundTasks, WebSocket
+  from fastapi.middleware.cors import CORSMiddleware
+  from pydantic import BaseModel, Field
+  from typing import List, Optional, Dict
   import asyncio
+  import uuid
+  from datetime import datetime
+  import json
+
+  class ChatMessage(BaseModel):
+      role: str = Field(..., description="Role of the message sender (user/assistant)")
+      content: str = Field(..., description="Content of the message")
+
+  class GenerationRequest(BaseModel):
+      context: Optional[str] = Field(None, description="Context for the conversation")
+      messages: List[ChatMessage] = Field(..., description="Chat history")
+      config: Optional[Dict] = Field(None, description="Generation configuration")
+      stream: bool = Field(False, description="Whether to stream the response")
+
+  class GenerationResponse(BaseModel):
+      id: str = Field(..., description="Generation ID")
+      content: str = Field(..., description="Generated content")
+      created_at: datetime = Field(default_factory=datetime.now)
+
+  app = FastAPI(title="LLaMA Generation Service")

   # Add CORS middleware
   app.add_middleware(
       allow_headers=["*"],
   )

+  # Store generator instance
+  generator = None

   @app.on_event("startup")
+  async def startup_event():
+      global generator
       try:
+          generator = LlamaGenerator(
+              llama_model_name="meta-llama/Llama-3.2-1B-Instruct",
+              prm_model_path=prm_model_path,
+              default_generation_config=GenerationConfig(
+                  max_new_tokens=100,
+                  temperature=0.7
+              )
           )
       except Exception as e:
+          print(f"Failed to initialize generator: {str(e)}")
+          raise
+  @app.post("/generate", response_model=GenerationResponse)
+  async def generate(request: GenerationRequest):
+      if not generator:
+          raise HTTPException(status_code=503, detail="Generator not initialized")
+
       try:
+          # Format chat history
+          chat_history = [(msg.role, msg.content) for msg in request.messages[:-1]]
+          user_input = request.messages[-1].content
+
+          # Create generation config
+          config = GenerationConfig(**request.config) if request.config else generator.default_config
+
+          # Generate response in a worker thread; generate_with_context takes the
+          # strategy parameters explicitly, so unpack them from the config
+          response = await asyncio.to_thread(
+              generator.generate_with_context,
+              context=request.context or "",
+              user_input=user_input,
+              chat_history=chat_history,
+              model_kwargs=generator._get_generation_kwargs(config),
+              max_history_turns=config.max_history_turns,
+              strategy=config.strategy,
+              num_samples=config.num_samples,
+              depth=config.depth,
+              breadth=config.breadth
+          )
+
+          return GenerationResponse(
+              id=str(uuid.uuid4()),
+              content=response
+          )
       except Exception as e:
           raise HTTPException(status_code=500, detail=str(e))
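
Editor's note: once the server is running on port 8000, the endpoint can be exercised with a plain HTTP client. A hypothetical sketch following the GenerationRequest schema above; the `requests` dependency and the payload values are assumptions, not part of the commit.

    # Hypothetical client sketch for POST /generate.
    import requests

    payload = {
        "context": "You answer questions about the product catalogue.",
        "messages": [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hello! How can I help?"},
            {"role": "user", "content": "Which generation strategies are available?"},
        ],
        "config": {"strategy": "best_of_n", "num_samples": 3, "max_new_tokens": 64},
    }
    resp = requests.post("http://localhost:8000/generate", json=payload, timeout=120)
    resp.raise_for_status()
    print(resp.json()["content"])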
+
+  @app.websocket("/generate/stream")
+  async def generate_stream(websocket: WebSocket):
+      await websocket.accept()
+
       try:
+          while True:
+              # Receive and parse request
+              request_data = await websocket.receive_text()
+              request = GenerationRequest.parse_raw(request_data)
+
+              # Format chat history
+              chat_history = [(msg.role, msg.content) for msg in request.messages[:-1]]
+              user_input = request.messages[-1].content
+
+              # Create generation config
+              config = GenerationConfig(**request.config) if request.config else None
+
+              # Stream response
+              async for token in generator.generate_stream(
+                  prompt=generator._construct_prompt(
+                      context=request.context or "",
+                      user_input=user_input,
+                      chat_history=chat_history
+                  ),
+                  config=config
+              ):
+                  await websocket.send_text(json.dumps({
+                      "token": token,
+                      "finished": False
+                  }))
+
+              # Send finished message
+              await websocket.send_text(json.dumps({
+                  "token": "",
+                  "finished": True
+              }))
+
+      except Exception as e:
+          await websocket.send_text(json.dumps({
+              "error": str(e)
+          }))
+      finally:
+          await websocket.close()

   if __name__ == "__main__":
       import uvicorn
+      uvicorn.run(app, host="0.0.0.0", port=8000)
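
Editor's note: a hypothetical streaming client for the /generate/stream WebSocket route. The message shapes mirror the handler above; the `websockets` package is an assumption. Note that, as committed, generate_stream raises NotImplementedError for plain transformers models (supports_streaming is False), so this client will receive an error payload rather than tokens.

    # Hypothetical streaming client sketch.
    import asyncio, json
    import websockets

    async def stream_demo():
        uri = "ws://localhost:8000/generate/stream"
        request = {
            "messages": [{"role": "user", "content": "Tell me about the catalogue."}],
            "stream": True,
        }
        async with websockets.connect(uri) as ws:
            await ws.send(json.dumps(request))
            while True:
                msg = json.loads(await ws.recv())
                if msg.get("error"):
                    print("error:", msg["error"])
                    break
                if msg.get("finished"):
                    break
                print(msg["token"], end="", flush=True)

    asyncio.run(stream_demo())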