oceansweep committed on
Commit
b3d35a8
·
verified ·
1 Parent(s): e88c10c

Delete App_Function_Libraries/Local_Summarization_Lib.py

Browse files
App_Function_Libraries/Local_Summarization_Lib.py DELETED
@@ -1,721 +0,0 @@
1
- # Local_Summarization_Lib.py
2
- #########################################
3
- # Local Summarization Library
4
- # This library is used to perform summarization with a 'local' inference engine.
5
- #
6
- ####
7
- #
8
- ####################
9
- # Function List
10
- # FIXME - UPDATE Function Arguments
11
- # 1. summarize_with_local_llm(text, custom_prompt_arg)
12
- # 2. summarize_with_llama(api_url, text, token, custom_prompt)
13
- # 3. summarize_with_kobold(api_url, text, kobold_api_token, custom_prompt)
14
- # 4. summarize_with_oobabooga(api_url, text, ooba_api_token, custom_prompt)
15
- # 5. summarize_with_vllm(vllm_api_url, vllm_api_key_function_arg, llm_model, text, vllm_custom_prompt_function_arg)
16
- # 6. summarize_with_tabbyapi(tabby_api_key, tabby_api_IP, text, tabby_model, custom_prompt)
17
- # 7. save_summary_to_file(summary, file_path)
18
- #
19
- ###############################
20
- # Import necessary libraries
21
- import json
22
- import logging
23
- import os
24
- from typing import Union
25
-
26
- import requests
27
- # Import 3rd-party Libraries
28
- from openai import OpenAI
29
- # Import Local
30
- from App_Function_Libraries.Utils import load_and_log_configs
31
- from App_Function_Libraries.Utils import extract_text_from_segments
32
- #
33
- #######################################################################################################################
34
- # Function Definitions
35
- #
36
-
37
# Module-level logger; deliberately the root logger so it inherits whatever
# handler configuration the application set up.
logger = logging.getLogger()

# Dirty hack for vLLM
# NOTE(review): this OpenAI client is built with a placeholder key and is not
# used by any function visible in this file (vLLM is called via requests) --
# presumably kept for import-time compatibility; confirm before removing.
openai_api_key = "Fake_key"
client = OpenAI(api_key=openai_api_key)
42
-
43
def summarize_with_local_llm(input_data, custom_prompt_arg, temp, system_message=None):
    """Summarize text with a local OpenAI-compatible chat server on 127.0.0.1:8080.

    Args:
        input_data: Path to a JSON transcript file, a list of segment dicts,
            or a raw text string.
        custom_prompt_arg: Instructions appended after the text.
        temp: Sampling temperature; forwarded to the server when not None
            (previously accepted but silently ignored -- the old FIXME).
        system_message: Optional system prompt; defaults to a generic assistant.

    Returns:
        The summary string, a previously stored summary from the input, or an
        error-message string (this function never raises to the caller).
    """
    try:
        # A string naming an existing file is treated as a JSON transcript.
        if isinstance(input_data, str) and os.path.isfile(input_data):
            logging.debug("Local LLM: Loading json data for summarization")
            with open(input_data, 'r') as file:
                data = json.load(file)
        else:
            # Bug fix: this log line previously said "openai:".
            logging.debug("Local LLM: Using provided string data for summarization")
            data = input_data

        logging.debug(f"Local LLM: Loaded data: {data}")
        logging.debug(f"Local LLM: Type of data: {type(data)}")

        if isinstance(data, dict) and 'summary' in data:
            # The input already carries a summary; short-circuit.
            logging.debug("Local LLM: Summary already exists in the loaded data")
            return data['summary']

        # A list is assumed to be transcript segments; a string is used as-is.
        if isinstance(data, list):
            text = extract_text_from_segments(data)
        elif isinstance(data, str):
            text = data
        else:
            raise ValueError("Invalid input data format")

        if system_message is None:
            system_message = "You are a helpful AI assistant."

        headers = {
            'Content-Type': 'application/json'
        }

        logging.debug("Local LLM: Preparing data + prompt for submittal")
        local_llm_prompt = f"{text} \n\n\n\n{custom_prompt_arg}"
        payload = {
            "messages": [
                {
                    "role": "system",
                    "content": system_message
                },
                {
                    "role": "user",
                    "content": local_llm_prompt
                }
            ],
            "max_tokens": 28000,  # Adjust tokens as needed
        }
        # Forward the caller-supplied temperature (fixes the unused `temp`).
        if temp is not None:
            payload["temperature"] = temp
        logging.debug("Local LLM: Posting request")
        response = requests.post('http://127.0.0.1:8080/v1/chat/completions', headers=headers, json=payload)

        if response.status_code == 200:
            response_data = response.json()
            if 'choices' in response_data and len(response_data['choices']) > 0:
                summary = response_data['choices'][0]['message']['content'].strip()
                logging.debug("Local LLM: Summarization successful")
                print("Local LLM: Summarization successful.")
                return summary
            else:
                logging.warning("Local LLM: Summary not found in the response data")
                return "Local LLM: Summary not available"
        else:
            logging.debug("Local LLM: Summarization failed")
            print("Local LLM: Failed to process summary:", response.text)
            return "Local LLM: Failed to process summary"
    except Exception as e:
        logging.debug("Local LLM: Error in processing: %s", str(e))
        print("Error occurred while processing summary with Local LLM:", str(e))
        return "Local LLM: Error occurred while processing summary"
114
-
115
-
116
def summarize_with_llama(input_data, custom_prompt, api_url="http://127.0.0.1:8080/completion", api_key=None, temp=None, system_message=None):
    """Summarize text with a llama.cpp server.

    Args:
        input_data: Path to a JSON transcript file, a list of segment dicts,
            or a raw text string.
        custom_prompt: Instructions prepended before the text.
        api_url: llama.cpp endpoint to POST to.
        api_key: Optional bearer token; falls back to the config file key.
        temp: Sampling temperature forwarded to the server.
        system_message: Optional system prompt; defaults to a generic assistant.

    Returns:
        The summary string, a previously stored summary, or an error-message
        string (this function never raises to the caller).
    """
    try:
        logging.debug("Llama.cpp: Loading and validating configurations")
        loaded_config_data = load_and_log_configs()
        if loaded_config_data is None:
            logging.error("Failed to load configuration data")
            llama_api_key = None
        else:
            # Prioritize the API key passed as a parameter
            if api_key and api_key.strip():
                llama_api_key = api_key
                logging.info("Llama.cpp: Using API key provided as parameter")
            else:
                # If no parameter is provided, use the key from the config
                llama_api_key = loaded_config_data['api_keys'].get('llama')
                if llama_api_key:
                    logging.info("Llama.cpp: Using API key from config file")
                else:
                    logging.warning("Llama.cpp: No API key found in config file")

        # Load transcript
        logging.debug("llama.cpp: Loading JSON data")
        if isinstance(input_data, str) and os.path.isfile(input_data):
            logging.debug("Llama.cpp: Loading json data for summarization")
            with open(input_data, 'r') as file:
                data = json.load(file)
        else:
            logging.debug("Llama.cpp: Using provided string data for summarization")
            data = input_data

        logging.debug(f"Llama.cpp: Loaded data: {data}")
        logging.debug(f"Llama.cpp: Type of data: {type(data)}")

        if isinstance(data, dict) and 'summary' in data:
            # If the loaded data is a dictionary and already contains a summary, return it
            logging.debug("Llama.cpp: Summary already exists in the loaded data")
            return data['summary']

        # If the loaded data is a list of segment dictionaries or a string, proceed with summarization
        if isinstance(data, list):
            text = extract_text_from_segments(data)
        elif isinstance(data, str):
            text = data
        else:
            raise ValueError("Llama.cpp: Invalid input data format")

        headers = {
            'accept': 'application/json',
            'content-type': 'application/json',
        }
        # Bug fix: previously `len(api_key)` crashed with the default
        # api_key=None and ignored a key resolved from the config file.
        if llama_api_key and len(llama_api_key) > 5:
            headers['Authorization'] = f'Bearer {llama_api_key}'

        llama_prompt = f"{custom_prompt} \n\n\n\n{text}"
        if system_message is None:
            system_message = "You are a helpful AI assistant."
        # Bug fix: f-prefix was missing, so the placeholder was logged literally.
        logging.debug(f"llama: Prompt being sent is {llama_prompt}")

        data = {
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": llama_prompt}
            ],
            "max_tokens": 4096,
            "temperature": temp
        }

        logging.debug("llama: Submitting request to API endpoint")
        print("llama: Submitting request to API endpoint")
        response = requests.post(api_url, headers=headers, json=data)
        response_data = response.json()
        logging.debug("API Response Data: %s", response_data)

        if response.status_code == 200:
            logging.debug(response_data)
            # llama.cpp's /completion responses carry the text in 'content'.
            summary = response_data['content'].strip()
            logging.debug("llama: Summarization successful")
            print("Summarization successful.")
            return summary
        else:
            logging.error(f"Llama: API request failed with status code {response.status_code}: {response.text}")
            return f"Llama: API request failed: {response.text}"

    except Exception as e:
        logging.error("Llama: Error in processing: %s", str(e))
        return f"Llama: Error occurred while processing summary with llama: {str(e)}"
206
-
207
-
208
# https://lite.koboldai.net/koboldcpp_api#/api%2Fv1/post_api_v1_generate
def summarize_with_kobold(input_data, api_key, custom_prompt_input, kobold_api_ip="http://127.0.0.1:5001/api/v1/generate", temp=None, system_message=None):
    """Summarize text with a KoboldCpp server via /api/v1/generate.

    Args:
        input_data: Path to a JSON transcript file, a list of segment dicts,
            or a raw text string.
        api_key: Optional API key; falls back to the config file key.
        custom_prompt_input: Instructions prepended before the text.
        kobold_api_ip: Endpoint used when the config does not provide one.
        temp: Sampling temperature; defaults to 0.7 when None.
        system_message: Accepted for signature parity; the /generate endpoint
            takes a flat prompt, so it is not sent.

    Returns:
        The summary string, a previously stored summary, or an error-message
        string (this function never raises to the caller).
    """
    logging.debug("Kobold: Summarization process starting...")
    try:
        logging.debug("Kobold: Loading and validating configurations")
        loaded_config_data = load_and_log_configs()
        if loaded_config_data is None:
            logging.error("Failed to load configuration data")
            kobold_api_key = None
        else:
            # Prioritize the API key passed as a parameter
            if api_key and api_key.strip():
                kobold_api_key = api_key
                logging.info("Kobold: Using API key provided as parameter")
            else:
                # If no parameter is provided, use the key from the config
                kobold_api_key = loaded_config_data['api_keys'].get('kobold')
                if kobold_api_key:
                    logging.info("Kobold: Using API key from config file")
                else:
                    logging.warning("Kobold: No API key found in config file")

        # Bug fix: slicing a missing key used to raise TypeError here.
        if kobold_api_key:
            logging.debug(f"Kobold: Using API Key: {kobold_api_key[:5]}...{kobold_api_key[-5:]}")

        if isinstance(input_data, str) and os.path.isfile(input_data):
            logging.debug("Kobold.cpp: Loading json data for summarization")
            with open(input_data, 'r') as file:
                data = json.load(file)
        else:
            logging.debug("Kobold.cpp: Using provided string data for summarization")
            data = input_data

        logging.debug(f"Kobold.cpp: Loaded data: {data}")
        logging.debug(f"Kobold.cpp: Type of data: {type(data)}")

        if isinstance(data, dict) and 'summary' in data:
            # If the loaded data is a dictionary and already contains a summary, return it
            logging.debug("Kobold.cpp: Summary already exists in the loaded data")
            return data['summary']

        # If the loaded data is a list of segment dictionaries or a string, proceed with summarization
        if isinstance(data, list):
            text = extract_text_from_segments(data)
        elif isinstance(data, str):
            text = data
        else:
            raise ValueError("Kobold.cpp: Invalid input data format")

        headers = {
            'accept': 'application/json',
            'content-type': 'application/json',
        }

        kobold_prompt = f"{custom_prompt_input}\n\n\n\n{text}"
        # Bug fix: f-prefix was missing, so the placeholder was logged literally.
        logging.debug(f"kobold: Prompt being sent is {kobold_prompt}")

        # Values taken from the KoboldCpp API docs.
        data = {
            "max_context_length": 8096,
            "max_length": 4096,
            "prompt": kobold_prompt,
            # Bug fix: honor the caller-supplied temperature instead of a
            # hard-coded 0.7.
            "temperature": temp if temp is not None else 0.7,
            #"top_p": 0.9,
            #"top_k": 100
            #"rep_penalty": 1.0,
        }

        logging.debug("kobold: Submitting request to API endpoint")
        print("kobold: Submitting request to API endpoint")
        # Bug fix: previously the endpoint was read from the config
        # unconditionally, which crashed when the config failed to load and
        # always discarded the kobold_api_ip parameter. The config value is
        # still preferred; the parameter is now the fallback.
        if loaded_config_data is not None:
            kobold_api_ip = loaded_config_data.get('local_api_ip', {}).get('kobold') or kobold_api_ip
        try:
            response = requests.post(kobold_api_ip, headers=headers, json=data)
            logging.debug("kobold: API Response Status Code: %d", response.status_code)

            if response.status_code == 200:
                try:
                    response_data = response.json()
                    logging.debug("kobold: API Response Data: %s", response_data)

                    if response_data and 'results' in response_data and len(response_data['results']) > 0:
                        summary = response_data['results'][0]['text'].strip()
                        logging.debug("kobold: Summarization successful")
                        return summary
                    else:
                        logging.error("Expected data not found in API response.")
                        return "Expected data not found in API response."
                except ValueError as e:
                    logging.error("kobold: Error parsing JSON response: %s", str(e))
                    return f"Error parsing JSON response: {str(e)}"
            else:
                logging.error(f"kobold: API request failed with status code {response.status_code}: {response.text}")
                return f"kobold: API request failed: {response.text}"
        except Exception as e:
            logging.error("kobold: Error in processing: %s", str(e))
            return f"kobold: Error occurred while processing summary with kobold: {str(e)}"
    except Exception as e:
        logging.error("kobold: Error in processing: %s", str(e))
        return f"kobold: Error occurred while processing summary with kobold: {str(e)}"
308
-
309
-
310
# https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API
def summarize_with_oobabooga(input_data, api_key, custom_prompt, api_url="http://127.0.0.1:5000/v1/chat/completions", temp=None, system_message=None):
    """Summarize text with an oobabooga text-generation-webui server.

    Args:
        input_data: Path to a JSON transcript file, a list of segment dicts,
            or a raw text string.
        api_key: Optional API key; falls back to the config file key.
        custom_prompt: Instructions appended after the text.
        api_url: OpenAI-compatible chat completions endpoint.
        temp: Accepted for signature parity; not sent by this implementation.
        system_message: Optional system prompt; defaults to a generic assistant.

    Returns:
        The summary string, a previously stored summary, or an error-message
        string (this function never raises to the caller).
    """
    logging.debug("Oobabooga: Summarization process starting...")
    try:
        logging.debug("Oobabooga: Loading and validating configurations")
        loaded_config_data = load_and_log_configs()
        if loaded_config_data is None:
            logging.error("Failed to load configuration data")
            ooba_api_key = None
        else:
            # Prioritize the API key passed as a parameter
            if api_key and api_key.strip():
                ooba_api_key = api_key
                logging.info("Oobabooga: Using API key provided as parameter")
            else:
                # If no parameter is provided, use the key from the config
                ooba_api_key = loaded_config_data['api_keys'].get('ooba')
                # Bug fix: these two messages previously said "Anthropic".
                if ooba_api_key:
                    logging.info("Oobabooga: Using API key from config file")
                else:
                    logging.warning("Oobabooga: No API key found in config file")

        # Bug fix: slicing a missing key used to raise TypeError here.
        if ooba_api_key:
            logging.debug(f"Oobabooga: Using API Key: {ooba_api_key[:5]}...{ooba_api_key[-5:]}")

        if isinstance(input_data, str) and os.path.isfile(input_data):
            logging.debug("Oobabooga: Loading json data for summarization")
            with open(input_data, 'r') as file:
                data = json.load(file)
        else:
            logging.debug("Oobabooga: Using provided string data for summarization")
            data = input_data

        logging.debug(f"Oobabooga: Loaded data: {data}")
        logging.debug(f"Oobabooga: Type of data: {type(data)}")

        if isinstance(data, dict) and 'summary' in data:
            # If the loaded data is a dictionary and already contains a summary, return it
            logging.debug("Oobabooga: Summary already exists in the loaded data")
            return data['summary']

        # If the loaded data is a list of segment dictionaries or a string, proceed with summarization
        if isinstance(data, list):
            text = extract_text_from_segments(data)
        elif isinstance(data, str):
            text = data
        else:
            raise ValueError("Invalid input data format")

        headers = {
            'accept': 'application/json',
            'content-type': 'application/json',
        }

        ooba_prompt = f"{text}" + f"\n\n\n\n{custom_prompt}"
        # Bug fix: f-prefix was missing, so the placeholder was logged literally.
        logging.debug(f"ooba: Prompt being sent is {ooba_prompt}")

        if system_message is None:
            system_message = "You are a helpful AI assistant."

        data = {
            "mode": "chat",
            "character": "Example",
            "messages": [{"role": "user", "content": ooba_prompt}],
            "system_message": system_message,
        }

        logging.debug("ooba: Submitting request to API endpoint")
        print("ooba: Submitting request to API endpoint")
        # NOTE(review): verify=False disables TLS verification -- acceptable
        # only for a local endpoint; confirm before pointing at remote hosts.
        response = requests.post(api_url, headers=headers, json=data, verify=False)
        logging.debug("ooba: API Response Data: %s", response)

        if response.status_code == 200:
            # Bug fix: the body was previously parsed twice (response.json()
            # called again after response_data was assigned).
            response_data = response.json()
            summary = response_data['choices'][0]['message']['content']
            logging.debug("ooba: Summarization successful")
            print("Summarization successful.")
            return summary
        else:
            logging.error(f"oobabooga: API request failed with status code {response.status_code}: {response.text}")
            return f"ooba: API request failed with status code {response.status_code}: {response.text}"

    except Exception as e:
        logging.error("ooba: Error in processing: %s", str(e))
        return f"ooba: Error occurred while processing summary with oobabooga: {str(e)}"
398
-
399
-
400
-
401
def summarize_with_tabbyapi(input_data, custom_prompt_input, api_key=None, api_IP="http://127.0.0.1:5000/v1/chat/completions", temp=None, system_message=None):
    """Summarize text with a TabbyAPI server (OpenAI-compatible chat endpoint).

    Args:
        input_data: Path to a JSON transcript file, a list of segment dicts,
            or a raw text string.
        custom_prompt_input: Instructions prepended before the text.
        api_key: Optional bearer token; falls back to the config file key.
        api_IP: Accepted for signature parity; the endpoint is read from the
            config file as before.
        temp: Sampling temperature; defaults to 0.7 when None.
        system_message: Optional system prompt; defaults to a generic assistant.

    Returns:
        The summary string, a previously stored summary, or an error-message
        string (this function never raises to the caller).
    """
    logging.debug("TabbyAPI: Summarization process starting...")
    try:
        logging.debug("TabbyAPI: Loading and validating configurations")
        loaded_config_data = load_and_log_configs()
        if loaded_config_data is None:
            logging.error("Failed to load configuration data")
            tabby_api_key = None
        else:
            # Prioritize the API key passed as a parameter
            if api_key and api_key.strip():
                tabby_api_key = api_key
                logging.info("TabbyAPI: Using API key provided as parameter")
            else:
                # If no parameter is provided, use the key from the config
                tabby_api_key = loaded_config_data['api_keys'].get('tabby')
                if tabby_api_key:
                    logging.info("TabbyAPI: Using API key from config file")
                else:
                    logging.warning("TabbyAPI: No API key found in config file")

        # Endpoint and model come from the config; a missing config is caught
        # by the generic handler below, as before.
        tabby_api_ip = loaded_config_data['local_api_ip']['tabby']
        tabby_model = loaded_config_data['models']['tabby']
        if temp is None:
            temp = 0.7

        # Bug fix: slicing a missing key used to raise TypeError here.
        if tabby_api_key:
            logging.debug(f"TabbyAPI: Using API Key: {tabby_api_key[:5]}...{tabby_api_key[-5:]}")

        if isinstance(input_data, str) and os.path.isfile(input_data):
            logging.debug("tabby: Loading json data for summarization")
            with open(input_data, 'r') as file:
                data = json.load(file)
        else:
            logging.debug("tabby: Using provided string data for summarization")
            data = input_data

        logging.debug(f"tabby: Loaded data: {data}")
        logging.debug(f"tabby: Type of data: {type(data)}")

        if isinstance(data, dict) and 'summary' in data:
            # If the loaded data is a dictionary and already contains a summary, return it
            logging.debug("tabby: Summary already exists in the loaded data")
            return data['summary']

        # If the loaded data is a list of segment dictionaries or a string, proceed with summarization
        if isinstance(data, list):
            text = extract_text_from_segments(data)
        elif isinstance(data, str):
            text = data
        else:
            raise ValueError("Invalid input data format")
        if system_message is None:
            system_message = "You are a helpful AI assistant."

        headers = {
            # Bug fix: previously the raw api_key parameter (default None)
            # was sent, producing "Bearer None" even when a config key existed.
            'Authorization': f'Bearer {tabby_api_key}',
            'Content-Type': 'application/json'
        }
        payload = {
            'max_tokens': 4096,
            "min_tokens": 0,
            'temperature': temp,
            #'top_p': 1.0,
            #'top_k': 0,
            #'frequency_penalty': 0,
            #'presence_penalty': 0.0,
            #"repetition_penalty": 1.0,
            'model': tabby_model,
            # Bug fix: previously the raw input_data (possibly a file path)
            # was posted as 'messages' and the extracted text was discarded;
            # now a proper chat conversation is sent.
            'messages': [
                {'role': 'system', 'content': system_message},
                {'role': 'user', 'content': f"{custom_prompt_input}\n\n{text}"}
            ]
        }

        response = requests.post(tabby_api_ip, headers=headers, json=payload)

        if response.status_code == 200:
            response_json = response.json()

            # Validate the response structure
            if all(key in response_json for key in ['id', 'choices', 'created', 'model', 'object', 'usage']):
                logging.info("TabbyAPI: Received a valid 200 response")
                summary = response_json['choices'][0].get('message', {}).get('content', '')
                return summary
            else:
                logging.error("TabbyAPI: Received a 200 response, but the structure is invalid")
                return "Error: Received an invalid response structure from TabbyAPI."

        elif response.status_code == 422:
            logging.error(f"TabbyAPI: Received a 422 error. Details: {response.json()}")
            return "Error: Invalid request sent to TabbyAPI."

        else:
            response.raise_for_status()  # This will raise an exception for other status codes

    except requests.exceptions.RequestException as e:
        logging.error(f"Error summarizing with TabbyAPI: {e}")
        return f"Error summarizing with TabbyAPI: {str(e)}"
    except json.JSONDecodeError:
        logging.error("TabbyAPI: Received an invalid JSON response")
        return "Error: Received an invalid JSON response from TabbyAPI."
    except Exception as e:
        logging.error(f"Unexpected error in summarize_with_tabbyapi: {e}")
        return f"Unexpected error in summarization process: {str(e)}"
504
-
505
def summarize_with_vllm(
        input_data: Union[str, dict, list],
        custom_prompt_input: str,
        api_key: str = None,
        vllm_api_url: str = "http://127.0.0.1:8000/v1/chat/completions",
        model: str = None,
        system_prompt: str = None,
        temp: float = 0.7
) -> str:
    """Summarize text with a vLLM server through its OpenAI-compatible API.

    Args:
        input_data: Path to a JSON transcript file, a list of segment dicts,
            a dict, or a raw text string.
        custom_prompt_input: Instructions prepended before the text.
        api_key: Optional API key; falls back to the config file key.
        vllm_api_url: Chat completions endpoint to POST to.
        model: Model name; falls back to the configured vLLM model.
        system_prompt: Optional system prompt; defaults to a generic assistant.
        temp: Sampling temperature forwarded to the server.

    Returns:
        The summary string, a previously stored summary, or an error-message
        string (this function never raises to the caller).
    """
    logging.debug("vLLM: Summarization process starting...")
    try:
        logging.debug("vLLM: Loading and validating configurations")
        loaded_config_data = load_and_log_configs()
        if loaded_config_data is None:
            logging.error("Failed to load configuration data")
            vllm_api_key = None
        else:
            # Prioritize the API key passed as a parameter
            if api_key and api_key.strip():
                vllm_api_key = api_key
                logging.info("vLLM: Using API key provided as parameter")
            else:
                # If no parameter is provided, use the key from the config
                vllm_api_key = loaded_config_data['api_keys'].get('vllm')
                if vllm_api_key:
                    logging.info("vLLM: Using API key from config file")
                else:
                    logging.warning("vLLM: No API key found in config file")

        # Bug fix: slicing a missing key used to raise TypeError here.
        if vllm_api_key:
            logging.debug(f"vLLM: Using API Key: {vllm_api_key[:5]}...{vllm_api_key[-5:]}")

        # Process input data
        if isinstance(input_data, str) and os.path.isfile(input_data):
            logging.debug("vLLM: Loading json data for summarization")
            with open(input_data, 'r') as file:
                data = json.load(file)
        else:
            logging.debug("vLLM: Using provided data for summarization")
            data = input_data

        logging.debug(f"vLLM: Type of data: {type(data)}")

        # Extract text for summarization
        if isinstance(data, dict) and 'summary' in data:
            logging.debug("vLLM: Summary already exists in the loaded data")
            return data['summary']
        elif isinstance(data, list):
            text = extract_text_from_segments(data)
        elif isinstance(data, str):
            text = data
        elif isinstance(data, dict):
            text = json.dumps(data)
        else:
            raise ValueError("Invalid input data format")

        logging.debug(f"vLLM: Extracted text (showing first 500 chars): {text[:500]}...")

        if system_prompt is None:
            system_prompt = "You are a helpful AI assistant."

        model = model or loaded_config_data['models']['vllm']

        # Prepare the API request
        headers = {
            "Content-Type": "application/json"
        }

        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"{custom_prompt_input}\n\n{text}"}
            ],
            # Bug fix: the temp parameter was accepted but never sent.
            "temperature": temp
        }

        # Make the API call
        logging.debug(f"vLLM: Sending request to {vllm_api_url}")
        response = requests.post(vllm_api_url, headers=headers, json=payload)

        # Check for successful response
        response.raise_for_status()

        # Extract and return the summary
        response_data = response.json()
        if 'choices' in response_data and len(response_data['choices']) > 0:
            summary = response_data['choices'][0]['message']['content']
            logging.debug("vLLM: Summarization successful")
            logging.debug(f"vLLM: Summary (first 500 chars): {summary[:500]}...")
            return summary
        else:
            raise ValueError("Unexpected response format from vLLM API")

    except requests.RequestException as e:
        logging.error(f"vLLM: API request failed: {str(e)}")
        return f"Error: vLLM API request failed - {str(e)}"
    except json.JSONDecodeError as e:
        logging.error(f"vLLM: Failed to parse API response: {str(e)}")
        return f"Error: Failed to parse vLLM API response - {str(e)}"
    except Exception as e:
        logging.error(f"vLLM: Unexpected error during summarization: {str(e)}")
        return f"Error: Unexpected error during vLLM summarization - {str(e)}"
607
-
608
-
609
def summarize_with_ollama(input_data, custom_prompt, api_url="http://127.0.0.1:11434/api/generate", api_key=None, temp=None, system_message=None, model=None):
    """Summarize text with an Ollama server.

    Args:
        input_data: Path to a JSON transcript file, a list of segment dicts,
            or a raw text string.
        custom_prompt: Instructions prepended before the text.
        api_url: Ollama endpoint to POST to.
        api_key: Optional bearer token; falls back to the config file key.
        temp: Accepted for signature parity; not sent by this implementation.
        system_message: Optional system prompt; defaults to a generic assistant.
        model: Model name; falls back to the configured Ollama model.

    Returns:
        The summary string, a previously stored summary, or an error-message
        string (this function never raises to the caller).
    """
    try:
        logging.debug("ollama: Loading and validating configurations")
        loaded_config_data = load_and_log_configs()
        if loaded_config_data is None:
            logging.error("Failed to load configuration data")
            ollama_api_key = None
        else:
            # Prioritize the API key passed as a parameter
            if api_key and api_key.strip():
                ollama_api_key = api_key
                logging.info("Ollama: Using API key provided as parameter")
            else:
                # If no parameter is provided, use the key from the config
                ollama_api_key = loaded_config_data['api_keys'].get('ollama')
                if ollama_api_key:
                    logging.info("Ollama: Using API key from config file")
                else:
                    logging.warning("Ollama: No API key found in config file")

            # Bug fix: the model parameter was unconditionally overwritten by
            # the config value (and crashed when the config failed to load);
            # the parameter now takes precedence.
            model = model or loaded_config_data['models']['ollama']

        # Load transcript
        logging.debug("Ollama: Loading JSON data")
        if isinstance(input_data, str) and os.path.isfile(input_data):
            logging.debug("Ollama: Loading json data for summarization")
            with open(input_data, 'r') as file:
                data = json.load(file)
        else:
            logging.debug("Ollama: Using provided string data for summarization")
            data = input_data

        logging.debug(f"Ollama: Loaded data: {data}")
        logging.debug(f"Ollama: Type of data: {type(data)}")

        if isinstance(data, dict) and 'summary' in data:
            # If the loaded data is a dictionary and already contains a summary, return it
            logging.debug("Ollama: Summary already exists in the loaded data")
            return data['summary']

        # If the loaded data is a list of segment dictionaries or a string, proceed with summarization
        if isinstance(data, list):
            text = extract_text_from_segments(data)
        elif isinstance(data, str):
            text = data
        else:
            raise ValueError("Ollama: Invalid input data format")

        headers = {
            'accept': 'application/json',
            'content-type': 'application/json',
        }
        # Bug fix: len(None) used to raise TypeError when no key was resolved.
        if ollama_api_key and len(ollama_api_key) > 5:
            headers['Authorization'] = f'Bearer {ollama_api_key}'

        ollama_prompt = f"{custom_prompt} \n\n\n\n{text}"
        if system_message is None:
            system_message = "You are a helpful AI assistant."
        logging.debug(f"Ollama: Prompt being sent is {ollama_prompt}")

        data = {
            "model": model,
            "messages": [
                {"role": "system",
                 "content": system_message
                 },
                {"role": "user",
                 "content": ollama_prompt
                 }
            ],
        }

        logging.debug("Ollama: Submitting request to API endpoint")
        print("Ollama: Submitting request to API endpoint")
        response = requests.post(api_url, headers=headers, json=data)
        response_data = response.json()
        logging.debug("API Response Data: %s", response_data)

        if response.status_code == 200:
            logging.debug(response_data)
            # Robustness fix: Ollama's chat responses nest the text under
            # 'message' -> 'content' and /api/generate uses 'response'; the
            # old code only read a top-level 'content' and raised KeyError.
            summary = (response_data.get('message', {}).get('content')
                       or response_data.get('response')
                       or response_data.get('content', '')).strip()
            logging.debug("Ollama: Summarization successful")
            print("Summarization successful.")
            return summary
        else:
            logging.error(f"Ollama: API request failed with status code {response.status_code}: {response.text}")
            return f"Ollama: API request failed: {response.text}"

    except Exception as e:
        logging.error("Ollama: Error in processing: %s", str(e))
        return f"Ollama: Error occurred while processing summary with ollama: {str(e)}"
704
-
705
-
706
def save_summary_to_file(summary, file_path):
    """Write *summary* beside *file_path* as '<basename>_summary.txt'.

    The source extension is stripped, so 'clip_segments.json' produces
    'clip_segments_summary.txt' in the same directory.

    Args:
        summary: The summary text to persist.
        file_path: Path of the source file the summary belongs to.
    """
    logging.debug("Now saving summary to file...")
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    target_dir = os.path.dirname(file_path)
    summary_file_path = os.path.join(target_dir, base_name + '_summary.txt')
    # Bug fix: os.makedirs('') raises FileNotFoundError when file_path is a
    # bare filename with no directory component.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    logging.debug("Opening summary file for writing, *segments.json with *_summary.txt")
    with open(summary_file_path, 'w') as file:
        file.write(summary)
    logging.info(f"Summary saved to file: {summary_file_path}")
715
-
716
- #
717
- #
718
- #######################################################################################################################
719
-
720
-
721
-