oceansweep committed on
Commit
0cc941c
·
verified ·
1 Parent(s): 864a9ff

Delete App_Function_Libraries/Article_Summarization_Lib.py

Browse files
App_Function_Libraries/Article_Summarization_Lib.py DELETED
@@ -1,246 +0,0 @@
1
- # Article_Summarization_Lib.py
2
- #########################################
3
- # Article Summarization Library
4
- # This library is used to handle summarization of articles.
5
-
6
- #
7
- ####
8
- #
9
- ####################
10
- # Function List
11
- #
12
- # 1.
13
- #
14
- ####################
15
- #
16
- # Import necessary libraries
17
- import datetime
18
- from datetime import datetime
19
- import gradio as gr
20
- import json
21
- import os
22
- import logging
23
- import requests
24
- # 3rd-Party Imports
25
- from tqdm import tqdm
26
-
27
- from App_Function_Libraries.Utils.Utils import sanitize_filename
28
- # Local Imports
29
- from Article_Extractor_Lib import scrape_article
30
- from App_Function_Libraries.Summarization.Local_Summarization_Lib import summarize_with_llama, summarize_with_oobabooga, summarize_with_tabbyapi, \
31
- summarize_with_vllm, summarize_with_kobold, save_summary_to_file, summarize_with_local_llm
32
- from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, summarize_with_cohere, \
33
- summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, summarize_with_huggingface, \
34
- summarize_with_mistral
35
- from App_Function_Libraries.DB.DB_Manager import ingest_article_to_db
36
- #
37
- #######################################################################################################################
38
- # Function Definitions
39
- #
40
-
41
def scrape_and_summarize_multiple(urls, custom_prompt_arg, api_name, api_key, keywords, custom_article_titles, system_message=None):
    """Scrape a newline-separated batch of article URLs and collect the successful results.

    Args:
        urls: Newline-separated string of article URLs; blank lines are ignored.
        custom_prompt_arg, api_name, api_key, keywords, system_message: Accepted for
            interface compatibility with the single-URL path; not used here.
        custom_article_titles: Optional newline-separated title overrides, matched
            to URLs by position.

    Returns:
        A list of article dicts for every URL whose extraction succeeded.
        Per-URL failures are accumulated and logged in one batch at the end.
    """
    url_list = [entry.strip() for entry in urls.split('\n') if entry.strip()]
    title_overrides = custom_article_titles.split('\n') if custom_article_titles else []

    results = []
    errors = []

    # Gradio progress bar for UI feedback alongside the console tqdm bar.
    progress = gr.Progress()
    total = len(url_list)

    for idx, target_url in tqdm(enumerate(url_list), total=total, desc="Processing URLs"):
        override = title_overrides[idx] if idx < len(title_overrides) else None
        try:
            article = scrape_article(target_url)
            # Only keep articles the extractor marked as successful.
            if article and article['extraction_successful']:
                if override:
                    article['title'] = override
                results.append(article)
        except Exception as e:
            errors.append(f"Error processing URL {idx + 1} ({target_url}): {str(e)}")

        # Progress advances even when a URL failed, so the bar always completes.
        progress((idx + 1) / total, desc=f"Processed {idx + 1}/{total} URLs")

    if errors:
        # One consolidated log entry rather than one per failure.
        logging.error("\n".join(errors))

    return results
70
-
71
-
72
def scrape_and_summarize(url, custom_prompt_arg, api_name, api_key, keywords, custom_article_title, system_message=None):
    """Scrape a single article, summarize it with the selected API, and ingest it into the DB.

    Args:
        url: Article URL to scrape.
        custom_prompt_arg: Optional custom summarization prompt; a default is used if falsy.
        api_name: Name of the summarization backend (e.g. 'openai', 'anthropic'); if falsy,
            summarization is skipped and a placeholder message is used.
        api_key: API key for the selected backend (ignored by local backends).
        keywords: Keywords passed through to the DB ingestion step.
        custom_article_title: Optional title override for the scraped article.
        system_message: Optional system prompt; a default summarizer persona is used if None.

    Returns:
        A human-readable string with title, author, ingestion result, summary and contents,
        or an error string if scraping/processing failed.
    """
    try:
        # Step 1: Scrape the article
        article_data = scrape_article(url)
        print(f"Scraped Article Data: {article_data}")  # Debugging statement
        if not article_data:
            return "Failed to scrape the article."

        # Use the custom title if provided, otherwise use the scraped title
        title = custom_article_title.strip() if custom_article_title else article_data.get('title', 'Untitled')
        author = article_data.get('author', 'Unknown')
        content = article_data.get('content', '')
        ingestion_date = datetime.now().strftime('%Y-%m-%d')

        print(f"Title: {title}, Author: {author}, Content Length: {len(content)}")  # Debugging statement

        # Custom system prompt for the article
        system_message = system_message or "Act as a professional summarizer and summarize this article."
        # Custom prompt for the article
        article_custom_prompt = custom_prompt_arg or "Act as a professional summarizer and summarize this article."

        # Step 2: Summarize the article
        summary = None
        if api_name:
            logging.debug(f"Article_Summarizer: Summarization being performed by {api_name}")

            # Sanitize filename for saving the JSON file
            sanitized_title = sanitize_filename(title)
            # Ensure the output directory exists before writing the segments file.
            os.makedirs("Results", exist_ok=True)
            json_file_path = os.path.join("Results", f"{sanitized_title}_segments.json")

            with open(json_file_path, 'w') as json_file:
                json.dump([{'text': content}], json_file, indent=2)

            # FIXME - Swap out this if/else to use the dedicated function....
            try:
                api = api_name.lower()
                if api == 'openai':
                    summary = summarize_with_openai(api_key, json_file_path, article_custom_prompt, system_message)

                elif api == "anthropic":
                    summary = summarize_with_anthropic(api_key, json_file_path, article_custom_prompt, system_message)

                elif api == "cohere":
                    summary = summarize_with_cohere(api_key, json_file_path, article_custom_prompt, system_message)

                elif api == "groq":
                    logging.debug("MAIN: Trying to summarize with groq")
                    summary = summarize_with_groq(api_key, json_file_path, article_custom_prompt, system_message)

                elif api == "openrouter":
                    logging.debug("MAIN: Trying to summarize with OpenRouter")
                    summary = summarize_with_openrouter(api_key, json_file_path, article_custom_prompt, system_message)

                elif api == "deepseek":
                    logging.debug("MAIN: Trying to summarize with DeepSeek")
                    summary = summarize_with_deepseek(api_key, json_file_path, article_custom_prompt, system_message)

                elif api == "mistral":
                    summary = summarize_with_mistral(api_key, json_file_path, article_custom_prompt, system_message)

                # Local backends below take the file path first and no API key
                # (kobold/ooba still pass the key through, matching their signatures).
                elif api == "llama.cpp":
                    logging.debug("MAIN: Trying to summarize with Llama.cpp")
                    summary = summarize_with_llama(json_file_path, article_custom_prompt, system_message)

                elif api == "kobold":
                    logging.debug("MAIN: Trying to summarize with Kobold.cpp")
                    summary = summarize_with_kobold(json_file_path, api_key, article_custom_prompt, system_message)

                elif api == "ooba":
                    summary = summarize_with_oobabooga(json_file_path, api_key, article_custom_prompt, system_message)

                elif api == "tabbyapi":
                    summary = summarize_with_tabbyapi(json_file_path, article_custom_prompt, system_message)

                elif api == "vllm":
                    logging.debug("MAIN: Trying to summarize with VLLM")
                    summary = summarize_with_vllm(json_file_path, article_custom_prompt, system_message)

                elif api == "local-llm":
                    logging.debug("MAIN: Trying to summarize with Local LLM")
                    summary = summarize_with_local_llm(json_file_path, article_custom_prompt, system_message)

                elif api == "huggingface":
                    logging.debug("MAIN: Trying to summarize with huggingface")
                    # BUG FIX: the result was previously discarded (not assigned),
                    # so HF summaries were never saved or ingested.
                    summary = summarize_with_huggingface(api_key, json_file_path, article_custom_prompt, system_message)
                # Add additional API handlers here...
            except requests.exceptions.ConnectionError as e:
                logging.error(f"Connection error while trying to summarize with {api_name}: {str(e)}")

            if summary:
                logging.info(f"Article_Summarizer: Summary generated using {api_name} API")
                save_summary_to_file(summary, json_file_path)
            else:
                summary = "Summary not available"
                logging.warning(f"Failed to generate summary using {api_name} API")

        else:
            summary = "Article Summarization: No API provided for summarization."

        print(f"Summary: {summary}")  # Debugging statement

        # Step 3: Ingest the article into the database
        ingestion_result = ingest_article_to_db(url, title, author, content, keywords, summary, ingestion_date,
                                                article_custom_prompt)

        return f"Title: {title}\nAuthor: {author}\nIngestion Result: {ingestion_result}\n\nSummary: {summary}\n\nArticle Contents: {content}"
    except Exception as e:
        logging.error(f"Error processing URL {url}: {str(e)}")
        return f"Failed to process URL {url}: {str(e)}"
191
-
192
-
193
def scrape_and_no_summarize_then_ingest(url, keywords, custom_article_title):
    """Scrape a single article and ingest it into the database without summarizing.

    Args:
        url: Article URL to scrape.
        keywords: Keywords passed through to the DB ingestion step.
        custom_article_title: Optional title override for the scraped article.

    Returns:
        A human-readable string with title, author, ingestion result and contents,
        or an error string if scraping/processing failed.
    """
    try:
        # Step 1: Scrape the article
        article_data = scrape_article(url)
        print(f"Scraped Article Data: {article_data}")  # Debugging statement
        if not article_data:
            return "Failed to scrape the article."

        # Use the custom title if provided, otherwise use the scraped title
        title = custom_article_title.strip() if custom_article_title else article_data.get('title', 'Untitled')
        author = article_data.get('author', 'Unknown')
        content = article_data.get('content', '')
        ingestion_date = datetime.now().strftime('%Y-%m-%d')

        print(f"Title: {title}, Author: {author}, Content Length: {len(content)}")  # Debugging statement

        # Step 2: Ingest the article into the database
        # BUG FIX: argument order corrected to match the signature used by
        # scrape_and_summarize — (url, title, author, content, keywords, summary,
        # ingestion_date, custom_prompt). Previously ingestion_date was passed
        # in the summary slot, corrupting both columns.
        ingestion_result = ingest_article_to_db(url, title, author, content, keywords, None, ingestion_date, None)

        return f"Title: {title}\nAuthor: {author}\nIngestion Result: {ingestion_result}\n\nArticle Contents: {content}"
    except Exception as e:
        logging.error(f"Error processing URL {url}: {str(e)}")
        return f"Failed to process URL {url}: {str(e)}"
216
-
217
-
218
def ingest_unstructured_text(text, custom_prompt, api_name, api_key, keywords, custom_article_title, system_message=None):
    """Summarize a block of unstructured text (optionally) and ingest it into the database.

    Args:
        text: Raw text to ingest.
        custom_prompt: Custom summarization prompt forwarded to the API and the DB.
        api_name: Summarization backend name; only 'openai' is currently supported here.
        api_key: API key for the backend.
        keywords: Keywords passed through to the DB ingestion step.
        custom_article_title: Optional title; defaults to "Unstructured Text".
        system_message: Optional system prompt forwarded to the API.

    Returns:
        A human-readable string with title, summary and ingestion result.
    """
    title = custom_article_title.strip() if custom_article_title else "Unstructured Text"
    author = "Unknown"
    ingestion_date = datetime.now().strftime('%Y-%m-%d')

    # Summarize the unstructured text
    if api_name:
        # Sanitize the title before building the path (consistent with
        # scrape_and_summarize) so user-supplied titles cannot inject path
        # separators or invalid filename characters, and ensure the output
        # directory exists before writing.
        os.makedirs("Results", exist_ok=True)
        json_file_path = os.path.join("Results", f"{sanitize_filename(title)}_segments.json")
        with open(json_file_path, 'w') as json_file:
            json.dump([{'text': text}], json_file, indent=2)

        if api_name.lower() == 'openai':
            summary = summarize_with_openai(api_key, json_file_path, custom_prompt, system_message)
        # Add other APIs as needed
        else:
            summary = "Unsupported API."
    else:
        summary = "No API provided for summarization."

    # Ingest the unstructured text into the database
    ingestion_result = ingest_article_to_db('Unstructured Text', title, author, text, keywords, summary, ingestion_date,
                                            custom_prompt)
    return f"Title: {title}\nSummary: {summary}\nIngestion Result: {ingestion_result}"
241
-
242
-
243
-
244
- #
245
- #
246
- #######################################################################################################################