Spaces:
Running
Running
Commit
·
be7c65c
1
Parent(s):
5ab026b
Update .gitignore, refactor NLP summary functions, and enhance output handling
Browse files
- .gitignore +5 -1
- main.py +11 -14
- nlp_summarizer.py +33 -13
- requirements.txt +1 -0
.gitignore
CHANGED
@@ -1,3 +1,7 @@
|
|
1 |
__pycache__
|
2 |
.env
|
3 |
-
/downloads
|
|
|
|
|
|
|
|
|
|
1 |
__pycache__
|
2 |
.env
|
3 |
+
/downloads
|
4 |
+
*.html
|
5 |
+
/har*
|
6 |
+
*.json
|
7 |
+
test*
|
main.py
CHANGED
@@ -8,19 +8,13 @@ import time
|
|
8 |
import os
|
9 |
|
10 |
dotenv.load_dotenv()
|
11 |
-
API_KEY = os.getenv("API_KEY")
|
12 |
ACCESS_KEY = os.getenv("ACCESS_KEY")
|
13 |
|
14 |
-
def
|
15 |
-
client = openai.OpenAI(
|
16 |
-
api_key=api_key,
|
17 |
-
base_url="https://api.groq.com/openai/v1",
|
18 |
-
)
|
19 |
-
return client
|
20 |
-
|
21 |
-
def generate_summary(client, corpus):
|
22 |
response = {}
|
23 |
math_summary = generate_math_summary(corpus)
|
|
|
|
|
24 |
if not math_summary:
|
25 |
print("Error generating Math Summary")
|
26 |
response["summary_status"] = "error"
|
@@ -29,7 +23,8 @@ def generate_summary(client, corpus):
|
|
29 |
response["mindmap"] = None
|
30 |
return response
|
31 |
else:
|
32 |
-
response = generate_nlp_summary_and_mindmap(
|
|
|
33 |
return response
|
34 |
|
35 |
def main(url, id, access_key):
|
@@ -38,13 +33,15 @@ def main(url, id, access_key):
|
|
38 |
else:
|
39 |
corpus = extract_text_from_pdf(url, id)
|
40 |
start_time = time.time()
|
41 |
-
|
42 |
-
response = generate_summary(client, corpus)
|
43 |
print(f"Total timetaken: {time.time() - start_time} seconds")
|
44 |
return json.dumps(response, indent=4, ensure_ascii=False)
|
45 |
|
46 |
if __name__ == "__main__":
|
47 |
-
url = "https://arxiv.org/pdf/
|
48 |
id = "123"
|
49 |
access_key = "1234"
|
50 |
-
|
|
|
|
|
|
|
|
8 |
import os
|
9 |
|
10 |
dotenv.load_dotenv()
|
|
|
11 |
ACCESS_KEY = os.getenv("ACCESS_KEY")
|
12 |
|
13 |
+
def generate_summary_mindmap(corpus):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
response = {}
|
15 |
math_summary = generate_math_summary(corpus)
|
16 |
+
# print(f'As a text script expert, please help me to write a short text script with the topic \" {math_summary}\".You have three tasks, which are:\\n 1.to summarize the text I provided into a Summary .Please answer within 150-300 characters.\\n 2.to summarize the text I provided, using up to seven Highlight.\\n 3.to summarize the text I provided, using up to seven Key Insights. Each insight should include a brief in-depth analysis. Key Insight should not include timestamps.\\n Your output should use the following template strictly, provide the results for the three tasks:\\n ## Summary\\n ## Highlights\\n - Highlights\\n ## Key Insights\\n - Key Insights .\\n Importantly your output must use language \"English\"')
|
17 |
+
# exit()
|
18 |
if not math_summary:
|
19 |
print("Error generating Math Summary")
|
20 |
response["summary_status"] = "error"
|
|
|
23 |
response["mindmap"] = None
|
24 |
return response
|
25 |
else:
|
26 |
+
response = generate_nlp_summary_and_mindmap(math_summary)
|
27 |
+
print(len(response))
|
28 |
return response
|
29 |
|
30 |
def main(url, id, access_key):
|
|
|
33 |
else:
|
34 |
corpus = extract_text_from_pdf(url, id)
|
35 |
start_time = time.time()
|
36 |
+
response = generate_summary_mindmap(corpus)
|
|
|
37 |
print(f"Total timetaken: {time.time() - start_time} seconds")
|
38 |
return json.dumps(response, indent=4, ensure_ascii=False)
|
39 |
|
40 |
if __name__ == "__main__":
|
41 |
+
url = "https://arxiv.org/pdf/2412.21024"
|
42 |
id = "123"
|
43 |
access_key = "1234"
|
44 |
+
data = main(url, id, access_key)
|
45 |
+
print(len(data))
|
46 |
+
with open("output.json", "w", encoding='utf-8') as f:
|
47 |
+
json.dump(data, f, ensure_ascii=False, indent=4)
|
nlp_summarizer.py
CHANGED
@@ -1,37 +1,57 @@
|
|
|
|
|
|
1 |
import threading
|
2 |
|
3 |
-
def generate_nlp_summary(
|
4 |
-
print("Generating NLP Summary")
|
5 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
completion = client.chat.completions.create(
|
7 |
-
model="llama-3.
|
8 |
messages=[
|
9 |
{"role": "system", "content": "You are a helpful research assistant for generating well-formatted summaries from scientific research papers."},
|
10 |
{"role": "user", "content": f'As a text script expert, please help me to write a short text script with the topic \" {temp_summary}\".You have three tasks, which are:\\n 1.to summarize the text I provided into a Summary .Please answer within 150-300 characters.\\n 2.to summarize the text I provided, using up to seven Highlight.\\n 3.to summarize the text I provided, using up to seven Key Insights. Each insight should include a brief in-depth analysis. Key Insight should not include timestamps.\\n Your output should use the following template strictly, provide the results for the three tasks:\\n ## Summary\\n ## Highlights\\n - Highlights\\n ## Key Insights\\n - Key Insights .\\n Importantly your output must use language \"English\"'}
|
11 |
-
]
|
12 |
)
|
13 |
-
|
|
|
14 |
except Exception as e:
|
|
|
15 |
return False
|
16 |
|
17 |
-
def generate_nlp_mindmap(
|
18 |
-
print("Generating Mindmap")
|
19 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
completion = client.chat.completions.create(
|
21 |
-
model="llama-3.
|
22 |
messages=[
|
23 |
{"role": "system", "content": "You are a helpful research assistant for generating well-formatted mindmaps from scientific research papers."},
|
24 |
{"role": "user", "content": f'As a text script expert, please help me to write a short text script with the topic \"{temp_summary}\".Your output should use the following template:\\n\\n## {{Subtitle01}}\\n- {{Bulletpoint01}}\\n- {{Bulletpoint02}}\\n## {{Subtitle02}}\\n- {{Bulletpoint03}}\\n- {{Bulletpoint04}}\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles) structure markdown. Do not include anything in the response, that is not the part of mindmap.\\n Most Importantly your output must use language \"English\" and each point or pointer should include no more than 9 words.'}
|
25 |
-
]
|
26 |
)
|
27 |
-
|
|
|
28 |
except Exception as e:
|
|
|
29 |
return False
|
30 |
|
31 |
-
def generate_nlp_summary_and_mindmap(
|
32 |
response = {}
|
33 |
def local_generate_nlp_summary():
|
34 |
-
nlp_summary = generate_nlp_summary(
|
35 |
if not nlp_summary:
|
36 |
response["summary_status"] = "error"
|
37 |
response["summary"] = None
|
@@ -39,7 +59,7 @@ def generate_nlp_summary_and_mindmap(client, temp_summary):
|
|
39 |
response["summary_status"] = "success"
|
40 |
response["summary"] = nlp_summary
|
41 |
def local_generate_nlp_mindmap():
|
42 |
-
nlp_mindmap = generate_nlp_mindmap(
|
43 |
if not nlp_mindmap:
|
44 |
response["mindmap_status"] = "error"
|
45 |
response["mindmap"] = None
|
|
|
1 |
+
from g4f.client import Client
|
2 |
+
from g4f.Provider import RetryProvider, Blackbox, MetaAI
|
3 |
import threading
|
4 |
|
5 |
+
def generate_nlp_summary(temp_summary):
|
|
|
6 |
try:
|
7 |
+
client = Client(
|
8 |
+
provider=RetryProvider(
|
9 |
+
providers=[Blackbox, MetaAI],
|
10 |
+
shuffle=True,
|
11 |
+
single_provider_retry=True,
|
12 |
+
max_retries=3,
|
13 |
+
),
|
14 |
+
)
|
15 |
completion = client.chat.completions.create(
|
16 |
+
model="llama-3.1-405b",
|
17 |
messages=[
|
18 |
{"role": "system", "content": "You are a helpful research assistant for generating well-formatted summaries from scientific research papers."},
|
19 |
{"role": "user", "content": f'As a text script expert, please help me to write a short text script with the topic \" {temp_summary}\".You have three tasks, which are:\\n 1.to summarize the text I provided into a Summary .Please answer within 150-300 characters.\\n 2.to summarize the text I provided, using up to seven Highlight.\\n 3.to summarize the text I provided, using up to seven Key Insights. Each insight should include a brief in-depth analysis. Key Insight should not include timestamps.\\n Your output should use the following template strictly, provide the results for the three tasks:\\n ## Summary\\n ## Highlights\\n - Highlights\\n ## Key Insights\\n - Key Insights .\\n Importantly your output must use language \"English\"'}
|
20 |
+
],
|
21 |
)
|
22 |
+
print(completion.choices[0].message.content.replace("**", "").replace("\n\n", "\n"))
|
23 |
+
return completion.choices[0].message.content.replace("**", "").replace("\n\n", "\n")
|
24 |
except Exception as e:
|
25 |
+
print(str(e))
|
26 |
return False
|
27 |
|
28 |
+
def generate_nlp_mindmap(temp_summary):
|
|
|
29 |
try:
|
30 |
+
client = Client(
|
31 |
+
provider=RetryProvider(
|
32 |
+
providers=[Blackbox, MetaAI],
|
33 |
+
shuffle=True,
|
34 |
+
single_provider_retry=True,
|
35 |
+
max_retries=3,
|
36 |
+
),
|
37 |
+
)
|
38 |
completion = client.chat.completions.create(
|
39 |
+
model="llama-3.1-405b",
|
40 |
messages=[
|
41 |
{"role": "system", "content": "You are a helpful research assistant for generating well-formatted mindmaps from scientific research papers."},
|
42 |
{"role": "user", "content": f'As a text script expert, please help me to write a short text script with the topic \"{temp_summary}\".Your output should use the following template:\\n\\n## {{Subtitle01}}\\n- {{Bulletpoint01}}\\n- {{Bulletpoint02}}\\n## {{Subtitle02}}\\n- {{Bulletpoint03}}\\n- {{Bulletpoint04}}\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles) structure markdown. Do not include anything in the response, that is not the part of mindmap.\\n Most Importantly your output must use language \"English\" and each point or pointer should include no more than 9 words.'}
|
43 |
+
],
|
44 |
)
|
45 |
+
print(completion.choices[0].message.content.replace("**", "").replace("\n\n", "\n"))
|
46 |
+
return completion.choices[0].message.content.replace("**", "").replace("\n\n", "\n")
|
47 |
except Exception as e:
|
48 |
+
print(str(e))
|
49 |
return False
|
50 |
|
51 |
+
def generate_nlp_summary_and_mindmap(temp_summary):
|
52 |
response = {}
|
53 |
def local_generate_nlp_summary():
|
54 |
+
nlp_summary = generate_nlp_summary(temp_summary)
|
55 |
if not nlp_summary:
|
56 |
response["summary_status"] = "error"
|
57 |
response["summary"] = None
|
|
|
59 |
response["summary_status"] = "success"
|
60 |
response["summary"] = nlp_summary
|
61 |
def local_generate_nlp_mindmap():
|
62 |
+
nlp_mindmap = generate_nlp_mindmap(temp_summary)
|
63 |
if not nlp_mindmap:
|
64 |
response["mindmap_status"] = "error"
|
65 |
response["mindmap"] = None
|
requirements.txt
CHANGED
@@ -3,3 +3,4 @@ gradio==5.8.0
|
|
3 |
python-dotenv==1.0.1
|
4 |
pdfplumber==0.11.4
|
5 |
requests==2.32.3
|
|
|
|
3 |
python-dotenv==1.0.1
|
4 |
pdfplumber==0.11.4
|
5 |
requests==2.32.3
|
6 |
+
g4f[all]==0.4.0.2
|