raannakasturi committed on
Commit
be7c65c
·
1 Parent(s): 5ab026b

Update .gitignore, refactor NLP summary functions, and enhance output handling

Browse files
Files changed (4) hide show
  1. .gitignore +5 -1
  2. main.py +11 -14
  3. nlp_summarizer.py +33 -13
  4. requirements.txt +1 -0
.gitignore CHANGED
@@ -1,3 +1,7 @@
1
  __pycache__
2
  .env
3
- /downloads
 
 
 
 
 
1
  __pycache__
2
  .env
3
+ /downloads
4
+ *.html
5
+ /har*
6
+ *.json
7
+ test*
main.py CHANGED
@@ -8,19 +8,13 @@ import time
8
  import os
9
 
10
  dotenv.load_dotenv()
11
- API_KEY = os.getenv("API_KEY")
12
  ACCESS_KEY = os.getenv("ACCESS_KEY")
13
 
14
- def create_client(api_key):
15
- client = openai.OpenAI(
16
- api_key=api_key,
17
- base_url="https://api.groq.com/openai/v1",
18
- )
19
- return client
20
-
21
- def generate_summary(client, corpus):
22
  response = {}
23
  math_summary = generate_math_summary(corpus)
 
 
24
  if not math_summary:
25
  print("Error generating Math Summary")
26
  response["summary_status"] = "error"
@@ -29,7 +23,8 @@ def generate_summary(client, corpus):
29
  response["mindmap"] = None
30
  return response
31
  else:
32
- response = generate_nlp_summary_and_mindmap(client, corpus)
 
33
  return response
34
 
35
  def main(url, id, access_key):
@@ -38,13 +33,15 @@ def main(url, id, access_key):
38
  else:
39
  corpus = extract_text_from_pdf(url, id)
40
  start_time = time.time()
41
- client = create_client(API_KEY)
42
- response = generate_summary(client, corpus)
43
  print(f"Total timetaken: {time.time() - start_time} seconds")
44
  return json.dumps(response, indent=4, ensure_ascii=False)
45
 
46
  if __name__ == "__main__":
47
- url = "https://arxiv.org/pdf/2106.01484"
48
  id = "123"
49
  access_key = "1234"
50
- print(main(url, id, access_key))
 
 
 
 
8
  import os
9
 
10
  dotenv.load_dotenv()
 
11
  ACCESS_KEY = os.getenv("ACCESS_KEY")
12
 
13
+ def generate_summary_mindmap(corpus):
 
 
 
 
 
 
 
14
  response = {}
15
  math_summary = generate_math_summary(corpus)
16
+ # print(f'As a text script expert, please help me to write a short text script with the topic \" {math_summary}\".You have three tasks, which are:\\n 1.to summarize the text I provided into a Summary .Please answer within 150-300 characters.\\n 2.to summarize the text I provided, using up to seven Highlight.\\n 3.to summarize the text I provided, using up to seven Key Insights. Each insight should include a brief in-depth analysis. Key Insight should not include timestamps.\\n Your output should use the following template strictly, provide the results for the three tasks:\\n ## Summary\\n ## Highlights\\n - Highlights\\n ## Key Insights\\n - Key Insights .\\n Importantly your output must use language \"English\"')
17
+ # exit()
18
  if not math_summary:
19
  print("Error generating Math Summary")
20
  response["summary_status"] = "error"
 
23
  response["mindmap"] = None
24
  return response
25
  else:
26
+ response = generate_nlp_summary_and_mindmap(math_summary)
27
+ print(len(response))
28
  return response
29
 
30
  def main(url, id, access_key):
 
33
  else:
34
  corpus = extract_text_from_pdf(url, id)
35
  start_time = time.time()
36
+ response = generate_summary_mindmap(corpus)
 
37
  print(f"Total timetaken: {time.time() - start_time} seconds")
38
  return json.dumps(response, indent=4, ensure_ascii=False)
39
 
40
  if __name__ == "__main__":
41
+ url = "https://arxiv.org/pdf/2412.21024"
42
  id = "123"
43
  access_key = "1234"
44
+ data = main(url, id, access_key)
45
+ print(len(data))
46
+ with open("output.json", "w", encoding='utf-8') as f:
47
+ json.dump(data, f, ensure_ascii=False, indent=4)
nlp_summarizer.py CHANGED
@@ -1,37 +1,57 @@
 
 
1
  import threading
2
 
3
- def generate_nlp_summary(client, temp_summary):
4
- print("Generating NLP Summary")
5
  try:
 
 
 
 
 
 
 
 
6
  completion = client.chat.completions.create(
7
- model="llama-3.2-90b-vision-preview",
8
  messages=[
9
  {"role": "system", "content": "You are a helpful research assistant for generating well-formatted summaries from scientific research papers."},
10
  {"role": "user", "content": f'As a text script expert, please help me to write a short text script with the topic \" {temp_summary}\".You have three tasks, which are:\\n 1.to summarize the text I provided into a Summary .Please answer within 150-300 characters.\\n 2.to summarize the text I provided, using up to seven Highlight.\\n 3.to summarize the text I provided, using up to seven Key Insights. Each insight should include a brief in-depth analysis. Key Insight should not include timestamps.\\n Your output should use the following template strictly, provide the results for the three tasks:\\n ## Summary\\n ## Highlights\\n - Highlights\\n ## Key Insights\\n - Key Insights .\\n Importantly your output must use language \"English\"'}
11
- ]
12
  )
13
- return completion.choices[0].message.content.replace("**", "")
 
14
  except Exception as e:
 
15
  return False
16
 
17
- def generate_nlp_mindmap(client, temp_summary):
18
- print("Generating Mindmap")
19
  try:
 
 
 
 
 
 
 
 
20
  completion = client.chat.completions.create(
21
- model="llama-3.2-90b-vision-preview",
22
  messages=[
23
  {"role": "system", "content": "You are a helpful research assistant for generating well-formatted mindmaps from scientific research papers."},
24
  {"role": "user", "content": f'As a text script expert, please help me to write a short text script with the topic \"{temp_summary}\".Your output should use the following template:\\n\\n## {{Subtitle01}}\\n- {{Bulletpoint01}}\\n- {{Bulletpoint02}}\\n## {{Subtitle02}}\\n- {{Bulletpoint03}}\\n- {{Bulletpoint04}}\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles) structure markdown. Do not include anything in the response, that is not the part of mindmap.\\n Most Importantly your output must use language \"English\" and each point or pointer should include no more than 9 words.'}
25
- ]
26
  )
27
- return completion.choices[0].message.content.replace("**", "")
 
28
  except Exception as e:
 
29
  return False
30
 
31
- def generate_nlp_summary_and_mindmap(client, temp_summary):
32
  response = {}
33
  def local_generate_nlp_summary():
34
- nlp_summary = generate_nlp_summary(client, temp_summary)
35
  if not nlp_summary:
36
  response["summary_status"] = "error"
37
  response["summary"] = None
@@ -39,7 +59,7 @@ def generate_nlp_summary_and_mindmap(client, temp_summary):
39
  response["summary_status"] = "success"
40
  response["summary"] = nlp_summary
41
  def local_generate_nlp_mindmap():
42
- nlp_mindmap = generate_nlp_mindmap(client, temp_summary)
43
  if not nlp_mindmap:
44
  response["mindmap_status"] = "error"
45
  response["mindmap"] = None
 
1
+ from g4f.client import Client
2
+ from g4f.Provider import RetryProvider, Blackbox, MetaAI
3
  import threading
4
 
5
+ def generate_nlp_summary(temp_summary):
 
6
  try:
7
+ client = Client(
8
+ provider=RetryProvider(
9
+ providers=[Blackbox, MetaAI],
10
+ shuffle=True,
11
+ single_provider_retry=True,
12
+ max_retries=3,
13
+ ),
14
+ )
15
  completion = client.chat.completions.create(
16
+ model="llama-3.1-405b",
17
  messages=[
18
  {"role": "system", "content": "You are a helpful research assistant for generating well-formatted summaries from scientific research papers."},
19
  {"role": "user", "content": f'As a text script expert, please help me to write a short text script with the topic \" {temp_summary}\".You have three tasks, which are:\\n 1.to summarize the text I provided into a Summary .Please answer within 150-300 characters.\\n 2.to summarize the text I provided, using up to seven Highlight.\\n 3.to summarize the text I provided, using up to seven Key Insights. Each insight should include a brief in-depth analysis. Key Insight should not include timestamps.\\n Your output should use the following template strictly, provide the results for the three tasks:\\n ## Summary\\n ## Highlights\\n - Highlights\\n ## Key Insights\\n - Key Insights .\\n Importantly your output must use language \"English\"'}
20
+ ],
21
  )
22
+ print(completion.choices[0].message.content.replace("**", "").replace("\n\n", "\n"))
23
+ return completion.choices[0].message.content.replace("**", "").replace("\n\n", "\n")
24
  except Exception as e:
25
+ print(str(e))
26
  return False
27
 
28
+ def generate_nlp_mindmap(temp_summary):
 
29
  try:
30
+ client = Client(
31
+ provider=RetryProvider(
32
+ providers=[Blackbox, MetaAI],
33
+ shuffle=True,
34
+ single_provider_retry=True,
35
+ max_retries=3,
36
+ ),
37
+ )
38
  completion = client.chat.completions.create(
39
+ model="llama-3.1-405b",
40
  messages=[
41
  {"role": "system", "content": "You are a helpful research assistant for generating well-formatted mindmaps from scientific research papers."},
42
  {"role": "user", "content": f'As a text script expert, please help me to write a short text script with the topic \"{temp_summary}\".Your output should use the following template:\\n\\n## {{Subtitle01}}\\n- {{Bulletpoint01}}\\n- {{Bulletpoint02}}\\n## {{Subtitle02}}\\n- {{Bulletpoint03}}\\n- {{Bulletpoint04}}\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles) structure markdown. Do not include anything in the response, that is not the part of mindmap.\\n Most Importantly your output must use language \"English\" and each point or pointer should include no more than 9 words.'}
43
+ ],
44
  )
45
+ print(completion.choices[0].message.content.replace("**", "").replace("\n\n", "\n"))
46
+ return completion.choices[0].message.content.replace("**", "").replace("\n\n", "\n")
47
  except Exception as e:
48
+ print(str(e))
49
  return False
50
 
51
+ def generate_nlp_summary_and_mindmap(temp_summary):
52
  response = {}
53
  def local_generate_nlp_summary():
54
+ nlp_summary = generate_nlp_summary(temp_summary)
55
  if not nlp_summary:
56
  response["summary_status"] = "error"
57
  response["summary"] = None
 
59
  response["summary_status"] = "success"
60
  response["summary"] = nlp_summary
61
  def local_generate_nlp_mindmap():
62
+ nlp_mindmap = generate_nlp_mindmap(temp_summary)
63
  if not nlp_mindmap:
64
  response["mindmap_status"] = "error"
65
  response["mindmap"] = None
requirements.txt CHANGED
@@ -3,3 +3,4 @@ gradio==5.8.0
3
  python-dotenv==1.0.1
4
  pdfplumber==0.11.4
5
  requests==2.32.3
 
 
3
  python-dotenv==1.0.1
4
  pdfplumber==0.11.4
5
  requests==2.32.3
6
+ g4f[all]==0.4.0.2