# ScientryAPI / main.py
# raannakasturi's picture
# Refactor summary generation functions to remove unused parameters and simplify the API
# 770226f
from extract_text import extract_text_from_pdf
from math_summarizer import generate_math_summary
from nlp_processes import generate_nlp_summary_and_mindmap
import json
import dotenv
import time
import os
# Load variables from a local .env file (if any) before reading the key below.
dotenv.load_dotenv()

# Shared secret that main() compares against the caller-supplied access key;
# None when the ACCESS_KEY environment variable is not set.
ACCESS_KEY = os.environ.get("ACCESS_KEY")
def generate_summary_mindmap(corpus):
    """Build the summary/mindmap payload for *corpus*.

    The corpus is first passed to ``generate_math_summary``.  A truthy result
    is handed to ``generate_nlp_summary_and_mindmap`` and that function's
    return value is returned unchanged.  A falsy result is reported on stdout
    and answered with an error payload whose ``summary`` and ``mindmap``
    entries are ``None``.
    """
    condensed = generate_math_summary(corpus)
    if condensed:
        return generate_nlp_summary_and_mindmap(condensed)

    # Nothing to feed into the NLP stage: flag both outputs as failed.
    print("Error generating Math Summary")
    return {
        "summary_status": "error",
        "summary": None,
        "mindmap_status": "error",
        "mindmap": None,
    }
def main(url, id, access_key):
    """Authorise the caller, then summarise the PDF at *url*.

    Args:
        url: Location of the PDF, forwarded to ``extract_text_from_pdf``.
        id: Identifier forwarded to ``extract_text_from_pdf`` alongside *url*.
        access_key: Key supplied by the caller; must equal ``ACCESS_KEY``.

    Returns:
        On a rejected key, a dict with an ``"error"`` message and ``None`` for
        ``"summary"`` and ``"mindmap"``.  Otherwise the result of
        ``generate_summary_mindmap`` serialised as an indented JSON string.
    """
    import hmac  # function-scope import keeps this block self-contained

    # Fail closed: an unset or empty ACCESS_KEY must never authorise anyone.
    # A plain `!=` would treat None == None (key not configured, none
    # supplied) as a successful match.
    authorised = (
        isinstance(ACCESS_KEY, str)
        and bool(ACCESS_KEY)
        and isinstance(access_key, str)
        # Constant-time comparison; bytes are used because compare_digest
        # rejects non-ASCII str arguments.
        and hmac.compare_digest(
            access_key.encode("utf-8"), ACCESS_KEY.encode("utf-8")
        )
    )
    if not authorised:
        return {"error": "Invalid Access Key", "summary": None, "mindmap": None}

    corpus = extract_text_from_pdf(url, id)
    # The reported time covers summary/mindmap generation only, not extraction.
    start_time = time.time()
    response = generate_summary_mindmap(corpus)
    print(f"Total timetaken: {time.time() - start_time} seconds")
    return json.dumps(response, indent=4, ensure_ascii=False)
if __name__ == "__main__":
    # Manual run: summarise a fixed arXiv PDF using the key from the
    # environment, print the result and store it in output.json.
    url = "https://arxiv.org/pdf/2412.21024"
    paper_id = "123"
    access_key = os.environ.get("ACCESS_KEY")
    data = main(url, paper_id, access_key)
    print((data))
    with open("output.json", "w", encoding="utf-8") as f:
        if isinstance(data, str):
            # main() already returns serialised JSON on success; dumping it
            # again would store one quoted, escaped string instead of the
            # JSON object.
            f.write(data)
        else:
            # The invalid-key path returns a plain dict, which still needs
            # serialising.
            json.dump(data, f, ensure_ascii=False, indent=4)