File size: 2,226 Bytes
48f0f78
67ffe93
a90f1c4
a98b950
67ffe93
 
 
 
 
dd832f4
67ffe93
770226f
67ffe93
 
be7c65c
 
67ffe93
 
dd832f4
 
 
 
67ffe93
 
770226f
67ffe93
 
770226f
48f0f78
 
 
 
 
770226f
48f0f78
4a5f826
 
 
be7c65c
4a5f826
8184b96
770226f
8184b96
bdc9122
be7c65c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from extract_text import extract_text_from_pdf
from math_summarizer import generate_math_summary
from nlp_processes import generate_nlp_summary_and_mindmap
import json
import dotenv
import time
import os

# Load settings through dotenv before reading them from the environment
# (presumably from a local .env file — confirm against the deployment setup).
dotenv.load_dotenv()
# Shared secret that main() compares against the caller-supplied access key.
# os.getenv returns None when the variable is not set.
ACCESS_KEY = os.getenv("ACCESS_KEY")

def generate_summary_mindmap(corpus):
    """Produce the summary / mind-map response for ``corpus``.

    ``corpus`` is first handed to ``generate_math_summary``. When that yields
    a truthy result, it is forwarded to ``generate_nlp_summary_and_mindmap``
    and whatever that call returns is passed back unchanged. When the result
    is falsy, an error message is printed and a dict is returned with both
    ``summary_status`` and ``mindmap_status`` set to ``"error"`` and both
    ``summary`` and ``mindmap`` set to ``None``.
    """
    condensed = generate_math_summary(corpus)
    if condensed:
        return generate_nlp_summary_and_mindmap(condensed)

    # The first stage produced nothing usable: report failure for both outputs.
    print("Error generating Math Summary")
    return {
        "summary_status": "error",
        "summary": None,
        "mindmap_status": "error",
        "mindmap": None,
    }

def main(url, id, access_key):
    """Authorise the caller, then summarise the PDF found at ``url``.

    Args:
        url: Location of the PDF; forwarded to ``extract_text_from_pdf``.
        id: Identifier forwarded to ``extract_text_from_pdf``. The name
            shadows the builtin but is kept so keyword callers keep working.
        access_key: Key supplied by the caller. It must match the
            module-level ``ACCESS_KEY``.

    Returns:
        On success, a JSON *string* (4-space indent, non-ASCII preserved)
        encoding the result of ``generate_summary_mindmap``.
        On authorisation failure, a *dict* with the keys ``"error"``,
        ``"summary"`` and ``"mindmap"``. The two shapes differ; callers must
        handle both.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    # Fail closed: if no key is configured (ACCESS_KEY is None or empty),
    # nobody is authorised. A plain ``!=`` would let ``None`` match ``None``.
    # compare_digest avoids leaking how much of the key matched via timing.
    # "surrogatepass" keeps encoding from raising on unusual characters, so
    # a malformed key is simply rejected, as it was with ``!=``.
    authorised = (
        bool(ACCESS_KEY)
        and isinstance(access_key, str)
        and hmac.compare_digest(
            access_key.encode("utf-8", "surrogatepass"),
            ACCESS_KEY.encode("utf-8", "surrogatepass"),
        )
    )
    if not authorised:
        return {"error": "Invalid Access Key", "summary": None, "mindmap": None}

    corpus = extract_text_from_pdf(url, id)
    # The timer starts after text extraction, so the printed duration covers
    # only the summary / mind-map generation step.
    start_time = time.time()
    response = generate_summary_mindmap(corpus)
    print(f"Total timetaken: {time.time() - start_time} seconds")
    return json.dumps(response, indent=4, ensure_ascii=False)
    
if __name__ == "__main__":
    # Manual run: summarise one sample paper and save the result to disk.
    url = "https://arxiv.org/pdf/2412.21024"
    # Named pdf_id rather than id so the builtin id() is not shadowed at
    # module level.
    pdf_id = "123"
    access_key = os.environ.get("ACCESS_KEY")
    result = main(url, pdf_id, access_key)
    print(result)
    with open("output.json", "w", encoding="utf-8") as f:
        if isinstance(result, str):
            # main() already returns serialised JSON on success. Dumping it
            # again would store one quoted, escaped string instead of an
            # object, so write it verbatim.
            f.write(result)
        else:
            # The authorisation-failure path returns a plain dict.
            json.dump(result, f, ensure_ascii=False, indent=4)