ScientryAPI

Running

App Files Community

raannakasturi committed on Jan 12

Commit

770226f

1 Parent(s): 67b6792

Refactor summary generation functions to remove unused parameters and simplify the API

Browse files

Files changed (3) hide show

app.py +5 -7
main.py +5 -7
nlp_processes.py +1 -187

app.py CHANGED Viewed

@@ -12,8 +12,8 @@ def installChrome():
     subprocess.run(['apt-get', 'clean'])
-def rexplore_summarizer(url, title, id, citation, access_key):
-    response = json.loads(main(url, title, id, citation, access_key))
     data = json.dumps(response, ensure_ascii=False, indent=4)
     if response["mindmap_status"] != "success":
         mindmap = "error"
@@ -25,7 +25,7 @@ def rexplore_summarizer(url, title, id, citation, access_key):
         summary = response["summary"]
     return data, summary, mindmap
-def clear_everything(url, title, id, citation, access_key, raw_data, summary, mindmap):
     return None, None, None, None, None, None
 theme = gr.themes.Soft(
@@ -51,9 +51,7 @@ with gr.Blocks(theme=theme, title="ReXplore Summarizer", fill_height=True) as ap
     with gr.Row():
         with gr.Column():
             url = gr.Textbox(label="PDF URL", placeholder="Paste the PDF URL here")
-            title = gr.Textbox(label="Title", placeholder="Enter the title Research Paper")
             id = gr.Textbox(label="DOI/arXiv ID", placeholder="Enter the DOI or arXiv ID of the Research Paper")
-            citation = gr.Textbox(label="Citation", placeholder="Enter the citation of the Research Paper")
             access_key = gr.Textbox(label="Access Key", placeholder="Enter the Access Key", type="password")
             with gr.Row():
                 clear_btn = gr.Button(value="Clear", variant="stop")
@@ -64,7 +62,7 @@ with gr.Blocks(theme=theme, title="ReXplore Summarizer", fill_height=True) as ap
         mindmap = gr.TextArea(label="Mindmap", placeholder="The generated mindmap will be displayed here", lines=7, interactive=False, show_copy_button=True)
     summarize_btn.click(
         rexplore_summarizer,
-        inputs=[url, title, id, citation, access_key],
         outputs=[raw_data, summary, mindmap],
         concurrency_limit=25,
         scroll_to_output=True,
@@ -72,7 +70,7 @@ with gr.Blocks(theme=theme, title="ReXplore Summarizer", fill_height=True) as ap
         api_name="rexplore_summarizer",
         show_progress="full",
     )
-    clear_btn.click(clear_everything, inputs=[url, title, id, citation, raw_data, summary, mindmap, access_key], outputs=[url, id, raw_data, summary, mindmap, access_key], show_api=False)
 installChrome()
 app.queue(default_concurrency_limit=25).launch(show_api=True, ssr_mode=False)

     subprocess.run(['apt-get', 'clean'])
+def rexplore_summarizer(url, id, access_key):
+    response = json.loads(main(url, id, access_key))
     data = json.dumps(response, ensure_ascii=False, indent=4)
     if response["mindmap_status"] != "success":
         mindmap = "error"
         summary = response["summary"]
     return data, summary, mindmap
+def clear_everything(url, id, access_key, raw_data, summary, mindmap):
     return None, None, None, None, None, None
 theme = gr.themes.Soft(
     with gr.Row():
         with gr.Column():
             url = gr.Textbox(label="PDF URL", placeholder="Paste the PDF URL here")
             id = gr.Textbox(label="DOI/arXiv ID", placeholder="Enter the DOI or arXiv ID of the Research Paper")
             access_key = gr.Textbox(label="Access Key", placeholder="Enter the Access Key", type="password")
             with gr.Row():
                 clear_btn = gr.Button(value="Clear", variant="stop")
         mindmap = gr.TextArea(label="Mindmap", placeholder="The generated mindmap will be displayed here", lines=7, interactive=False, show_copy_button=True)
     summarize_btn.click(
         rexplore_summarizer,
+        inputs=[url, id, access_key],
         outputs=[raw_data, summary, mindmap],
         concurrency_limit=25,
         scroll_to_output=True,
         api_name="rexplore_summarizer",
         show_progress="full",
     )
+    clear_btn.click(clear_everything, inputs=[url, id, raw_data, summary, mindmap, access_key], outputs=[url, id, raw_data, summary, mindmap, access_key], show_api=False)
 installChrome()
 app.queue(default_concurrency_limit=25).launch(show_api=True, ssr_mode=False)

main.py CHANGED Viewed

@@ -9,7 +9,7 @@ import os
 dotenv.load_dotenv()
 ACCESS_KEY = os.getenv("ACCESS_KEY")
-def generate_summary_mindmap(corpus, title, citation):
     response = {}
     math_summary = generate_math_summary(corpus)
     # print(f'As a text script expert, please help me to write a short text script with the topic \" {math_summary}\".You have three tasks, which are:\\n    1.to summarize the text I provided into a Summary .Please answer within 150-300 characters.\\n    2.to summarize the text I provided, using up to seven Highlight.\\n    3.to summarize the text I provided, using up to seven Key Insights. Each insight should include a brief in-depth analysis. Key Insight should not include timestamps.\\n    Your output should use the following template strictly, provide the results for the three tasks:\\n    ## Summary\\n    ## Highlights\\n    - Highlights\\n    ## Key Insights\\n    - Key Insights .\\n  Importantly your output must use language \"English\"')
@@ -22,26 +22,24 @@ def generate_summary_mindmap(corpus, title, citation):
         response["mindmap"] = None
         return response
     else:
-        response = generate_nlp_summary_and_mindmap(math_summary, title, citation)
         return response
-def main(url, title, id, citation, access_key):
     if access_key != ACCESS_KEY:
         return {"error": "Invalid Access Key", "summary": None, "mindmap": None}
     else:
         corpus = extract_text_from_pdf(url, id)
         start_time = time.time()
-        response = generate_summary_mindmap(corpus, title, citation)
         print(f"Total timetaken: {time.time() - start_time} seconds")
         return json.dumps(response, indent=4, ensure_ascii=False)
 if __name__ == "__main__":
     url = "https://arxiv.org/pdf/2412.21024"
     id = "123"
-    title = "Trading linearity for ellipticity: a nonsmooth approach to Einsteinâs theory of gravity and the Lorentzian splitting theorems"
     access_key = os.environ.get("ACCESS_KEY")
-    citation = "Bykov, D., Krivorol, V., & Kuzovchikov, A. (2024). Oscillator Calculus on Coadjoint Orbits and Index Theorems (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2412.21024"
-    data = main(url, title, id, citation, access_key)
     print((data))
     with open("output.json", "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)

 dotenv.load_dotenv()
 ACCESS_KEY = os.getenv("ACCESS_KEY")
+def generate_summary_mindmap(corpus):
     response = {}
     math_summary = generate_math_summary(corpus)
     # print(f'As a text script expert, please help me to write a short text script with the topic \" {math_summary}\".You have three tasks, which are:\\n    1.to summarize the text I provided into a Summary .Please answer within 150-300 characters.\\n    2.to summarize the text I provided, using up to seven Highlight.\\n    3.to summarize the text I provided, using up to seven Key Insights. Each insight should include a brief in-depth analysis. Key Insight should not include timestamps.\\n    Your output should use the following template strictly, provide the results for the three tasks:\\n    ## Summary\\n    ## Highlights\\n    - Highlights\\n    ## Key Insights\\n    - Key Insights .\\n  Importantly your output must use language \"English\"')
         response["mindmap"] = None
         return response
     else:
+        response = generate_nlp_summary_and_mindmap(math_summary)
         return response
+def main(url, id, access_key):
     if access_key != ACCESS_KEY:
         return {"error": "Invalid Access Key", "summary": None, "mindmap": None}
     else:
         corpus = extract_text_from_pdf(url, id)
         start_time = time.time()
+        response = generate_summary_mindmap(corpus)
         print(f"Total timetaken: {time.time() - start_time} seconds")
         return json.dumps(response, indent=4, ensure_ascii=False)
 if __name__ == "__main__":
     url = "https://arxiv.org/pdf/2412.21024"
     id = "123"
     access_key = os.environ.get("ACCESS_KEY")
+    data = main(url, id, access_key)
     print((data))
     with open("output.json", "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)

nlp_processes.py CHANGED Viewed

@@ -87,192 +87,8 @@ def generate_nlp_mindmap(temp_summary):
         print(str(e))
         return False
-def fix_title(title):
-    proxy = get_proxy()
-    while not get_proxy():
-        proxy = get_proxy()
-    try:
-        try:
-            client = Client(
-                provider=RetryProvider(
-                    providers=[Blackbox, Airforce, AmigoChat],
-                    max_retries=4,
-                ),
-            )
-            completion = client.chat.completions.create(
-                proxy=proxy,
-                model="llama-3.1-405b",
-                messages=[
-                    {
-                        "role": "system",
-                        "content": (
-                            "You are a highly advanced language model with strict adherence to precision and accuracy. \n\n"
-                            "Your task is to process input text, identify and correct any encoded or escaped characters, and render the text into a readable format. \n\n"
-                            "**Requirements:**\n"
-                            "1. Correctly decode and render any encoded characters (e.g., \\x sequences or LaTeX-style expressions) into their intended readable forms.\n"
-                            "2. Accurately interpret and render mathematical expressions using MathJax where appropriate.\n"
-                            "3. Produce **only the corrected sequence** as output—no additional commentary, metadata, or extraneous punctuation.\n"
-                            "4. Maintain the structure and style of the original input text, ensuring it remains true to its intended meaning and formatting.\n\n"
-                            "**Input Example:**\n"
-                            "From Painlev\\xe9 equations to ${\\cal N}=2$ susy gauge theories: prolegomena TDI-$\\\\infty$\n\n"
-                            "**Output Example:**\n"
-                            "From Painlevé equations to \\({\\cal N}=2\\) SUSY gauge theories: prolegomena TDI-\\(\\infty\\)"
-                        ),
-                    },
-                    {
-                        "role": "user",
-                        "content": repr(
-                            "Convert the following text into a normal, readable sequence, ensuring accurate interpretation and correction of encoded or escaped characters where necessary. "
-                            "The output must strictly adhere to the input text's original structure, maintaining readability and formatting. Use MathJax where applicable to correctly render mathematical expressions, ensuring the final sequence is flawless. "
-                            "Provide only the corrected sequence as output, with no additional commentary, formatting, or extraneous punctuation beyond what is specified in the input text.\n\n"
-                            f"**Input:** {title}\n\n"
-                        ),
-                    },
-                ],
-            )
-            return completion.choices[0].message.content
-        except Exception as e:
-            print(str(e))
-            client = Client()
-            completion = client.chat.completions.create(
-                proxy=proxy,
-                provider=RetryProvider(
-                    providers=[Blackbox, ChatGptEs, ChatGpt, Copilot, DDG, Liaobots, Mhystical, PollinationsAI],
-                    max_retries=8,
-                    single_provider_retry=True,
-                ),
-                model="gpt-4",
-                messages=[
-                    {
-                        "role": "system",
-                        "content": (
-                            "You are a highly advanced language model with strict adherence to precision and accuracy. \n\n"
-                            "Your task is to process input text, identify and correct any encoded or escaped characters, and render the text into a readable format. \n\n"
-                            "**Requirements:**\n"
-                            "1. Correctly decode and render any encoded characters (e.g., \\x sequences or LaTeX-style expressions) into their intended readable forms.\n"
-                            "2. Accurately interpret and render mathematical expressions using MathJax where appropriate.\n"
-                            "3. Produce **only the corrected sequence** as output—no additional commentary, metadata, or extraneous punctuation.\n"
-                            "4. Maintain the structure and style of the original input text, ensuring it remains true to its intended meaning and formatting.\n\n"
-                            "**Input Example:**\n"
-                            "From Painlev\\xe9 equations to ${\\cal N}=2$ susy gauge theories: prolegomena TDI-$\\\\infty$\n\n"
-                            "**Output Example:**\n"
-                            "From Painlevé equations to \\({\\cal N}=2\\) SUSY gauge theories: prolegomena TDI-\\(\\infty\\)"
-                        ),
-                    },
-                    {
-                        "role": "user",
-                        "content": repr(
-                            "Convert the following text into a normal, readable sequence, ensuring accurate interpretation and correction of encoded or escaped characters where necessary. "
-                            "The output must strictly adhere to the input text's original structure, maintaining readability and formatting. Use MathJax where applicable to correctly render mathematical expressions, ensuring the final sequence is flawless. "
-                            "Provide only the corrected sequence as output, with no additional commentary, formatting, or extraneous punctuation beyond what is specified in the input text.\n\n"
-                            f"**Input:** {title}\n\n"
-                        ),
-                    },
-                ],
-            )
-            return completion.choices[0].message.content
-    except Exception as e:
-        print(str(e))
-        return False
-def fix_citation(citation):
-    proxy = get_proxy()
-    while not get_proxy():
-        proxy = get_proxy()
-    try:
-        try:
-            client = Client(
-                provider=RetryProvider(
-                    providers=[Blackbox, Airforce, AmigoChat],
-                    max_retries=4,
-                ),
-            )
-            completion = client.chat.completions.create(
-                proxy=proxy,
-                model="llama-3.1-405b",
-                messages=[
-                    {
-                        "role": "system",
-                        "content": (
-                            "You are a highly advanced language model with strict adherence to precision and formatting. Your task is to process input text and correct any encoding errors or formatting issues, rendering it into a readable citation in APA latest edition format. \n\n"
-                            "Requirements:\n"
-                            "Accurately decode and render any encoded characters (e.g., special character codes like â).\n"
-                            "Correctly format the citation in strict compliance with the APA latest edition guidelines.\n"
-                            "Produce only the corrected citation as output, with no additional commentary, metadata, or extraneous punctuation beyond what is specified in the text.\n"
-                            "Ensure mathematical expressions, if any, are rendered using MathJax where applicable, maintaining their proper APA context.\n"
-                            "Input Example:\n"
-                            "McCann, R. J. (2025). Trading linearity for ellipticity: a nonsmooth approach to Einsteinâs theory of gravity and the Lorentzian splitting theorems (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00702"
-                            "Expected Output Example:\n"
-                            "McCann, R. J. (2025). Trading linearity for ellipticity: A nonsmooth approach to Einstein’s theory of gravity and the Lorentzian splitting theorems (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00702"
-                        ),
-                    },
-                    {
-                        "role": "user",
-                        "content": repr(
-                            "Convert the following text into a properly formatted citation in strict compliance with APA latest edition guidelines. Correct any encoding errors (e.g., â) and ensure the output is clean, readable, and adheres to APA rules. Render mathematical expressions using MathJax where applicable, preserving proper context.\n"
-                            "Provide only the corrected citation as output, with no additional commentary, metadata, or extraneous punctuation beyond what is specified in the text.\n"
-                            f"**Input:** {citation}\n\n"
-                        ),
-                    },
-                ],
-            )
-            return completion.choices[0].message.content
-        except Exception as e:
-            print(str(e))
-            client = Client()
-            completion = client.chat.completions.create(
-                proxy=proxy,
-                provider=RetryProvider(
-                    providers=[Blackbox, ChatGptEs, ChatGpt, Copilot, DDG, Liaobots, Mhystical, PollinationsAI],
-                    max_retries=8,
-                    single_provider_retry=True,
-                ),
-                model="gpt-4",
-                messages=[
-                    {
-                        "role": "system",
-                        "content": (
-                            "You are a highly advanced language model with strict adherence to precision and formatting. Your task is to process input text and correct any encoding errors or formatting issues, rendering it into a readable citation in APA latest edition format. \n\n"
-                            "Requirements:\n"
-                            "Accurately decode and render any encoded characters (e.g., special character codes like â).\n"
-                            "Correctly format the citation in strict compliance with the APA latest edition guidelines.\n"
-                            "Produce only the corrected citation as output, with no additional commentary, metadata, or extraneous punctuation beyond what is specified in the text.\n"
-                            "Ensure mathematical expressions, if any, are rendered using MathJax where applicable, maintaining their proper APA context.\n"
-                            "Input Example:\n"
-                            "McCann, R. J. (2025). Trading linearity for ellipticity: a nonsmooth approach to Einsteinâs theory of gravity and the Lorentzian splitting theorems (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00702"
-                            "Expected Output Example:\n"
-                            "McCann, R. J. (2025). Trading linearity for ellipticity: A nonsmooth approach to Einstein’s theory of gravity and the Lorentzian splitting theorems (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00702"
-                        ),
-                    },
-                    {
-                        "role": "user",
-                        "content": repr(
-                            "Convert the following text into a properly formatted citation in strict compliance with APA latest edition guidelines. Correct any encoding errors (e.g., â) and ensure the output is clean, readable, and adheres to APA rules. Render mathematical expressions using MathJax where applicable, preserving proper context.\n"
-                            "Provide only the corrected citation as output, with no additional commentary, metadata, or extraneous punctuation beyond what is specified in the text.\n"
-                            f"**Input:** {citation}\n\n"
-                        ),
-                    },
-                ],
-            )
-            return completion.choices[0].message.content
-    except Exception as e:
-        print(str(e))
-        return False
-def generate_nlp_summary_and_mindmap(temp_summary, title, citation):
     response = {}
-    def local_fix_title():
-        fixed_title = fix_title(title)
-        if not fixed_title:
-            response["title"] = title
-        else:
-            response["title"] = fixed_title.strip()
-    def local_fix_citation():
-        fixed_citation = fix_citation(citation)
-        if not fixed_citation:
-            response["citation"] = citation
-        else:
-            response["citation"] = fixed_citation.strip()
     def local_generate_nlp_summary():
         nlp_summary = generate_nlp_summary(temp_summary)
         if not nlp_summary:
@@ -292,8 +108,6 @@ def generate_nlp_summary_and_mindmap(temp_summary, title, citation):
     threads = []
     threads.append(threading.Thread(target=local_generate_nlp_summary))
     threads.append(threading.Thread(target=local_generate_nlp_mindmap))
-    threads.append(threading.Thread(target=local_fix_title))
-    threads.append(threading.Thread(target=local_fix_citation))
     for thread in threads:
         thread.start()
     for thread in threads:

         print(str(e))
         return False
+def generate_nlp_summary_and_mindmap(temp_summary):
     response = {}
     def local_generate_nlp_summary():
         nlp_summary = generate_nlp_summary(temp_summary)
         if not nlp_summary:
     threads = []
     threads.append(threading.Thread(target=local_generate_nlp_summary))
     threads.append(threading.Thread(target=local_generate_nlp_mindmap))
     for thread in threads:
         thread.start()
     for thread in threads: