ScientryAPI / nlp_processes.py
raannakasturi's picture
Update dependencies and refactor summarization functions to include title and citation
a90f1c4
raw
history blame
10.4 kB
from g4f.client import Client
from g4f.Provider import RetryProvider, Blackbox, MetaAI, BlackboxCreateAgent
import threading
def generate_nlp_summary(temp_summary):
    """Ask the model for a Summary / Highlights / Key Insights write-up.

    A g4f ``Client`` is built on a ``RetryProvider`` over Blackbox, MetaAI
    and BlackboxCreateAgent, and a system + user prompt pair is sent to the
    ``llama-3.1-405b`` model.

    Args:
        temp_summary: Text interpolated into the user prompt as the topic.

    Returns:
        The reply text after removing ``**`` markers, replacing double
        newlines (and newline-space-newline) with a single newline, putting
        a newline in front of every ``##`` and stripping outer whitespace.
        ``False`` if anything in the process raises; the exception message
        is printed in that case.
    """
    try:
        retrying_provider = RetryProvider(
            providers=[Blackbox, MetaAI, BlackboxCreateAgent],
            shuffle=True,
            single_provider_retry=True,
            max_retries=3,
        )
        llm = Client(provider=retrying_provider)
        system_message = {
            "role": "system",
            "content": "You are a helpful research assistant for generating well-formatted summaries from scientific research papers.",
        }
        user_message = {
            "role": "user",
            "content": f'As a text script expert, please help me to write a short text script with the topic \" {temp_summary}\".You have three tasks, which are:\\n 1.to summarize the text I provided into a Summary .Please answer within 150-300 characters.\\n 2.to summarize the text I provided, using up to seven Highlight.\\n 3.to summarize the text I provided, using up to seven Key Insights. Each insight should include a brief in-depth analysis. Key Insight should not include timestamps.\\n Your output should use the following template strictly, provide the results for the three tasks:\\n ## Summary\\n ## Highlights\\n - Highlights\\n ## Key Insights\\n - Key Insights .\\n Importantly your output must use language \"English\"',
        }
        reply = llm.chat.completions.create(
            model="llama-3.1-405b",
            messages=[system_message, user_message],
        )
        text = reply.choices[0].message.content
        # Substitutions are order-sensitive: bold markers go first, then
        # newline runs are reduced, then headings get a leading newline.
        substitutions = (
            ("**", ""),
            ("\n\n", "\n"),
            ("\n \n", "\n"),
            ("##", "\n##"),
        )
        for old, new in substitutions:
            text = text.replace(old, new)
        return text.strip()
    except Exception as err:
        print(str(err))
        return False
def generate_nlp_mindmap(temp_summary):
    """Ask the model for a markdown mind-map outline of *temp_summary*.

    A g4f ``Client`` is built on a ``RetryProvider`` over Blackbox, MetaAI
    and BlackboxCreateAgent, and a system + user prompt pair is sent to the
    ``llama-3.1-405b`` model. The user prompt carries a subtitle/bullet
    template for the reply.

    Args:
        temp_summary: Text interpolated into the user prompt as the topic.

    Returns:
        The reply text after removing ``**`` markers, replacing double
        newlines (and newline-space-newline) with a single newline, putting
        a newline in front of every ``##`` and stripping outer whitespace.
        ``False`` if anything in the process raises; the exception message
        is printed in that case.
    """
    try:
        retrying_provider = RetryProvider(
            providers=[Blackbox, MetaAI, BlackboxCreateAgent],
            shuffle=True,
            single_provider_retry=True,
            max_retries=3,
        )
        llm = Client(provider=retrying_provider)
        system_message = {
            "role": "system",
            "content": "You are a helpful research assistant for generating well-formatted mindmaps from scientific research papers.",
        }
        user_message = {
            "role": "user",
            "content": f'As a text script expert, please help me to write a short text script with the topic \"{temp_summary}\".Your output should use the following template:\\n\\n## {{Subtitle01}}\\n- {{Bulletpoint01}}\\n- {{Bulletpoint02}}\\n## {{Subtitle02}}\\n- {{Bulletpoint03}}\\n- {{Bulletpoint04}}\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles) structure markdown. Do not include anything in the response, that is not the part of mindmap.\\n Most Importantly your output must use language \"English\" and each point or pointer should include no more than 9 words.',
        }
        reply = llm.chat.completions.create(
            model="llama-3.1-405b",
            messages=[system_message, user_message],
        )
        text = reply.choices[0].message.content
        # Substitutions are order-sensitive: bold markers go first, then
        # newline runs are reduced, then headings get a leading newline.
        substitutions = (
            ("**", ""),
            ("\n\n", "\n"),
            ("\n \n", "\n"),
            ("##", "\n##"),
        )
        for old, new in substitutions:
            text = text.replace(old, new)
        return text.strip()
    except Exception as err:
        print(str(err))
        return False
def fix_title(title):
    """Ask the model to turn an escaped/encoded title into readable text.

    The system prompt instructs the model to decode escaped characters and
    render math with MathJax, and includes one input/output example. The
    user prompt embeds *title* and is passed through ``repr()`` before
    being sent.

    Args:
        title: The title text interpolated into the user prompt.

    Returns:
        The model's reply content exactly as received, or ``False`` if
        anything in the process raises (the exception message is printed).
    """
    try:
        retrying_provider = RetryProvider(
            providers=[Blackbox, MetaAI, BlackboxCreateAgent],
            shuffle=True,
            single_provider_retry=True,
            max_retries=3,
        )
        llm = Client(provider=retrying_provider)
        system_prompt = (
            "You are a highly advanced language model with strict adherence to precision and accuracy. \n\n"
            "Your task is to process input text, identify and correct any encoded or escaped characters, and render the text into a readable format. \n\n"
            "**Requirements:**\n"
            "1. Correctly decode and render any encoded characters (e.g., \\x sequences or LaTeX-style expressions) into their intended readable forms.\n"
            "2. Accurately interpret and render mathematical expressions using MathJax where appropriate.\n"
            "3. Produce **only the corrected sequence** as output—no additional commentary, metadata, or extraneous punctuation.\n"
            "4. Maintain the structure and style of the original input text, ensuring it remains true to its intended meaning and formatting.\n\n"
            "**Input Example:**\n"
            "From Painlev\\xe9 equations to ${\\cal N}=2$ susy gauge theories: prolegomena TDI-$\\\\infty$\n\n"
            "**Output Example:**\n"
            "From Painlevé equations to \\({\\cal N}=2\\) SUSY gauge theories: prolegomena TDI-\\(\\infty\\)"
        )
        # repr() is applied to the whole user prompt, as in the original
        # request, so the model receives the quoted, escaped form.
        user_prompt = repr(
            "Convert the following text into a normal, readable sequence, ensuring accurate interpretation and correction of encoded or escaped characters where necessary. "
            "The output must strictly adhere to the input text's original structure, maintaining readability and formatting. Use MathJax where applicable to correctly render mathematical expressions, ensuring the final sequence is flawless. "
            "Provide only the corrected sequence as output, with no additional commentary, formatting, or extraneous punctuation beyond what is specified in the input text.\n\n"
            f"**Input:** {title}\n\n"
        )
        reply = llm.chat.completions.create(
            model="llama-3.1-405b",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return reply.choices[0].message.content
    except Exception as err:
        print(str(err))
        return False
def fix_citation(citation):
    """Ask the model to clean up a citation and format it in APA style.

    The system prompt lists the requirements (decode mis-encoded
    characters, follow the latest APA edition, output only the citation,
    use MathJax for math) and gives one input/output example. The user
    prompt embeds *citation* and is passed through ``repr()`` before being
    sent to the ``llama-3.1-405b`` model via a g4f ``Client`` backed by a
    ``RetryProvider``.

    Args:
        citation: The citation text interpolated into the user prompt.

    Returns:
        The model's reply content exactly as received, or ``False`` if
        anything in the process raises (the exception message is printed).
    """
    try:
        client = Client(
            provider=RetryProvider(
                providers=[Blackbox, MetaAI, BlackboxCreateAgent],
                shuffle=True,
                single_provider_retry=True,
                max_retries=3,
            ),
        )
        system_prompt = (
            "You are a highly advanced language model with strict adherence to precision and formatting. Your task is to process input text and correct any encoding errors or formatting issues, rendering it into a readable citation in APA latest edition format. \n\n"
            "Requirements:\n"
            "Accurately decode and render any encoded characters (e.g., special character codes like ’).\n"
            "Correctly format the citation in strict compliance with the APA latest edition guidelines.\n"
            "Produce only the corrected citation as output, with no additional commentary, metadata, or extraneous punctuation beyond what is specified in the text.\n"
            "Ensure mathematical expressions, if any, are rendered using MathJax where applicable, maintaining their proper APA context.\n"
            "Input Example:\n"
            # The trailing newline matters: adjacent literals are joined with
            # no separator, so without it the DOI URL runs straight into the
            # "Expected Output Example:" heading.
            "McCann, R. J. (2025). Trading linearity for ellipticity: a nonsmooth approach to Einstein’s theory of gravity and the Lorentzian splitting theorems (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00702\n"
            "Expected Output Example:\n"
            "McCann, R. J. (2025). Trading linearity for ellipticity: A nonsmooth approach to Einstein’s theory of gravity and the Lorentzian splitting theorems (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00702"
        )
        # repr() is applied to the whole user prompt, as in the original
        # request, so the model receives the quoted, escaped form.
        user_prompt = repr(
            "Convert the following text into a properly formatted citation in strict compliance with APA latest edition guidelines. Correct any encoding errors (e.g., ’) and ensure the output is clean, readable, and adheres to APA rules. Render mathematical expressions using MathJax where applicable, preserving proper context.\n"
            "Provide only the corrected citation as output, with no additional commentary, metadata, or extraneous punctuation beyond what is specified in the text.\n"
            f"**Input:** {citation}\n\n"
        )
        completion = client.chat.completions.create(
            model="llama-3.1-405b",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        print(str(e))
        return False
def generate_nlp_summary_and_mindmap(temp_summary, title, citation):
    """Produce summary, mindmap, cleaned title and cleaned citation in parallel.

    Four worker threads are started, one each for ``generate_nlp_summary``,
    ``generate_nlp_mindmap``, ``fix_title`` and ``fix_citation``. Each
    worker writes its own keys into a shared dict, and the function returns
    once all four threads have been joined.

    Args:
        temp_summary: Text passed to the summary and mindmap generators.
        title: Original title; used as-is when the fixer yields nothing.
        citation: Original citation; used as-is when the fixer yields nothing.

    Returns:
        A dict with these keys:

        * ``"title"`` / ``"citation"``: the stripped model output, or the
          original argument when the fixer returned a falsy value or only
          whitespace.
        * ``"summary_status"`` / ``"mindmap_status"``: ``"success"`` or
          ``"error"``.
        * ``"summary"`` / ``"mindmap"``: the stripped text on success,
          ``None`` on error.
    """
    response = {}

    def local_fix_title():
        fixed_title = fix_title(title)
        cleaned = fixed_title.strip() if fixed_title else ""
        # A blank reply is no better than a failed one: keep the original.
        response["title"] = cleaned or title

    def local_fix_citation():
        fixed_citation = fix_citation(citation)
        # Call strip(); storing the bound method itself would put a
        # non-string object into the response.
        cleaned = fixed_citation.strip() if fixed_citation else ""
        response["citation"] = cleaned or citation

    def local_generate_nlp_summary():
        nlp_summary = generate_nlp_summary(temp_summary)
        if not nlp_summary:
            response["summary_status"] = "error"
            response["summary"] = None
        else:
            response["summary_status"] = "success"
            response["summary"] = nlp_summary.strip()

    def local_generate_nlp_mindmap():
        nlp_mindmap = generate_nlp_mindmap(temp_summary)
        if not nlp_mindmap:
            response["mindmap_status"] = "error"
            response["mindmap"] = None
        else:
            response["mindmap_status"] = "success"
            response["mindmap"] = nlp_mindmap.strip()

    workers = (
        local_generate_nlp_summary,
        local_generate_nlp_mindmap,
        local_fix_title,
        local_fix_citation,
    )
    threads = [threading.Thread(target=worker) for worker in workers]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    return response