File size: 27,369 Bytes
ca1d599 0971eef ca1d599 0971eef ca1d599 0971eef ca1d599 0971eef 45398ec 5c5c038 45398ec 0971eef ca1d599 0971eef ca1d599 0971eef ca1d599 0971eef ca1d599 0971eef ca1d599 c10c1ab ca1d599 c10c1ab ca1d599 fe61ed7 0971eef 821874d ca1d599 821874d 58321e8 ca1d599 821874d ca1d599 58321e8 ca1d599 821874d ca1d599 58321e8 ca1d599 58321e8 ca1d599 fb843c4 ca1d599 fb843c4 ca1d599 0971eef ca1d599 43cba25 ca1d599 fb843c4 ca1d599 0971eef ca1d599 6f6fe5f ca1d599 6f6fe5f ca1d599 0971eef ca1d599 7cf7b47 681b263 7cf7b47 681b263 7cf7b47 ca1d599 7cf7b47 ca1d599 7cf7b47 ca1d599 821874d ca1d599 821874d ca1d599 fb843c4 ca1d599 821874d ca1d599 821874d 7cbc3c2 261ab12 7cbc3c2 821874d 7cbc3c2 821874d ca1d599 821874d ca1d599 821874d ca1d599 0971eef ca1d599 0971eef ca1d599 fb843c4 ca1d599 0971eef ca1d599 0971eef ca1d599 c10c1ab ca1d599 553c856 ca1d599 553c856 ca1d599 c10c1ab ca1d599 8ce26e2 ca1d599 c10c1ab ca1d599 c10c1ab ca1d599 fb843c4 c10c1ab fb843c4 c10c1ab ca1d599 9d9cd7e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 |
import numpy as np
import os
import re
from io import BytesIO
import datetime
import time
import openai, tenacity
import argparse
import configparser
import json
import fitz
import PyPDF2
import gradio
import sys
from mistralai import Mistral, DocumentURLChunk, ImageURLChunk, TextChunk, OCRResponse
from pathlib import Path
utils_dir = Path(__file__).parent / 'utils'
sys.path.append(str(utils_dir))
from openai_utils import *
import base64
from pdf2image import convert_from_bytes
import requests
import bibtexparser
from pybtex.database import parse_string
from pybtex.plugin import find_plugin
# Credentials and endpoints are read from the environment once, at import
# time. Each value is None when the corresponding variable is not set.
PRIVATE_API_KEY = os.environ.get('PRIVATE_API_KEY')
PRIVATE_API_BASE = os.environ.get('PRIVATE_API_BASE')
MISTRAL_API = os.environ.get('MISTRAL_API')
def insert_sentence(text, sentence, interval):
    """Insert ``sentence`` after every ``interval``-th word of each line.

    Every line of ``text`` is split on whitespace and re-joined with single
    spaces, so runs of whitespace inside a line collapse. Line breaks are kept
    and the word counter restarts on each line.
    """
    def _interleave(words):
        # Emit each word, followed by the sentence at every interval boundary.
        for position, word in enumerate(words, start=1):
            yield word
            if position % interval == 0:
                yield sentence

    return '\n'.join(
        ' '.join(_interleave(line.split())) for line in text.split('\n')
    )
def format_bibtex(paper, style='apa'):
    """Render a paper's BibTeX record as a plain-text citation.

    Args:
        paper: Mapping whose ``["citationStyles"]["bibtex"]`` entry holds the
            BibTeX source string.
        style: Name of the pybtex formatting plugin to look up.

    Returns:
        The formatted entries joined by newlines, or the stripped raw BibTeX
        string when parsing, plugin lookup or rendering fails.
    """
    bibtex_entry = paper["citationStyles"]["bibtex"]
    try:
        bib_data = parse_string(bibtex_entry, 'bibtex')
        formatter = find_plugin('pybtex.style.formatting', style)()
        entries = list(bib_data.entries.values())
        formatted = formatter.format_entries(entries)
        return '\n'.join(e.text.render_as('text') for e in formatted)
    except Exception:
        # Best-effort fallback: hand back the raw BibTeX. ``Exception`` (not a
        # bare ``except``) so KeyboardInterrupt/SystemExit still propagate.
        return bibtex_entry.strip()
def search_paper(query, max_retries=30, timeout=30):
    """Search the Semantic Scholar Graph API for papers matching ``query``.

    Args:
        query: Free-text search string. It is sent as a query parameter so
            spaces and reserved characters are URL-encoded by ``requests``.
        max_retries: How many times to retry after a 429 or 5xx response.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        The decoded JSON body of the first 200 response.

    Raises:
        requests.HTTPError: On a non-retryable error status, or when retries
            are exhausted on an error status.
        RuntimeError: When the final response is neither 200 nor an error
            status recognised by ``raise_for_status``.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": 3,
        "fields": "url,title,abstract",
        "fieldsOfStudy": "Computer Science",
    }
    for attempt in range(max_retries + 1):
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code == 200:
            return response.json()
        # Only rate limiting and server-side failures are worth retrying; any
        # other status would repeat forever with the same request.
        if response.status_code != 429 and response.status_code < 500:
            break
        if attempt < max_retries:
            time.sleep(1)
    response.raise_for_status()
    raise RuntimeError(
        f"Semantic Scholar search returned unexpected status {response.status_code}"
    )
def get_combined_markdown(pdf_response: OCRResponse) -> str:
    """Join the markdown of every page in ``pdf_response``, separated by blank lines."""
    return "\n\n".join(page.markdown for page in pdf_response.pages)
def split_text_into_chunks(pdf_response: OCRResponse) -> list[str]:
    """Return the markdown of each page in ``pdf_response`` as one chunk per page.

    The return annotation previously claimed ``str``; the function has always
    returned a list with one markdown string per page.
    """
    return [page.markdown for page in pdf_response.pages]
def download_pdf(paper):
    """Download a paper's open-access PDF and return its OCR'd chunks.

    Args:
        paper: Mapping whose ``["openAccessPdf"]["url"]`` entry is the PDF URL.

    Returns:
        Whatever ``extract_chapter`` returns for the downloaded bytes, or an
        empty list when the URL is missing, the download fails or OCR fails.
    """
    try:
        # The lookup lives inside the guard so a missing or None
        # ``openAccessPdf`` yields "no chunks" instead of crashing the caller.
        pdf_url = paper["openAccessPdf"]["url"]
        response = requests.get(pdf_url, timeout=60)
        response.raise_for_status()
        return extract_chapter(BytesIO(response.content))
    except Exception as exc:
        # Deliberately best-effort: one unreachable PDF must not abort the
        # whole review, but the reason is reported instead of being dropped.
        print(f"download_pdf: could not fetch or OCR the PDF: {exc!r}", file=sys.stderr)
        return []
def recommendation(s2_id, limit=500, max_retries=30, timeout=30):
    """Fetch Semantic Scholar recommendations for the paper ``s2_id``.

    Args:
        s2_id: Paper identifier appended to the recommendations endpoint.
        limit: Maximum number of recommended papers to request.
        max_retries: How many times to retry after a 429 or 5xx response.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        The decoded JSON body of the first 200 response.

    Raises:
        requests.HTTPError: On a non-retryable error status, or when retries
            are exhausted on an error status.
        RuntimeError: When the final response is neither 200 nor an error
            status recognised by ``raise_for_status``.
    """
    url = f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{s2_id}"
    params = {
        "limit": limit,
        "fields": "url,title,abstract,publicationDate,isOpenAccess,openAccessPdf,citationStyles",
    }
    for attempt in range(max_retries + 1):
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code == 200:
            return response.json()
        # A 404 for an unknown id (or any other client error) never turns into
        # a 200, so only rate limiting and server errors are retried.
        if response.status_code != 429 and response.status_code < 500:
            break
        if attempt < max_retries:
            time.sleep(1)
    response.raise_for_status()
    raise RuntimeError(
        f"Semantic Scholar recommendations returned unexpected status {response.status_code}"
    )
def extract_chapter(file_object):
    """Upload a PDF to Mistral OCR and return its pages as markdown chunks.

    ``file_object`` is read in full and uploaded under the name
    ``retrieve.pdf``; the OCR response is passed to ``split_text_into_chunks``.
    """
    ocr_client = Mistral(api_key=MISTRAL_API)
    upload = ocr_client.files.upload(
        file={"file_name": "retrieve.pdf", "content": file_object.read()},
        purpose="ocr",
    )
    link = ocr_client.files.get_signed_url(file_id=upload.id, expiry=1)
    ocr_result = ocr_client.ocr.process(
        document=DocumentURLChunk(document_url=link.url),
        model="mistral-ocr-latest",
        include_image_base64=True,
    )
    return split_text_into_chunks(ocr_result)
class Reviewer:
    """LLM-backed reviewer that lists a paper's limitations and suggests fixes.

    The instance keeps the user's endpoint credentials (``api``, ``api_base``),
    the uploaded PDF bytes, the aspect to critique, the model name, the number
    of limitations to produce and whether related papers are retrieved first.
    Auxiliary calls (title extraction, query generation, reranking, content
    filtering) use the module-level ``PRIVATE_API_*`` credentials instead of
    the user's.
    """

    # Per-aspect guidance spliced into the limitation-generation prompt.
    _REVIEW_HINTS = {
        "Methodology": "focusing on the fundamental approaches and techniques employed in the research. These include issues such as inappropriate choice of methods, unstated assumptions that may not hold, and problems with data quality or preprocessing that could introduce bias.",
        "Experimental Design": "focusing on weaknesses in how the research validates its claims. These include issues such as insufficient baseline comparisons, limited datasets that may not represent the full problem space, and lack of ablation studies to isolate the contribution of different components.",
        "Result Analysis": "focusing on how findings are evaluated and interpreted. This includes using inadequate evaluation metrics that may not capture important aspects of performance, insufficient error analysis, and lack of statistical significance testing.",
        "Literature Review": "focusing on how the research connects to and builds upon existing work. This includes missing citations of relevant prior work, mischaracterization of existing methods, and failure to properly contextualize contributions within the broader research landscape.",
    }

    # Output-format tail shared by every condensing prompt below.
    _CONDENSE_SUFFIX = (
        "Organize the result in JSON format as follows:\n"
        "{\n"
        "\"revised_text\": str, not dict, not a summary\n"
        "}\n"
    )

    # Per-aspect system prompts used to condense the retrieved paper sections.
    _CONDENSE_PROMPTS = {
        "Methodology": (
            "Concatenate all the content from the methodology sections of a paper.\n"
            "Remove sentences that are irrelevant to the proposed methodology or models, and keep details about key components and innovations.\n"
            + _CONDENSE_SUFFIX
        ),
        "Result Analysis": (
            "Concatenate all the content from the result analysis sections of a paper.\n"
            "Remove sentences that are irrelevant to the result analysis of the experiments, and keep details about the metrics, case study and how the paper presents the results.\n"
            + _CONDENSE_SUFFIX
        ),
        "Experimental Design": (
            "Concatenate all the content from the experimental design sections of a paper.\n"
            "Remove sentences that are irrelevant to the experiment setup, and keep details about the datasets, baselines, and main experimental, ablation studies.\n"
            + _CONDENSE_SUFFIX
        ),
        "Literature Review": (
            "Concatenate all the content from the literature review sections of a paper.\n"
            "Remove sentences that are irrelevant to the literature review, and keep details about the related works.\n"
            + _CONDENSE_SUFFIX
        ),
    }

    def __init__(self, api, api_base, paper_pdf, aspect, model_name, limit_num, enable_rag):
        """Store the review configuration; ``limit_num`` is converted with ``int``."""
        self.api = api
        self.api_base = api_base
        self.aspect = aspect
        self.paper_pdf = paper_pdf
        self.model_name = model_name
        self.limit_num = int(limit_num)
        self.enable_rag = enable_rag

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _for_aspect(table, aspect):
        """Return ``table[aspect]`` or raise ValueError for an unknown aspect."""
        try:
            return table[aspect]
        except KeyError:
            raise ValueError(f"Unsupported aspect: {aspect!r}") from None

    @staticmethod
    def _make_client(api_key, api_base):
        """Export the credentials to the environment and return a new AsyncOpenAI client.

        NOTE(review): the credentials travel through process-wide environment
        variables, as they did before this refactor. Concurrent requests with
        different keys could overwrite each other; confirm whether
        ``openai_utils`` reads the environment before passing them to the
        client constructor directly.
        """
        os.environ["OPENAI_BASE_URL"] = api_base
        os.environ["OPENAI_API_KEY"] = api_key
        return AsyncOpenAI()

    @staticmethod
    def _run(client, messages, engine_name, **options):
        """Send a batch of conversations and block until the completions return."""
        return asyncio.run(
            generate_from_openai_chat_completion(
                client, messages=messages, engine_name=engine_name, **options
            )
        )

    @staticmethod
    def _parse_limitations(raw, limit):
        """Extract at most ``limit`` limitations from a JSON completion.

        Returns None when ``raw`` is not JSON with a usable ``limitations``
        entry. A dict value (the fallback prompt asks for ``{}`` when nothing
        is found) is reduced to its values and a lone string is wrapped in a
        list, so callers always iterate over whole limitations.
        """
        try:
            value = json.loads(raw)["limitations"]
        except (ValueError, KeyError, TypeError):
            return None
        if isinstance(value, dict):
            value = list(value.values())
        elif isinstance(value, str):
            value = [value]
        if not isinstance(value, list):
            return None
        return value[:limit]

    @staticmethod
    def _select_ranked(paper_list, ranking, top_k=5):
        """Map 1-based ranks from the model onto ``paper_list``.

        Ranks that are not integers, fall outside ``1..len(paper_list)`` or
        repeat an earlier rank are skipped (rank 0 used to select the last
        paper through index -1). When nothing usable remains, the first
        ``top_k`` candidates are returned.
        """
        if not isinstance(ranking, list):
            ranking = []
        chosen = []
        seen = set()
        for rec_num in ranking[:top_k]:
            try:
                index = int(rec_num) - 1
            except (TypeError, ValueError):
                continue
            if 0 <= index < len(paper_list) and index not in seen:
                seen.add(index)
                chosen.append(paper_list[index])
        return chosen or list(paper_list[:top_k])

    @staticmethod
    def _search_hits(search_results):
        """Return the ``data`` list of a search response, or [] when absent."""
        if isinstance(search_results, dict):
            return search_results.get("data") or []
        return []

    @staticmethod
    def _open_access_recommendations(paper_id, cap):
        """Return up to ``cap`` recommended papers that have an abstract and an open-access PDF."""
        retrieval = recommendation(paper_id)
        kept = []
        for candidate in retrieval.get("recommendedPapers") or []:
            if candidate.get("abstract") is None:
                continue
            if candidate.get("isOpenAccess") and candidate.get("openAccessPdf") is not None:
                kept.append(candidate)
            if len(kept) >= cap:
                break
        return kept

    # ----------------------------------------------------------------- pipeline

    def review_by_chatgpt(self, paper_list):
        """Run the full review and return ``(review_text, retrieved_papers)``.

        ``paper_list`` is accepted for interface compatibility and not used;
        the PDF stored on the instance is reviewed. ``retrieved_papers`` is a
        numbered citation list, or an empty string when retrieval is disabled
        or finds nothing.
        """
        text, title, abstract = self.extract_from_paper(self.paper_pdf)
        content = f"Paper to review: \nTitle: {title}\n" + text
        papers = self.retrieve_papers(title, abstract) if self.enable_rag else None

        retrieval_parts = []
        citation_parts = []
        for cnt, paper in enumerate(papers or [], start=1):
            citation = format_bibtex(paper, 'unsrt')
            retrieval_parts.append(
                f"Relevant Paper {cnt}:\n"
                f"Author and Title: {citation}\n{paper['content']}\n\n"
            )
            citation_parts.append(f"{cnt}. {citation}\n({paper['url']})\n\n")

        # With no retrieved papers both joins are empty and only the paper is sent.
        text = "".join(retrieval_parts) + content
        retrieved_papers = "".join(citation_parts)
        chat_review_limitations = self.chat_review(text=text)
        chat_review_text = self.chat_refine(text=text, limitations=chat_review_limitations)
        return chat_review_text, retrieved_papers

    def query_gen(self, abstract):
        """Ask the helper model for a five-word search query summarising ``abstract``."""
        client = self._make_client(PRIVATE_API_KEY, PRIVATE_API_BASE)
        messages = [
            {"role": "system", "content": "Generate a TLDR in 5 words of the following text. Do not use any proposed model names or dataset names from the text. Output only the 5 words without punctuation."},
            {"role": "user", "content": abstract},
        ]
        responses = self._run(
            client, [messages], "gpt-4.1-mini",
            max_tokens=1000, requests_per_minute=20,
        )
        return responses[0]

    def rerank(self, paper_list, title, abstract):
        """Return up to five entries of ``paper_list`` ranked most relevant by the helper model.

        An unparsable or unusable ranking falls back to the first five
        candidates instead of raising.
        """
        client = self._make_client(PRIVATE_API_KEY, PRIVATE_API_BASE)
        parts = [
            f"Paper {number}: {rec_paper['title']}\n{rec_paper['abstract']}\n\n"
            for number, rec_paper in enumerate(paper_list, start=1)
        ]
        parts.append(f"Reference Paper: {title}\n")
        parts.append(f"Abstract: {abstract}\n")
        messages = [
            {"role": "system", "content": f"Given the abstracts of {len(paper_list)} papers and the abstract of a reference paper, rank the papers in order of relevance to the reference paper. Output the top 5 as a list of integers in JSON format: {{'ranking': [1, 10, 4, 2, 8]}}."},
            {"role": "user", "content": "".join(parts)},
        ]
        responses = self._run(
            client, [messages], "gpt-4.1-mini",
            max_tokens=1000, requests_per_minute=20,
            response_format={"type": "json_object"},
        )
        try:
            ranking = json.loads(responses[0])["ranking"]
        except (ValueError, KeyError, TypeError):
            ranking = []
        return self._select_ranked(paper_list, ranking)

    def extract_related_content(self, papers, aspect):
        """Download ``papers`` and keep only the content relevant to ``aspect``.

        Each page chunk is classified yes/no by the helper model, the "yes"
        chunks are concatenated per paper (the abstract is used when none
        qualify), and a second call condenses that text.

        Returns:
            One dict per paper with ``title``, ``content``, ``citationStyles``
            and ``url`` keys.

        Raises:
            ValueError: If ``aspect`` has no condensing prompt.
        """
        # Validate first so an unsupported aspect fails before any download.
        condense_prompt = self._for_aspect(self._CONDENSE_PROMPTS, aspect)
        if not papers:
            return []
        client = self._make_client(PRIVATE_API_KEY, PRIVATE_API_BASE)
        filter_prompt = f"Read the following section from a scientific paper. If the section is related to the paper's {aspect}, output 'yes'; otherwise, output 'no'."

        paper_chunk_list = [download_pdf(paper) for paper in papers]
        messages = []
        chunk_index_map = []  # records which paper each chunk belongs to
        for paper_idx, paper_chunks in enumerate(paper_chunk_list):
            for chunk_idx, paper_chunk in enumerate(paper_chunks):
                messages.append([
                    {"role": "system", "content": filter_prompt},
                    {"role": "user", "content": paper_chunk},
                ])
                chunk_index_map.append((paper_idx, chunk_idx))

        # Every download may have failed; skip the call when there is nothing to classify.
        responses = []
        if messages:
            responses = self._run(
                client, messages, "gpt-4.1-mini",
                max_tokens=1000, requests_per_minute=100,
            )

        paper_data_list = [
            {"title": paper["title"], "content": "", "citationStyles": paper["citationStyles"], "url": paper["url"]}
            for paper in papers
        ]
        for (paper_idx, chunk_idx), response in zip(chunk_index_map, responses):
            if (response or "").strip().lower().startswith("yes"):
                paper_data_list[paper_idx]["content"] += paper_chunk_list[paper_idx][chunk_idx] + "\n"
        for idx, paper_data in enumerate(paper_data_list):
            if not paper_data["content"].strip():
                paper_data["content"] = papers[idx]["abstract"]

        messages = [
            [
                {"role": "system", "content": condense_prompt},
                {"role": "user", "content": paper_data["content"]},
            ]
            for paper_data in paper_data_list
        ]
        responses = self._run(
            client, messages, "gpt-4o-mini",
            max_tokens=5000, requests_per_minute=20,
            response_format={"type": "json_object"},
        )

        results = []
        for paper_data, response in zip(paper_data_list, responses):
            try:
                revised = json.loads(response)["revised_text"]
            except (ValueError, KeyError, TypeError):
                # Keep the unfiltered text rather than losing the paper.
                revised = paper_data["content"]
            results.append({"title": paper_data["title"], "content": revised, "citationStyles": paper_data["citationStyles"], "url": paper_data["url"]})
        return results

    def chat_review(self, text):
        """Ask the user's model for up to ``limit_num`` limitations of ``text``.

        Returns:
            A list of limitations. When the first completion is not parsable
            JSON, the helper model is asked to extract the list from it; an
            empty list is returned if that also yields nothing usable.

        Raises:
            ValueError: If ``self.aspect`` has no review hint.
        """
        hint = self._for_aspect(self._REVIEW_HINTS, self.aspect)
        client = self._make_client(self.api, self.api_base)
        # The format example now carries its closing brace; it was missing before.
        tail = f"{hint} Do not include any limitation explicitly mentioned in the paper itself. Return only the limitations in the following JSON format: {{\"limitations\": <a list of limitations>}}"
        if self.enable_rag:
            system_prompt = f"Read the following content from several papers to gain knowledge in the relevant field. Using this knowledge, review a new scientific paper in this field. Based on existing research, identify the limitations of the 'Paper to Review'. Generate {str(self.limit_num)} major limitations related to its {self.aspect} in this paper, {tail}"
        else:
            system_prompt = f"Read the following scientific paper and generate {str(self.limit_num)} major limitations in this paper about its {self.aspect}, {tail}"
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ]
        responses = self._run(
            client, [messages], self.model_name,
            max_tokens=1000, requests_per_minute=20,
        )
        limitations = self._parse_limitations(responses[0], self.limit_num)
        if limitations is not None:
            return limitations

        # The model answered in free text: let the helper model pull the list out.
        SYSTEM_INPUT = "Below is an output from an LLM about several limitations of a scientific paper. Please extract the list of limitations and DO NOT make any modification to the original limitations. Return the limitations in the following JSON format: {\"limitations\": <a list of limitations>}. If there is no valid response inthe output, return {\"limitations\": {}}"
        messages = [
            {"role": "system", "content": SYSTEM_INPUT},
            {"role": "user", "content": responses[0]},
        ]
        client = self._make_client(PRIVATE_API_KEY, PRIVATE_API_BASE)
        responses = self._run(
            client, [messages], "gpt-4.1-mini",
            max_tokens=1000, requests_per_minute=20,
            response_format={"type": "json_object"},
        )
        return self._parse_limitations(responses[0], self.limit_num) or []

    def chat_refine(self, text, limitations):
        """Attach one actionable suggestion to each limitation.

        Returns:
            A numbered string of ``"<n>. <limitation> <suggestion>"`` blocks
            separated by blank lines; empty when ``limitations`` is empty. The
            result is also printed between separator lines.
        """
        if not limitations:
            return ""
        client = self._make_client(self.api, self.api_base)
        if self.enable_rag:
            SYSTEM_INPUT = "Read the following scientific paper, its limitation, and several relevant papers to gain knowledge of the relevant field. Using insights from the relevant papers, provide a highly specific and actionable suggestion to address the limitation in the paper to review. You need to cite the related paper when giving advice. If suggesting an additional dataset, specify the exact dataset(s) by name. If proposing a methodological change, describe the specific modification. Keep the response within 50 words."
        else:
            SYSTEM_INPUT = "Read the following scientific paper and its limitation, and provide a highly specific and actionable suggestion to address the limitation. If suggesting an additional dataset, specify the exact dataset(s) by name. If proposing a methodological change, describe the specific modification. Keep the response within 50 words."
        messages = [
            [
                {"role": "system", "content": SYSTEM_INPUT},
                {"role": "user", "content": f"{text}\nLimitation: {limitation}"},
            ]
            for limitation in limitations
        ]
        responses = self._run(
            client, messages, self.model_name,
            max_tokens=1000, requests_per_minute=20,
        )
        result = "".join(
            f"{limit_cnt}. {limitation} {response}\n\n"
            for limit_cnt, (limitation, response) in enumerate(zip(limitations, responses), start=1)
        )
        print("********"*10)
        print(result)
        print("********"*10)
        return result

    def retrieve_papers(self, title, abstract):
        """Find open-access papers related to the reviewed paper.

        If the top search hit for ``title`` matches it case-insensitively, up
        to 20 of that paper's recommendations are used. Otherwise a query is
        generated from ``abstract`` and up to 5 recommendations are collected
        per search hit. The candidates are reranked and reduced to content
        relevant to ``self.aspect``.

        Returns:
            The list produced by ``extract_related_content``, or None when no
            candidate paper is found.
        """
        hits = self._search_hits(search_paper(title))
        # An empty or missing "data" list is handled here rather than indexed blindly.
        if hits and hits[0]["title"].lower() == title.lower():
            recommended_paper_list = self._open_access_recommendations(hits[0]["paperId"], 20)
        else:
            query = self.query_gen(abstract)
            hits = self._search_hits(search_paper(query))
            if not hits:
                return None
            recommended_paper_list = []
            for search_result in hits:
                recommended_paper_list.extend(
                    self._open_access_recommendations(search_result["paperId"], 5)
                )
        if not recommended_paper_list:
            return None
        final_papers = self.rerank(recommended_paper_list, title, abstract)
        return self.extract_related_content(final_papers, self.aspect)

    def extract_from_paper(self, pdf_path):
        """Extract the full text, title and abstract of the uploaded PDF.

        Args:
            pdf_path: The PDF content itself, as bytes (despite the name).

        Returns:
            ``(extracted_text, title, abstract)``. Title and abstract are read
            from an image of the first page by the helper model; the text is
            the Mistral OCR markdown of all pages.
        """
        client = self._make_client(PRIVATE_API_KEY, PRIVATE_API_BASE)
        doc = fitz.open(stream=pdf_path, filetype="pdf")
        try:
            image_bytes = doc.load_page(0).get_pixmap().tobytes("png")
        finally:
            # The document used to be left open for the rest of the process.
            doc.close()
        image_base64 = base64.b64encode(image_bytes).decode('utf-8')
        USER_INPUT = [{"type": "text", "text": "The first page of the paper: "}, {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}]
        messages = [
            {"role": "system", "content": "Given the first-page image of a scientific paper in PDF format, extract and return the title and abstract in the following JSON format: {\"title\": \"<extracted title>\", \"abstract\": \"<extracted abstract>\"}."},
            {"role": "user", "content": USER_INPUT},
        ]
        responses = self._run(
            client, [messages], "gpt-4.1-mini",
            max_tokens=1000, requests_per_minute=20,
            response_format={"type": "json_object"},
        )
        response = json.loads(responses[0])
        title = response["title"]
        abstract = response["abstract"]

        ocr_client = Mistral(api_key=MISTRAL_API)
        uploaded_file = ocr_client.files.upload(
            file={"file_name": "upload.pdf", "content": pdf_path},
            purpose="ocr",
        )
        signed_url = ocr_client.files.get_signed_url(file_id=uploaded_file.id, expiry=1)
        pdf_response = ocr_client.ocr.process(
            document=DocumentURLChunk(document_url=signed_url.url),
            model="mistral-ocr-latest",
            include_image_base64=True,
        )
        extracted_text = get_combined_markdown(pdf_response)
        return extracted_text, title, abstract
def main(api, api_base, paper_pdf, aspect, model_name, limit_num, enable_rag):
    """Gradio entry point: validate the inputs and run one review.

    Returns:
        ``(retrieved_content, comments, output2)`` — the retrieved-literature
        text, the review (or an error message), and a timing/status line.
        Missing inputs and an invalid ``limit_num`` each return their own
        message without starting a review; previously the missing-input check
        fell through and the review ran anyway.
    """
    start_time = time.time()
    retrieved_content = ''

    if not api or not paper_pdf or not api_base:
        comments = "It looks like there's a missing API key/base URL or PDF input. Make sure you've provided the necessary information or uploaded the required file."
        output2 = "It looks like there's a missing API key or PDF input. Make sure you've provided the necessary information or uploaded the required file."
        return retrieved_content, comments, output2

    # Validate with int() itself so anything accepted here is also accepted
    # by the int() conversion inside Reviewer.
    try:
        requested = int(str(limit_num).strip())
    except ValueError:
        requested = 0
    if requested <= 0:
        comments = "The input number is not a positive integer."
        output2 = "The input number is not a positive integer."
        return retrieved_content, comments, output2

    try:
        reviewer1 = Reviewer(api, api_base, paper_pdf, aspect, model_name, limit_num, enable_rag)
        comments, retrieved_content = reviewer1.review_by_chatgpt(paper_list=paper_pdf)
        time_used = time.time() - start_time
        output2 = "Processing Time:" + str(round(time_used, 2)) + "seconds"
    except Exception as e:
        # Top-level UI boundary: show the failure to the user instead of crashing.
        comments = "Error: " + str(e)
        output2 = "Error: " + str(e)
    return retrieved_content, comments, output2
########################################################################################################
# Gradio front end: seven inputs, passed positionally to main() in this order.
title = "Acceleron - Critique - Limitation Generation with Actionable Feedback"
# The <strong> element is now closed; it was left open before.
description = '''<div align='left'>
<strong>We present a demo for our paper: Can LLMs Identify Critical Limitations within Scientific Research? A Systematic Evaluation on AI Research Papers. Upload the PDF of the paper you want to review, and the demo will automatically generate its identified limitations.</strong>
</div>
'''

inp = [
    gradio.Textbox(label="Enter your API-key",
                   value="",
                   type='password'),
    gradio.Textbox(label="Enter the base URL (ending with /v1). Skip this step if using the original OpenAI API.",
                   value="https://api.openai.com/v1"),
    # type="binary" hands the uploaded file to main() as bytes.
    gradio.File(label="Upload the PDF file of your paper (Make sure the PDF is fully uploaded before clicking Submit)", type="binary"),
    gradio.Radio(choices=["Methodology", "Experimental Design", "Result Analysis", "Literature Review"],
                 value="Methodology",
                 label="Select the aspect"),
    gradio.Dropdown(["gpt-4.1-mini", "gpt-4.1"],
                    label="Select the model name",
                    value="gpt-4.1"),
    gradio.Textbox(label="Enter the number of limitations to generate.",
                   value="3"),
    gradio.Checkbox(label="Enable RAG", value=False),
]

chat_reviewer_gui = gradio.Interface(
    fn=main,
    inputs=inp,
    outputs=[
        gradio.Textbox(lines=6, label="Retrieved Literature"),
        gradio.Textbox(lines=15, label="Output"),
        gradio.Textbox(lines=2, label="Resource Statistics"),
    ],
    title=title,
    description=description,
)

# Start server
chat_reviewer_gui.launch(quiet=True, show_api=False)