zjXu11 commited on
Commit
9d9cd7e
·
verified ·
1 Parent(s): c1f6997

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +12 -14
  2. app.py +545 -0
  3. requirements.txt +11 -0
README.md CHANGED
@@ -1,14 +1,12 @@
1
- ---
2
- title: LimitGen
3
- emoji: 🏢
4
- colorFrom: gray
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 5.20.1
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- short_description: Demo for LimitGen
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: LimitGen Demo
3
+ emoji: 💬
4
+ colorFrom: yellow
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.6.0
8
+ app_file: app.py
9
+ pinned: false
10
+ short_description: demo
11
+ ---
12
+ An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
 
 
app.py ADDED
@@ -0,0 +1,545 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+ import re
4
+ from io import BytesIO
5
+ import datetime
6
+ import time
7
+ import openai, tenacity
8
+ import argparse
9
+ import configparser
10
+ import json
11
+ import fitz
12
+ import PyPDF2
13
+ import gradio
14
+ import sys
15
+ from pathlib import Path
16
+ utils_dir = Path(__file__).parent / 'utils'
17
+ sys.path.append(str(utils_dir))
18
+ from openai_utils import *
19
+ import base64
20
+ from pdf2image import convert_from_bytes
21
+ import requests
22
+ PRIVATE_API_KEY = os.getenv('PRIVATE_API_KEY')
23
+ PRIVATE_API_BASE = os.getenv('PRIVATE_API_BASE')
24
+
25
+
26
def insert_sentence(text, sentence, interval):
    """Insert *sentence* after every *interval* words on each line of *text*.

    Lines are processed independently; word positions reset at each newline.
    *interval* must be a non-zero integer (word count between insertions).
    """
    processed_lines = []
    for line in text.split('\n'):
        pieces = []
        for position, word in enumerate(line.split(), start=1):
            pieces.append(word)
            # Drop the marker sentence in after each full interval of words.
            if position % interval == 0:
                pieces.append(sentence)
        processed_lines.append(' '.join(pieces))
    return '\n'.join(processed_lines)
47
+
48
def search_paper(query, max_retries=30):
    """Search Semantic Scholar for papers matching *query*.

    Returns the parsed JSON response (dict with a "data" list of papers,
    each carrying url/title/abstract fields).

    Fixes over the original:
    - the query is passed via `params=`, so requests URL-encodes it
      (the raw f-string broke on queries containing '&', '?', '#', ...);
    - the retry loop is bounded (the original `while status != 200`
      spun forever, re-hitting the API once per second);
    - each request carries a timeout so a stalled connection cannot hang.

    Raises:
        requests.HTTPError: if no 200 response arrives within *max_retries*.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": 3,
        "fields": "url,title,abstract",
        "fieldsOfStudy": "Computer Science",
    }
    response = requests.get(url, params=params, timeout=30)
    for _ in range(max_retries):
        if response.status_code == 200:
            return response.json()
        time.sleep(1)  # simple fixed backoff; S2 rate-limits aggressively
        response = requests.get(url, params=params, timeout=30)
    # Exhausted retries: surface the last failure instead of looping forever.
    response.raise_for_status()
    return response.json()
59
+
60
def split_text_into_chunks(text, chunk_size=300):
    """Split whitespace-separated *text* into chunks of *chunk_size* words.

    Returns a list of strings; the final chunk may hold fewer words.
    Empty input yields an empty list.
    """
    tokens = text.split()
    chunks = []
    for start in range(0, len(tokens), chunk_size):
        chunks.append(" ".join(tokens[start:start + chunk_size]))
    return chunks
64
+
65
def download_pdf(paper):
    """Download a paper's open-access PDF and return its text as word chunks.

    *paper* is a Semantic Scholar paper dict; its ["openAccessPdf"]["url"]
    must be present (a missing key propagates, matching the original).

    Returns a list of ~300-word chunks, or [] if the download or PDF
    parsing fails — this is a deliberate best-effort: a broken PDF simply
    contributes no retrieval content.

    Fixes over the original:
    - `except Exception` instead of a bare `except:` (the bare form also
      swallowed KeyboardInterrupt/SystemExit);
    - a request timeout so a stalled download cannot hang the app.
    """
    pdf_url = paper["openAccessPdf"]["url"]
    try:
        response = requests.get(pdf_url, timeout=60)
        response.raise_for_status()

        file_object = BytesIO(response.content)
        extracted = extract_chapter(file_object)
        return split_text_into_chunks(extracted)
    except Exception:
        # Best-effort: any download/parse failure yields no chunks.
        return []
78
+
79
+
80
def recommendation(s2_id, limit=500, max_retries=30):
    """Fetch papers recommended for Semantic Scholar paper id *s2_id*.

    Returns the parsed JSON response (dict with a "recommendedPapers" list,
    each entry carrying url/title/abstract/publicationDate/isOpenAccess/
    openAccessPdf fields).

    Fixes over the original: the retry loop is bounded (it previously spun
    forever on any non-200 status) and each request carries a timeout.

    Raises:
        requests.HTTPError: if no 200 response arrives within *max_retries*.
    """
    url = f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{s2_id}"
    params = {
        "limit": limit,
        "fields": "url,title,abstract,publicationDate,isOpenAccess,openAccessPdf",
    }
    response = requests.get(url, params=params, timeout=30)
    for _ in range(max_retries):
        if response.status_code == 200:
            return response.json()
        time.sleep(1)  # fixed backoff between retries
        response = requests.get(url, params=params, timeout=30)
    # Exhausted retries: raise instead of looping forever.
    response.raise_for_status()
    return response.json()
92
+
93
+
94
def extract_chapter(file_object):
    """Extract and concatenate the text of every page of a PDF.

    *file_object* is a binary file-like object (e.g. BytesIO of PDF bytes).
    Returns the concatenated page text as one string.

    NOTE(review): the original tracked `extraction_started` and a
    `page_number_start` that was reset to the current page on every
    iteration, so its break condition (`page_number_start + 1 <
    page_number`) could never fire — the whole document was always
    extracted. This version keeps that actual behavior and drops the dead
    bookkeeping. It also guards against `extract_text()` returning None
    (possible for image-only pages), which previously raised TypeError.
    """
    pdf_reader = PyPDF2.PdfReader(file_object)

    extracted_text = ""
    for page in pdf_reader.pages:
        # `or ""` — extract_text() may yield None for pages without a text layer.
        extracted_text += page.extract_text() or ""
    return extracted_text
111
+
112
+
113
+
114
class Reviewer:
    """Orchestrates limitation-generation for one uploaded paper PDF.

    Pipeline (see review_by_chatgpt): extract text/title/abstract from the
    PDF, optionally retrieve and rerank related open-access papers (RAG),
    then ask an LLM for the paper's limitations in the chosen aspect.

    Two credential sets are in play: the user-supplied `api`/`api_base`
    drive the main review calls, while the Space-level PRIVATE_API_KEY/
    PRIVATE_API_BASE drive internal helper calls (query_gen, rerank,
    extract_from_paper). Credentials are passed by mutating os.environ
    before constructing AsyncOpenAI — NOTE(review): this is process-global
    state and not safe under concurrent requests; confirm acceptable for
    this demo.
    """

    def __init__(self, api, api_base, paper_pdf, aspect, model_name, enable_rag):
        # api / api_base: user-supplied OpenAI-compatible credentials.
        self.api = api
        self.api_base = api_base
        # aspect: one of "Methodology", "Experimental Design",
        # "Result Analysis", "Literature Review" (see the Gradio Radio input).
        self.aspect = aspect
        # paper_pdf: raw PDF bytes (the Gradio File input uses type="binary").
        self.paper_pdf = paper_pdf
        self.model_name = model_name
        # enable_rag: when True, related papers are retrieved and prepended
        # to the review prompt.
        self.enable_rag = enable_rag
        # self.max_token_num = 50000
        # self.encoding = tiktoken.get_encoding("gpt2")

    def review_by_chatgpt(self, paper_list):
        """Run the full review pipeline; returns (review_text, retrieved_titles).

        NOTE(review): `paper_list` is never used in this body — the PDF comes
        from self.paper_pdf; consider removing the parameter at the call site.
        """
        text, title, abstract = self.extract_from_paper(self.paper_pdf)
        content = f"Paper to review: \nTitle: {title}\n" + text

        if self.enable_rag:
            papers = self.retrieve_papers(title, abstract)
            if papers != None:
                # Prepend each retrieved paper's aspect-relevant content to the
                # prompt, and collect a numbered list of titles for display.
                retrieval_content = ""
                retrieved_papers = ""
                cnt = 1
                for paper in papers:
                    retrieval_content += f"Relevant Paper {str(cnt)}:\n"
                    retrieval_content += f"Title: {paper['title']}\n{paper['content']}\n\n"
                    retrieved_papers += f"{str(cnt)}. {paper['title']}\n"
                    cnt += 1
                text = retrieval_content + content
                chat_review_text = self.chat_review(text=text)
            else:
                # Retrieval found nothing usable: fall back to a plain review.
                text = content
                chat_review_text = self.chat_review(text=text)
                retrieved_papers = ""
        else:
            text = content
            chat_review_text = self.chat_review(text=text)
            retrieved_papers = ""

        return chat_review_text, retrieved_papers

    def query_gen(self, abstract):
        """Ask the private LLM for a 5-word TLDR of *abstract*, used as a
        fallback search query when title search fails (see retrieve_papers)."""
        # Helper calls run on the Space's private credentials, not the user's.
        os.environ["OPENAI_BASE_URL"] = PRIVATE_API_BASE
        os.environ["OPENAI_API_KEY"] = PRIVATE_API_KEY
        client = AsyncOpenAI()

        messages=[
            {"role": "system", "content": f"Generate a TLDR in 5 words of the following text. Do not use any proposed model names or dataset names from the text. Output only the 5 words without punctuation."} ,
            {"role": "user", "content": abstract},
        ]

        # generate_from_openai_chat_completion comes from utils/openai_utils
        # (star-imported); it takes a *list* of message lists and returns a
        # list of completions — hence messages=[messages] and responses[0].
        responses = asyncio.run(
            generate_from_openai_chat_completion(
                client,
                messages=[messages],
                engine_name="gpt-4o-mini", # gpt-3.5-turbo
                max_tokens=1000, # 32
                requests_per_minute = 20,
                # response_format={"type":"json_object"},
            )
        )
        return responses[0]

    def rerank(self, paper_list, title, abstract):
        """LLM-rank *paper_list* by relevance to (title, abstract); return top 5.

        NOTE(review): trusts the model's JSON "ranking" indices — an
        out-of-range or duplicate index raises/repeats here; confirm acceptable.
        """
        os.environ["OPENAI_BASE_URL"] = PRIVATE_API_BASE
        os.environ["OPENAI_API_KEY"] = PRIVATE_API_KEY
        client = AsyncOpenAI()

        # Build one prompt containing every candidate abstract, numbered from 1.
        rec_content = ""
        rec_paper_cnt = 1

        for rec_paper in paper_list:
            rec_content += f"Paper {rec_paper_cnt}: {rec_paper['title']}\n{rec_paper['abstract']}\n\n"
            rec_paper_cnt += 1

        rec_content += f"Reference Paper: {title}\n"
        rec_content += f"Abstract: {abstract}\n"

        messages=[
            {"role": "system", "content": f"Given the abstracts of {rec_paper_cnt-1} papers and the abstract of a reference paper, rank the papers in order of relevance to the reference paper. Output the top 5 as a list of integers in JSON format: {{'ranking': [1, 10, 4, 2, 8]}}."} ,
            {"role": "user", "content": rec_content},
        ]

        responses = asyncio.run(
            generate_from_openai_chat_completion(
                client,
                messages=[messages],
                engine_name="gpt-4o-mini", # gpt-3.5-turbo
                max_tokens=1000, # 32
                requests_per_minute = 20,
                response_format={"type":"json_object"},
            )
        )
        response_data = json.loads(responses[0])
        rec_papers = []
        # Ranking indices are 1-based positions into paper_list.
        for rec_num in response_data["ranking"][:5]:
            num = int(rec_num)
            rec_papers.append(paper_list[num-1])

        return rec_papers

    def extract_related_content(self, papers, aspect):
        """For each retrieved paper, keep only the text relevant to *aspect*.

        Two LLM passes: (1) classify every ~300-word chunk of each paper's
        PDF as relevant/irrelevant to the aspect; (2) clean and concatenate
        the relevant chunks into a single "revised_text" per paper.
        Returns a list of {"title", "content"} dicts.
        """
        # This helper runs on the *user's* credentials, unlike query_gen/rerank.
        os.environ["OPENAI_BASE_URL"] = self.api_base
        os.environ["OPENAI_API_KEY"] = self.api
        client = AsyncOpenAI()

        messages = []
        chunk_index_map = []
        paper_data_list = []
        paper_chunk_list = []
        for paper_idx, paper in enumerate(papers):
            # download_pdf returns [] on failure; such papers get no chunks
            # and fall back to their abstract below.
            paper_chunks = download_pdf(paper)
            paper_chunk_list.append(paper_chunks)

            SYSTEM_INPUT = f"Read the following section from a scientific paper. If the section is related to the paper's {aspect}, output 'yes'; otherwise, output 'no'."

            for chunk_idx, paper_chunk in enumerate(paper_chunks):
                message = [
                    {"role": "system", "content": SYSTEM_INPUT},
                    {"role": "user", "content": paper_chunk},
                ]
                messages.append(message)
                chunk_index_map.append((paper_idx, chunk_idx))  # record which paper each chunk belongs to

        responses = asyncio.run(
            generate_from_openai_chat_completion(
                client,
                messages=messages,
                engine_name="gpt-4o-mini",
                max_tokens=1000,
                requests_per_minute=100,
            )
        )

        paper_data_list = [{"title": paper["title"], "content": ""} for paper in papers]

        # Stitch the chunks the classifier accepted back onto their papers.
        for (paper_idx, chunk_idx), response in zip(chunk_index_map, responses):
            if response.strip().lower().startswith("yes"):
                paper_data_list[paper_idx]["content"] += paper_chunk_list[paper_idx][chunk_idx] + "\n"

        # Papers with no accepted chunks fall back to their abstract.
        for idx, paper_data in enumerate(paper_data_list):
            if not paper_data["content"].strip():
                paper_data["content"] = papers[idx]["abstract"]

        # Aspect-specific cleanup prompt for the second pass.
        if aspect == "Methodology":
            SYSTEM_INPUT = """Concatenate all the content from the methodology sections of a paper.
        Remove sentences that are irrelevant to the proposed methodology or models, and keep details about key components and innovations.
        Organize the result in JSON format as follows:
        {
            "revised_text": str, not dict, not a summary
        }
        """
        elif aspect == "Result Analysis":
            SYSTEM_INPUT = """Concatenate all the content from the result analysis sections of a paper.
        Remove sentences that are irrelevant to the result analysis of the experiments, and keep details about the metrics, case study and how the paper presents the results.
        Organize the result in JSON format as follows:
        {
            "revised_text": str, not dict, not a summary
        }
        """
        elif aspect == "Experimental Design":
            SYSTEM_INPUT = """Concatenate all the content from the experimental design sections of a paper.
        Remove sentences that are irrelevant to the experiment setup, and keep details about the datasets, baselines, and main experimental, ablation studies.
        Organize the result in JSON format as follows:
        {
            "revised_text": str, not dict, not a summary
        }
        """
        elif aspect == "Literature Review":
            SYSTEM_INPUT = """Concatenate all the content from the literature review sections of a paper.
        Remove sentences that are irrelevant to the literature review, and keep details about the related works.
        Organize the result in JSON format as follows:
        {
            "revised_text": str, not dict, not a summary
        }
        """
        # NOTE(review): an unrecognized aspect leaves SYSTEM_INPUT bound to the
        # first-pass classifier prompt — the UI restricts aspects to the four
        # above, but confirm if aspects are ever added.
        messages = []
        for paper_data in paper_data_list:
            message=[
                {"role": "system", "content": SYSTEM_INPUT} ,
                {"role": "user", "content": paper_data["content"]},
            ]
            messages.append(message)

        responses = asyncio.run(
            generate_from_openai_chat_completion(
                client,
                messages=messages,
                engine_name="gpt-4o-mini", # gpt-3.5-turbo
                max_tokens=1000, # 32
                requests_per_minute = 20,
                response_format={"type":"json_object"},
            )
        )

        results = []
        for paper_data, response in zip(paper_data_list, responses):
            response = json.loads(response)
            results.append({"title": paper_data["title"], "content": response["revised_text"]})
        return results

    def chat_review(self, text):
        """Ask the review model for this paper's limitations; return them as a
        numbered string, or "Error: ..." if everything fails.

        The model is asked for JSON; if its output doesn't parse, a second
        gpt-4o-mini call re-extracts the limitation list from the raw output.
        """
        os.environ["OPENAI_BASE_URL"] = self.api_base
        os.environ["OPENAI_API_KEY"] = self.api
        client = AsyncOpenAI()

        # NOTE(review): both prompts end with an unclosed JSON example
        # ('{"limitations": <a list of limitations>' with no closing '}}') —
        # confirm whether the closing braces were dropped by mistake.
        if self.enable_rag:
            messages=[
                {"role": "system", "content": f"Read the following content from several papers to gain knowledge in the relevant field. Using this knowledge, review a new scientific paper in this field. Based on existing research, identify the limitations of the 'Paper to Review'. Generate the major limitations related to its {self.aspect} in this paper. Do not include any limitation explicitly mentioned in the paper itself and return only the list of limitations. Return only the limitations in the following JSON format: {{\"limitations\": <a list of limitations>"} ,
                {"role": "user", "content": text},
            ]
        else:
            messages=[
                {"role": "system", "content": f"Read the following scientific paper and generate major limitations in this paper about its {self.aspect}. Do not include any limitation explicitly mentioned in the paper itself and return only the limitations. Return only the limitations in the following JSON format: {{\"limitations\": <a list of limitations>"} ,
                {"role": "user", "content": text},
            ]
        try:
            responses = asyncio.run(
                generate_from_openai_chat_completion(
                    client,
                    messages=[messages],
                    engine_name=self.model_name, # gpt-3.5-turbo
                    max_tokens=1000, # 32
                    requests_per_minute = 20,
                    # response_format={"type":"json_object"},
                )
            )
            try:
                # Happy path: the model returned valid JSON.
                limitations = json.loads(responses[0])["limitations"]
                result = ""
                limit_cnt = 1
                for limitation in limitations:
                    result += f"{str(limit_cnt)}. {limitation}\n"
                    limit_cnt += 1
            except:
                # Fallback: have gpt-4o-mini re-extract a JSON limitation list
                # from the malformed first response.
                SYSTEM_INPUT = f"Below is an output from an LLM about several limitations of a scientific paper. Please extract the list of limitations and DO NOT make any modification to the original limitations. Return the limitations in the following JSON format: {{\"limitations\": <a list of limitations>}}. If there is no valid response inthe output, return {{\"limitations\": {{}}}}"
                messages=[
                    {"role": "system", "content": SYSTEM_INPUT},
                    {"role": "user", "content": responses[0]},
                ]
                responses = asyncio.run(
                    generate_from_openai_chat_completion(
                        client,
                        messages=[messages],
                        engine_name="gpt-4o-mini", # gpt-3.5-turbo
                        max_tokens=1000, # 32
                        requests_per_minute = 20,
                        response_format={"type":"json_object"},
                    )
                )
                limitations = json.loads(responses[0])["limitations"]
                result = ""
                limit_cnt = 1
                for limitation in limitations:
                    result += f"{str(limit_cnt)}. {limitation}\n"
                    limit_cnt += 1
            # for choice in response.choices:
            #     result += choice.message.content
            # result = insert_sentence(result, '**Generated by ChatGPT, no copying allowed!**', 50)
        except Exception as e:
            # Any failure (API error, fallback parse error) is shown to the user.
            result = "Error: "+ str(e)
            # usage = 'xxxxx'
        print("********"*10)
        print(result)
        print("********"*10)
        return result

    def retrieve_papers(self, title, abstract):
        """Retrieve up to 5 related open-access papers for RAG.

        Strategy: exact-title search on Semantic Scholar; if that hits,
        take up to 20 open-access recommendations for it. Otherwise fall
        back to an LLM-generated 5-word query and pool up to 5
        recommendations per search hit. The pooled candidates are LLM-
        reranked (top 5) and reduced to aspect-relevant content.
        Returns None when nothing usable is found.
        """
        query = title
        search_results = search_paper(query)
        if search_results != [] and search_results["data"][0]["title"].lower() == title.lower():
            # NOTE(review): search_results is a dict — search_results[0] raises
            # KeyError here; this likely should be search_results["data"][0].
            search_result = search_results[0]
            retrieval = recommendation(search_result["paperId"])
            recommended_paper_list = []
            for recommended_paper in retrieval["recommendedPapers"]:
                if recommended_paper["abstract"] is None:
                    continue
                if recommended_paper["isOpenAccess"] and recommended_paper["openAccessPdf"]!= None:
                    recommended_paper_list.append(recommended_paper)

                if len(recommended_paper_list) >= 20:
                    break

        else:
            # Title search missed: retry with an LLM-generated TLDR query.
            query = self.query_gen(abstract)
            search_results = search_paper(query)
            recommended_paper_list = []
            if search_results["data"] == []:
                return None
            for search_result in search_results["data"]:
                retrieval = recommendation(search_result["paperId"])
                recommended_papers = []
                for recommended_paper in retrieval["recommendedPapers"]:
                    if recommended_paper["abstract"] is None:
                        continue
                    if recommended_paper["isOpenAccess"] and recommended_paper["openAccessPdf"]!= None:
                        recommended_papers.append(recommended_paper)

                    if len(recommended_papers) >= 5:
                        break
                recommended_paper_list.extend(recommended_papers)

        if recommended_paper_list == []:
            return None
        final_papers = self.rerank(recommended_paper_list, title, abstract)
        retrieved_papers = self.extract_related_content(final_papers, self.aspect)

        return retrieved_papers

    def extract_from_paper(self, pdf_path):
        """Extract (full_text, title, abstract) from PDF bytes.

        Despite the name, *pdf_path* holds raw PDF bytes (the Gradio File
        input is type="binary"). Title/abstract are read by sending a PNG
        render of page 1 to a vision model on the private credentials; the
        body text is extracted with PyPDF2.
        """
        os.environ["OPENAI_BASE_URL"] = PRIVATE_API_BASE
        os.environ["OPENAI_API_KEY"] = PRIVATE_API_KEY
        client = AsyncOpenAI()

        # with open(pdf_path, 'rb') as f: # TODO
        #     pdf_bytes = f.read()
        # file_object = BytesIO(pdf_bytes)

        file_object = BytesIO(pdf_path) # TODO
        pdf_reader = PyPDF2.PdfReader(file_object)

        # Render page 1 to PNG via PyMuPDF for the vision prompt.
        doc = fitz.open(stream=pdf_path, filetype="pdf") # TODO
        page = doc.load_page(0)
        pix = page.get_pixmap()
        image_bytes = pix.tobytes("png")

        image_base64 = base64.b64encode(image_bytes).decode('utf-8')

        USER_INPUT = [{"type": "text", "text": "The first page of the paper: "}, {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}]
        messages=[
            {"role": "system", "content": "Given the first-page image of a scientific paper in PDF format, extract and return the title and abstract in the following JSON format: {\"title\": \"<extracted title>\", \"abstract\": \"<extracted abstract>\"}."} ,
            {"role": "user", "content": USER_INPUT},
        ]
        responses = asyncio.run(
            generate_from_openai_chat_completion(
                client,
                messages=[messages],
                engine_name="gpt-4o-mini", # gpt-3.5-turbo
                max_tokens=1000, # 32
                requests_per_minute = 20,
                response_format={"type":"json_object"},
            )
        )

        response = json.loads(responses[0])
        title = response["title"]
        abstract = response["abstract"]

        # NOTE(review): as in extract_chapter, page_number_start is reset to
        # the current page every iteration, so the break below never fires —
        # all pages are extracted.
        num_pages = len(pdf_reader.pages)
        extraction_started = False
        extracted_text = ""
        for page_number in range(num_pages):
            page = pdf_reader.pages[page_number]
            page_text = page.extract_text()

            extraction_started = True
            page_number_start = page_number
            if extraction_started:
                extracted_text += page_text
                if page_number_start + 1 < page_number:
                    break
        return extracted_text, title, abstract
487
+
488
def main(api, api_base, paper_pdf, aspect, model_name, enable_rag):
    """Gradio entry point: review the uploaded paper and report timing.

    Returns (retrieved_literature, review_comments, status_text). Missing
    API key or PDF short-circuits with a user-facing message; any pipeline
    exception is caught and surfaced as text rather than crashing the UI.
    """
    start_time = time.time()
    # print("key: ", PRIVATE_API_KEY, "\nbase: ", PRIVATE_API_BASE)
    retrieved_content = ''
    if not api or not paper_pdf:
        missing_input_msg = "It looks like there's a missing API key or PDF input. Make sure you've provided the necessary information or uploaded the required file."
        comments = missing_input_msg
        output2 = missing_input_msg
    else:
        try:
            reviewer = Reviewer(api, api_base, paper_pdf, aspect, model_name, enable_rag)
            comments, retrieved_content = reviewer.review_by_chatgpt(paper_list=paper_pdf)
            elapsed = time.time() - start_time
            output2 = "Processing Time:" + str(round(elapsed, 2)) + "seconds"
        except Exception as e:
            # Surface any pipeline failure in both output boxes.
            comments = "Error: " + str(e)
            output2 = "Error: " + str(e)
    return retrieved_content, comments, output2
507
+
508
+
509
+
510
+
511
+
512
########################################################################################################
# Gradio UI wiring: builds the input widgets, binds them to main(), and
# launches the demo server. Input order must match main()'s signature:
# (api, api_base, paper_pdf, aspect, model_name, enable_rag).

title = "LimitGen"


# NOTE(review): the <strong> tag below is never closed — confirm the intended
# HTML; browsers will bold everything after it.
description = '''<div align='left'>
<strong>We present a demo for our paper: Can LLMs Identify Critical Limitations within Scientific Research? A Systematic Evaluation on AI Research Papers. Upload the PDF of the paper you want to review, and the demo will automatically generate its identified limitations.
</div>
'''

inp = [gradio.Textbox(label="Input your API-key",
                      value="",
                      type='password'),
       gradio.Textbox(label="Input the base URL (ending with /v1). Skip this step if using the original OpenAI API.",
                      value="https://api.openai.com/v1"),

       # type="binary" delivers the upload to main() as raw bytes.
       gradio.File(label="Upload the PDF file of your paper (Make sure the PDF is fully uploaded before clicking Submit)",type="binary"),
       gradio.Radio(choices=["Methodology", "Experimental Design", "Result Analysis", "Literature Review"],
                    value="Methodology",
                    label="Select the aspect"),
       gradio.Textbox(label="Input the model name",
                      value="gpt-4o-mini"),
       gradio.Checkbox(label="Enable RAG", value=False)

]

# Outputs mirror main()'s return tuple:
# (retrieved literature, review text, timing/error status).
chat_reviewer_gui = gradio.Interface(fn=main,
                                     inputs=inp,
                                     outputs = [gradio.Textbox(lines=6, label="Retrieved Literature"), gradio.Textbox(lines=15, label="Output"), gradio.Textbox(lines=2, label="Resource Statistics")],
                                     title=title,
                                     description=description)

# Start server
chat_reviewer_gui .launch(quiet=True, show_api=False)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PyMuPDF==1.21.1
2
+ tenacity==8.2.2
3
+ pybase64==1.2.3
4
+ Pillow==9.4.0
5
+ openai==1.33.0
6
+ markdown
7
+ gradio==3.20.1
8
+ PyPDF2
9
+ aiolimiter
10
+ pdf2image
11
+ httpx==0.27.2