Spaces:
Build error
Build error
from PyPDF2 import PdfReader | |
import openai | |
from .prompt import BASE_POINTS, READING_PROMT_V2 | |
from .paper import Paper | |
from .model_interface import OpenAIModel | |
# NOTE: the OpenAI API key is not set at import time; PaperReader.__init__ sets it.
class PaperReader:
    """Summarize a research paper part by part through a chat model.

    Two separate message lists are maintained:

    * ``messages`` is the rolling "reading" conversation. It starts with the
      reading prompt and is trimmed by ``drop_conversation`` after each
      exchange.
    * ``summary_msg`` collects the summary of every part and is the context
      used for the whole-paper summary and for follow-up questions.

    Attributes:
        init_prompt (str): ``READING_PROMT_V2`` formatted with the
            ``points_to_focus`` constructor argument.
        summary_prompt (str): System prompt that opens ``summary_msg``.
        messages (list[dict]): The rolling reading conversation.
        summary_msg (list[dict]): Per-part summaries plus any follow-up
            questions and answers.
        token_len (int): The ``token_length`` constructor argument. It is
            stored here but not read anywhere else in this class.
        keep_round (int): Threshold (in question/answer rounds) used by
            ``drop_conversation``.
        verbose (bool): When true, each part summary is printed.
        model (OpenAIModel): Wrapper that performs the actual API calls.
    """

    def __init__(self, openai_key, token_length=4000, model="gpt-3.5-turbo",
                 points_to_focus=BASE_POINTS, verbose=False):
        """Configure the prompts, the conversation state and the model wrapper.

        Args:
            openai_key: API key; set globally on the ``openai`` module and
                also handed to ``OpenAIModel``.
            token_length: Stored as ``token_len``.
            model: Model name forwarded to ``OpenAIModel``.
            points_to_focus: Value substituted into ``READING_PROMT_V2``.
            verbose: Print every part summary while summarizing.
        """
        # Kept for backward compatibility: this mutates module-level state of
        # the openai package in addition to configuring the wrapper below.
        openai.api_key = openai_key

        self.init_prompt = READING_PROMT_V2.format(points_to_focus)
        self.summary_prompt = 'You are a researcher helper bot. Now you need to read the summaries of a research paper.'

        self.messages = []
        self.summary_msg = []
        self.token_len = token_length
        self.keep_round = 2
        self.verbose = verbose
        # The original code first stored the model *name* in self.model and
        # then immediately overwrote it; only the wrapper is ever used.
        self.model = OpenAIModel(api_key=openai_key, model=model)

    def drop_conversation(self, msg):
        """Drop the oldest exchange once the history grows long enough.

        When ``msg`` holds at least ``(keep_round + 1) * 2 + 1`` entries, a
        new list is returned that keeps ``msg[0]`` and everything from index
        3 onwards, i.e. entries 1 and 2 are removed. Otherwise ``msg`` itself
        is returned unchanged.
        """
        if len(msg) >= (self.keep_round + 1) * 2 + 1:
            return [msg[0], *msg[3:]]
        return msg

    def send_msg(self, msg):
        """Forward ``msg`` to the model wrapper and return its result."""
        return self.model.send_msg(msg)

    def _chat(self, message):
        """Send one user message in the reading conversation.

        The user message and the reply are both recorded in ``messages``,
        after which the history is trimmed with ``drop_conversation``.

        Returns:
            The reply produced by ``send_msg``.
        """
        self.messages.append({"role": "user", "content": message})
        response = self.send_msg(self.messages)
        # Model output belongs under the "assistant" role; storing it as
        # "system" would replay it as an instruction on the next request.
        self.messages.append({"role": "assistant", "content": response})
        self.messages = self.drop_conversation(self.messages)
        return response

    def summarize(self, paper: Paper):
        """Summarize every part of ``paper`` and then the paper as a whole.

        Both ``messages`` and ``summary_msg`` are reset before reading
        starts. Each ``(page_idx, part_idx, text)`` triple yielded by
        ``paper.iter_pages()`` is sent through ``_chat``; its summary is
        appended to ``summary_msg`` as a user message.

        Returns:
            The reply to the final whole-paper request. The reply itself is
            not appended to ``summary_msg``.
        """
        self.messages = [
            {"role": "system", "content": self.init_prompt},
        ]
        self.summary_msg = [{"role": "system", "content": self.summary_prompt}]

        for (page_idx, part_idx, text) in paper.iter_pages():
            print('page: {}, part: {}'.format(page_idx, part_idx))
            summary = self._chat('now I send you page {}, part {}:{}'.format(page_idx, part_idx, text))
            if self.verbose:
                print(summary)
            self.summary_msg.append({"role": "user", "content": '{}'.format(summary)})

        self.summary_msg.append({"role": "user", "content": 'Now please make a summary of the whole paper'})
        return self.send_msg(self.summary_msg)

    def read_pdf_and_summarize(self, pdf_path):
        """Load the PDF at ``pdf_path`` and return its summary.

        The file is opened with ``PdfReader``, wrapped in ``Paper`` and then
        handed to ``summarize``.
        """
        pdf_reader = PdfReader(pdf_path)
        paper = Paper(pdf_reader)
        print('reading pdf finished')
        return self.summarize(paper)

    def get_summary_of_each_part(self):
        """Return ``summary_msg`` (the live list, not a copy)."""
        return self.summary_msg

    def question(self, question):
        """Ask a follow-up question against the collected summaries.

        The question and the reply are both appended to ``summary_msg`` so
        later questions see the earlier ones.

        Returns:
            The reply produced by ``send_msg``.
        """
        self.summary_msg.append({"role": "user", "content": question})
        response = self.send_msg(self.summary_msg)
        # Same role fix as in _chat: replies are "assistant" messages.
        self.summary_msg.append({"role": "assistant", "content": response})
        return response