Spaces:
Running
Running
Commit
·
e2af90f
1
Parent(s):
8a972d8
Upload app.py
Browse files
app.py
CHANGED
@@ -290,7 +290,7 @@ class Reader:
|
|
290 |
def __init__(self, key_word='', query='', filter_keys='',
|
291 |
root_path='./',
|
292 |
gitee_key='',
|
293 |
-
sort=arxiv.SortCriterion.SubmittedDate, user_name='defualt', language='cn', key='', p=1.0, temperature=1.0):
|
294 |
self.key = str(key) # OpenAI key
|
295 |
self.user_name = user_name # 读者姓名
|
296 |
self.key_word = key_word # 读者感兴趣的关键词
|
@@ -422,7 +422,7 @@ class Reader:
|
|
422 |
|
423 |
return image_url
|
424 |
|
425 |
-
def summary_with_chat(self, paper_list, key, p, temperature):
|
426 |
htmls = []
|
427 |
utoken = 0
|
428 |
ctoken = 0
|
@@ -437,7 +437,7 @@ class Reader:
|
|
437 |
text += list(paper.section_text_dict.values())[0]
|
438 |
#max_token = 2500 * 4
|
439 |
#text = text[:max_token]
|
440 |
-
chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key), p=p, temperature=temperature)
|
441 |
htmls.append(chat_summary_text)
|
442 |
|
443 |
# TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
|
@@ -455,7 +455,7 @@ class Reader:
|
|
455 |
# methods
|
456 |
method_text += paper.section_text_dict[method_key]
|
457 |
text = summary_text + "\n<Methods>:\n" + method_text
|
458 |
-
chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key), p=p, temperature=temperature)
|
459 |
htmls.append(chat_method_text)
|
460 |
else:
|
461 |
chat_method_text = ''
|
@@ -478,7 +478,7 @@ class Reader:
|
|
478 |
text = summary_text + "\n <Conclusion>:\n" + conclusion_text
|
479 |
else:
|
480 |
text = summary_text
|
481 |
-
chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key), p=p, temperature=temperature)
|
482 |
htmls.append(chat_conclusion_text)
|
483 |
htmls.append("\n")
|
484 |
# token统计
|
@@ -500,7 +500,7 @@ class Reader:
|
|
500 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
501 |
stop=tenacity.stop_after_attempt(5),
|
502 |
reraise=True)
|
503 |
-
def chat_conclusion(self, text, key, p, temperature):
|
504 |
openai.api_key = key
|
505 |
conclusion_prompt_token = 650
|
506 |
text_token = len(self.encoding.encode(text))
|
@@ -524,7 +524,7 @@ class Reader:
|
|
524 |
"""},
|
525 |
]
|
526 |
response = openai.ChatCompletion.create(
|
527 |
-
model=
|
528 |
# prompt需要用英语替换,少占用token。
|
529 |
messages=messages,
|
530 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
@@ -547,7 +547,7 @@ class Reader:
|
|
547 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
548 |
stop=tenacity.stop_after_attempt(5),
|
549 |
reraise=True)
|
550 |
-
def chat_method(self, text, key, p, temperature):
|
551 |
openai.api_key = key
|
552 |
method_prompt_token = 650
|
553 |
text_token = len(self.encoding.encode(text))
|
@@ -573,7 +573,7 @@ class Reader:
|
|
573 |
"""},
|
574 |
]
|
575 |
response = openai.ChatCompletion.create(
|
576 |
-
model=
|
577 |
messages=messages,
|
578 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
579 |
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
@@ -596,7 +596,7 @@ class Reader:
|
|
596 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
597 |
stop=tenacity.stop_after_attempt(5),
|
598 |
reraise=True)
|
599 |
-
def chat_summary(self, text, key, p, temperature):
|
600 |
openai.api_key = key
|
601 |
summary_prompt_token = 1000
|
602 |
text_token = len(self.encoding.encode(text))
|
@@ -633,7 +633,7 @@ class Reader:
|
|
633 |
]
|
634 |
|
635 |
response = openai.ChatCompletion.create(
|
636 |
-
model=
|
637 |
messages=messages,
|
638 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
639 |
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
@@ -667,7 +667,7 @@ class Reader:
|
|
667 |
print(f"Query: {self.query}")
|
668 |
print(f"Sort: {self.sort}")
|
669 |
|
670 |
-
def upload_pdf(key, text, p, temperature, file):
|
671 |
# 检查两个输入都不为空
|
672 |
if not key or not text or not file:
|
673 |
return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
|
@@ -679,7 +679,7 @@ def upload_pdf(key, text, p, temperature, file):
|
|
679 |
paper_list = [Paper(path=file, sl=section_list)]
|
680 |
# 创建一个Reader对象
|
681 |
reader = Reader()
|
682 |
-
sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key, p=p, temperature=temperature)
|
683 |
return cost, sum_info
|
684 |
|
685 |
api_title = "api-key可用验证"
|
@@ -724,6 +724,7 @@ Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github
|
|
724 |
ip = [
|
725 |
gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
|
726 |
gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
|
|
|
727 |
gradio.inputs.Slider(minimum=-0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
|
728 |
gradio.inputs.Slider(minimum=-0, maximum=5.0, default=1.0, step=0.1, label="Temperature"),
|
729 |
gradio.inputs.File(label="请上传论文PDF(必填)")
|
@@ -733,4 +734,4 @@ chatpaper_gui = gradio.Interface(fn=upload_pdf, inputs=ip, outputs=["json", "htm
|
|
733 |
|
734 |
# Start server
|
735 |
gui = gradio.TabbedInterface(interface_list=[api_gui, chatpaper_gui], tab_names=["API-key", "ChatPaper"])
|
736 |
-
gui.launch(quiet=True,show_api=False)
|
|
|
290 |
def __init__(self, key_word='', query='', filter_keys='',
|
291 |
root_path='./',
|
292 |
gitee_key='',
|
293 |
+
sort=arxiv.SortCriterion.SubmittedDate, user_name='defualt', language='cn', key='', model_name="gpt-3.5-turbo", p=1.0, temperature=1.0):
|
294 |
self.key = str(key) # OpenAI key
|
295 |
self.user_name = user_name # 读者姓名
|
296 |
self.key_word = key_word # 读者感兴趣的关键词
|
|
|
422 |
|
423 |
return image_url
|
424 |
|
425 |
+
def summary_with_chat(self, paper_list, key, model_name, p, temperature):
|
426 |
htmls = []
|
427 |
utoken = 0
|
428 |
ctoken = 0
|
|
|
437 |
text += list(paper.section_text_dict.values())[0]
|
438 |
#max_token = 2500 * 4
|
439 |
#text = text[:max_token]
|
440 |
+
chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
|
441 |
htmls.append(chat_summary_text)
|
442 |
|
443 |
# TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
|
|
|
455 |
# methods
|
456 |
method_text += paper.section_text_dict[method_key]
|
457 |
text = summary_text + "\n<Methods>:\n" + method_text
|
458 |
+
chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
|
459 |
htmls.append(chat_method_text)
|
460 |
else:
|
461 |
chat_method_text = ''
|
|
|
478 |
text = summary_text + "\n <Conclusion>:\n" + conclusion_text
|
479 |
else:
|
480 |
text = summary_text
|
481 |
+
chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
|
482 |
htmls.append(chat_conclusion_text)
|
483 |
htmls.append("\n")
|
484 |
# token统计
|
|
|
500 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
501 |
stop=tenacity.stop_after_attempt(5),
|
502 |
reraise=True)
|
503 |
+
def chat_conclusion(self, text, key, model_name, p, temperature):
|
504 |
openai.api_key = key
|
505 |
conclusion_prompt_token = 650
|
506 |
text_token = len(self.encoding.encode(text))
|
|
|
524 |
"""},
|
525 |
]
|
526 |
response = openai.ChatCompletion.create(
|
527 |
+
model=model_name,
|
528 |
# prompt需要用英语替换,少占用token。
|
529 |
messages=messages,
|
530 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
|
|
547 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
548 |
stop=tenacity.stop_after_attempt(5),
|
549 |
reraise=True)
|
550 |
+
def chat_method(self, text, key, model_name, p, temperature):
|
551 |
openai.api_key = key
|
552 |
method_prompt_token = 650
|
553 |
text_token = len(self.encoding.encode(text))
|
|
|
573 |
"""},
|
574 |
]
|
575 |
response = openai.ChatCompletion.create(
|
576 |
+
model=model_name,
|
577 |
messages=messages,
|
578 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
579 |
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
|
|
596 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
597 |
stop=tenacity.stop_after_attempt(5),
|
598 |
reraise=True)
|
599 |
+
def chat_summary(self, text, key, model_name, p, temperature):
|
600 |
openai.api_key = key
|
601 |
summary_prompt_token = 1000
|
602 |
text_token = len(self.encoding.encode(text))
|
|
|
633 |
]
|
634 |
|
635 |
response = openai.ChatCompletion.create(
|
636 |
+
model=model_name,
|
637 |
messages=messages,
|
638 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
639 |
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
|
|
667 |
print(f"Query: {self.query}")
|
668 |
print(f"Sort: {self.sort}")
|
669 |
|
670 |
+
def upload_pdf(key, text, model_name, p, temperature, file):
|
671 |
# 检查两个输入都不为空
|
672 |
if not key or not text or not file:
|
673 |
return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
|
|
|
679 |
paper_list = [Paper(path=file, sl=section_list)]
|
680 |
# 创建一个Reader对象
|
681 |
reader = Reader()
|
682 |
+
sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key, model_name=model_name, p=p, temperature=temperature)
|
683 |
return cost, sum_info
|
684 |
|
685 |
api_title = "api-key可用验证"
|
|
|
724 |
ip = [
|
725 |
gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
|
726 |
gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
|
727 |
+
gradio.inputs.Radio(choices=["gpt-3.5-turbo", "gpt-3.5-turbo-0301"], value="gpt-3.5-turbo", label="设备")
|
728 |
gradio.inputs.Slider(minimum=-0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
|
729 |
gradio.inputs.Slider(minimum=-0, maximum=5.0, default=1.0, step=0.1, label="Temperature"),
|
730 |
gradio.inputs.File(label="请上传论文PDF(必填)")
|
|
|
734 |
|
735 |
# Start server
|
736 |
gui = gradio.TabbedInterface(interface_list=[api_gui, chatpaper_gui], tab_names=["API-key", "ChatPaper"])
|
737 |
+
gui.launch(quiet=True,show_api=False)
|