Spaces:
Running
Running
wangrongsheng
committed on
Commit
·
2dea19e
1
Parent(s):
f64978e
优化:增加新的model参数
Browse files
app.py
CHANGED
@@ -422,7 +422,7 @@ class Reader:
|
|
422 |
|
423 |
return image_url
|
424 |
|
425 |
-
def summary_with_chat(self, paper_list, key):
|
426 |
htmls = []
|
427 |
utoken = 0
|
428 |
ctoken = 0
|
@@ -437,7 +437,7 @@ class Reader:
|
|
437 |
text += list(paper.section_text_dict.values())[0]
|
438 |
#max_token = 2500 * 4
|
439 |
#text = text[:max_token]
|
440 |
-
chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key))
|
441 |
htmls.append(chat_summary_text)
|
442 |
|
443 |
# TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
|
@@ -455,7 +455,7 @@ class Reader:
|
|
455 |
# methods
|
456 |
method_text += paper.section_text_dict[method_key]
|
457 |
text = summary_text + "\n<Methods>:\n" + method_text
|
458 |
-
chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key))
|
459 |
htmls.append(chat_method_text)
|
460 |
else:
|
461 |
chat_method_text = ''
|
@@ -478,7 +478,7 @@ class Reader:
|
|
478 |
text = summary_text + "\n <Conclusion>:\n" + conclusion_text
|
479 |
else:
|
480 |
text = summary_text
|
481 |
-
chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key))
|
482 |
htmls.append(chat_conclusion_text)
|
483 |
htmls.append("\n")
|
484 |
# token统计
|
@@ -500,7 +500,7 @@ class Reader:
|
|
500 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
501 |
stop=tenacity.stop_after_attempt(5),
|
502 |
reraise=True)
|
503 |
-
def chat_conclusion(self, text, key):
|
504 |
openai.api_key = key
|
505 |
conclusion_prompt_token = 650
|
506 |
text_token = len(self.encoding.encode(text))
|
@@ -527,6 +527,8 @@ class Reader:
|
|
527 |
model="gpt-3.5-turbo",
|
528 |
# prompt需要用英语替换,少占用token。
|
529 |
messages=messages,
|
|
|
|
|
530 |
)
|
531 |
|
532 |
result = ''
|
@@ -545,7 +547,7 @@ class Reader:
|
|
545 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
546 |
stop=tenacity.stop_after_attempt(5),
|
547 |
reraise=True)
|
548 |
-
def chat_method(self, text, key):
|
549 |
openai.api_key = key
|
550 |
method_prompt_token = 650
|
551 |
text_token = len(self.encoding.encode(text))
|
@@ -573,6 +575,8 @@ class Reader:
|
|
573 |
response = openai.ChatCompletion.create(
|
574 |
model="gpt-3.5-turbo",
|
575 |
messages=messages,
|
|
|
|
|
576 |
)
|
577 |
|
578 |
result = ''
|
@@ -592,7 +596,7 @@ class Reader:
|
|
592 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
593 |
stop=tenacity.stop_after_attempt(5),
|
594 |
reraise=True)
|
595 |
-
def chat_summary(self, text, key):
|
596 |
openai.api_key = key
|
597 |
summary_prompt_token = 1000
|
598 |
text_token = len(self.encoding.encode(text))
|
@@ -631,6 +635,8 @@ class Reader:
|
|
631 |
response = openai.ChatCompletion.create(
|
632 |
model="gpt-3.5-turbo",
|
633 |
messages=messages,
|
|
|
|
|
634 |
)
|
635 |
|
636 |
result = ''
|
@@ -661,7 +667,7 @@ class Reader:
|
|
661 |
print(f"Query: {self.query}")
|
662 |
print(f"Sort: {self.sort}")
|
663 |
|
664 |
-
def upload_pdf(key, text, file):
|
665 |
# 检查两个输入都不为空
|
666 |
if not key or not text or not file:
|
667 |
return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
|
@@ -673,7 +679,7 @@ def upload_pdf(key, text, file):
|
|
673 |
paper_list = [Paper(path=file, sl=section_list)]
|
674 |
# 创建一个Reader对象
|
675 |
reader = Reader()
|
676 |
-
sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key)
|
677 |
return cost, sum_info
|
678 |
|
679 |
api_title = "api-key可用验证"
|
@@ -718,6 +724,8 @@ Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github
|
|
718 |
ip = [
|
719 |
gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
|
720 |
gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
|
|
|
|
|
721 |
gradio.inputs.File(label="请上传论文PDF(必填)")
|
722 |
]
|
723 |
|
|
|
422 |
|
423 |
return image_url
|
424 |
|
425 |
+
def summary_with_chat(self, paper_list, key, p, temperature):
|
426 |
htmls = []
|
427 |
utoken = 0
|
428 |
ctoken = 0
|
|
|
437 |
text += list(paper.section_text_dict.values())[0]
|
438 |
#max_token = 2500 * 4
|
439 |
#text = text[:max_token]
|
440 |
+
chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key), p=p, temperature=temperature)
|
441 |
htmls.append(chat_summary_text)
|
442 |
|
443 |
# TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
|
|
|
455 |
# methods
|
456 |
method_text += paper.section_text_dict[method_key]
|
457 |
text = summary_text + "\n<Methods>:\n" + method_text
|
458 |
+
chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key), p=p, temperature=temperature)
|
459 |
htmls.append(chat_method_text)
|
460 |
else:
|
461 |
chat_method_text = ''
|
|
|
478 |
text = summary_text + "\n <Conclusion>:\n" + conclusion_text
|
479 |
else:
|
480 |
text = summary_text
|
481 |
+
chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key), p=p, temperature=temperature)
|
482 |
htmls.append(chat_conclusion_text)
|
483 |
htmls.append("\n")
|
484 |
# token统计
|
|
|
500 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
501 |
stop=tenacity.stop_after_attempt(5),
|
502 |
reraise=True)
|
503 |
+
def chat_conclusion(self, text, key, p, temperature):
|
504 |
openai.api_key = key
|
505 |
conclusion_prompt_token = 650
|
506 |
text_token = len(self.encoding.encode(text))
|
|
|
527 |
model="gpt-3.5-turbo",
|
528 |
# prompt需要用英语替换,少占用token。
|
529 |
messages=messages,
|
530 |
+
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
531 |
+
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
532 |
)
|
533 |
|
534 |
result = ''
|
|
|
547 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
548 |
stop=tenacity.stop_after_attempt(5),
|
549 |
reraise=True)
|
550 |
+
def chat_method(self, text, key, p, temperature):
|
551 |
openai.api_key = key
|
552 |
method_prompt_token = 650
|
553 |
text_token = len(self.encoding.encode(text))
|
|
|
575 |
response = openai.ChatCompletion.create(
|
576 |
model="gpt-3.5-turbo",
|
577 |
messages=messages,
|
578 |
+
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
579 |
+
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
580 |
)
|
581 |
|
582 |
result = ''
|
|
|
596 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
597 |
stop=tenacity.stop_after_attempt(5),
|
598 |
reraise=True)
|
599 |
+
def chat_summary(self, text, key, p, temperature):
|
600 |
openai.api_key = key
|
601 |
summary_prompt_token = 1000
|
602 |
text_token = len(self.encoding.encode(text))
|
|
|
635 |
response = openai.ChatCompletion.create(
|
636 |
model="gpt-3.5-turbo",
|
637 |
messages=messages,
|
638 |
+
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
639 |
+
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
640 |
)
|
641 |
|
642 |
result = ''
|
|
|
667 |
print(f"Query: {self.query}")
|
668 |
print(f"Sort: {self.sort}")
|
669 |
|
670 |
+
def upload_pdf(key, text, p, temperature, file):
|
671 |
# 检查两个输入都不为空
|
672 |
if not key or not text or not file:
|
673 |
return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
|
|
|
679 |
paper_list = [Paper(path=file, sl=section_list)]
|
680 |
# 创建一个Reader对象
|
681 |
reader = Reader()
|
682 |
+
sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key, p=p, temperature=temperature)
|
683 |
return cost, sum_info
|
684 |
|
685 |
api_title = "api-key可用验证"
|
|
|
724 |
ip = [
|
725 |
gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
|
726 |
gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
|
727 |
+
gradio.inputs.Slider(minimum=0.0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
|
728 |
+
gradio.inputs.Slider(minimum=0.0, maximum=2.0, default=1.0, step=0.1, label="Temperature"),
|
729 |
gradio.inputs.File(label="请上传论文PDF(必填)")
|
730 |
]
|
731 |
|