gpt-academic

Sleeping

App Files Files Community

binary-husky committed on Mar 31, 2023

Commit

da8cb77

2 Parent(s): dde672c a87ce5b

Merge pull request #147 from JasonGuo1/master

Browse files

feat(toolbox.py，总结word文档.py): 支持rar格式与7z格式解压；word读取

Files changed (4) hide show

crazy_functions/总结word文档.py +127 -0
functional_crazy.py +5 -0
requirements.txt +1 -1
toolbox.py +31 -4

crazy_functions/总结word文档.py ADDED Viewed

	@@ -0,0 +1,127 @@

+from predict import predict_no_ui
+from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
+fast_debug = False
+def 解析docx(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
+    """Summarize each Word document in ``file_manifest`` with one GPT request per file.
+
+    This is a generator: it yields ``(chatbot, history, status)`` tuples so the
+    caller can refresh the UI while the requests are in flight.
+
+    Args:
+        file_manifest: Paths of the ``.docx`` / ``.doc`` files to process.
+        project_folder: Root folder; only used to show each file's relative
+            path inside the prompt.
+        top_p: Forwarded unchanged to ``predict_no_ui_but_counting_down``.
+        temperature: Forwarded unchanged to ``predict_no_ui_but_counting_down``.
+        chatbot: List of ``(question, answer)`` pairs; appended to and updated in place.
+        history: List of strings; the shown prompt and the reply of every
+            request are appended in place.
+        systemPromptTxt: Accepted for signature compatibility; not used here.
+
+    Yields:
+        ``(chatbot, history, status)`` after every UI-relevant state change.
+    """
+    import time, os
+    # python-docx (pip install python-docx) reads .docx and is cross-platform.
+    # pywin32 (pip install pywin32) drives Word through COM for .doc; Windows only.
+    print('begin analysis on:', file_manifest)
+    if not file_manifest:
+        # Nothing to summarize; also keeps the final request below from
+        # touching an unbound ``i_say``.
+        return
+    for index, fp in enumerate(file_manifest):
+        # Compare the real extension case-insensitively instead of splitting on '.'
+        if os.path.splitext(fp)[1].lower() == ".docx":
+            from docx import Document
+            doc = Document(fp)
+            file_content = "\n".join([para.text for para in doc.paragraphs])
+        else:
+            import win32com.client
+            word = win32com.client.Dispatch("Word.Application")
+            try:
+                word.visible = False
+                # Hand Word an absolute path; abspath also copes with ``fp``
+                # already being absolute, unlike gluing it onto os.getcwd().
+                doc = word.Documents.Open(os.path.abspath(fp))
+                try:
+                    file_content = doc.Range().Text
+                finally:
+                    doc.Close()
+            finally:
+                # Always shut the Word instance down, even if opening or reading failed
+                word.Quit()
+        print(file_content)
+        prefix = "接下来请你逐文件分析下面的论文文件，" if index == 0 else ""
+        # File names under private_upload tend to be garbled after unpacking a zip
+        # (rar and 7z are fine), so the prompt could be reduced to the content only.
+        i_say = prefix + f'请对下面的文章片段用中英文做概述，文件名是{os.path.relpath(fp, project_folder)},' \
+                         f'文章内容是 ```{file_content}```'
+        i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 假设你是论文审稿专家，请对下面的文章片段做概述: {os.path.abspath(fp)}'
+        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
+        yield chatbot, history, '正常'
+        if not fast_debug:
+            msg = '正常'
+            # ** gpt request ** (each file is sent with an empty history)
+            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature,
+                                                                 history=[])  # with timeout countdown
+            chatbot[-1] = (i_say_show_user, gpt_say)
+            history.append(i_say_show_user)
+            history.append(gpt_say)
+            yield chatbot, history, msg
+            time.sleep(2)
+    # The string literal below is a no-op at runtime: it holds optional
+    # follow-up prompts that are currently disabled.
+    """
+    # 可按需启用
+    i_say = f'根据你上述的分析，对全文进行概括，用学术性语言写一段中文摘要，然后再写一篇英文的。'
+    chatbot.append((i_say, "[Local Message] waiting gpt response."))
+    yield chatbot, history, '正常'
+    i_say = f'我想让你做一个论文写作导师。您的任务是使用人工智能工具（例如自然语言处理）提供有关如何改进其上述文章的反馈。' \
+            f'您还应该利用您在有效写作技巧方面的修辞知识和经验来建议作者可以更好地以书面形式表达他们的想法和想法的方法。' \
+            f'根据你之前的分析，提出建议'
+    chatbot.append((i_say, "[Local Message] waiting gpt response."))
+    yield chatbot, history, '正常'
+    """
+    if not fast_debug:
+        msg = '正常'
+        # NOTE(review): with the prompts above disabled, ``i_say`` still holds the
+        # prompt of the LAST file, so this request re-sends that prompt (now with
+        # the accumulated history) and overwrites the last chatbot entry.
+        # Confirm whether this is intended before changing it.
+        # ** gpt request **
+        gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature,
+                                                             history=history)  # with timeout countdown
+        chatbot[-1] = (i_say, gpt_say)
+        history.append(i_say)
+        history.append(gpt_say)
+        yield chatbot, history, msg
+        # Persist the conversation and show what write_results_to_file returned
+        res = write_results_to_file(history)
+        chatbot.append(("完成了吗？", res))
+        yield chatbot, history, msg
+@CatchException
+def 总结word文档(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
+    """Plugin entry point: batch-summarize every Word document under a folder.
+
+    This is a generator yielding ``(chatbot, history, status)`` tuples. It
+    reports a problem in the chat and stops early when python-docx cannot be
+    imported, when ``txt`` is not an existing path, or when no ``.docx`` /
+    ``.doc`` file is found below it.
+
+    Args:
+        txt: Path typed by the user; searched recursively for Word files.
+        top_p: Forwarded to ``解析docx``.
+        temperature: Forwarded to ``解析docx``.
+        chatbot: List of chat entries; appended to in place.
+        history: Incoming history; replaced by a fresh local list before processing.
+        systemPromptTxt: Forwarded to ``解析docx``.
+        WEB_PORT: Not used in this function.
+    """
+    import glob, os
+    # Basic info: what the plugin does and who contributed it
+    chatbot.append([
+        "函数插件功能？",
+        "批量总结Word文档。函数插件贡献者: JasonGuo1"])
+    yield chatbot, history, '正常'
+    # Probe the dependency up front; if it is missing, show install advice and stop.
+    # Only ImportError is handled so unrelated failures are not masked.
+    try:
+        from docx import Document
+    except ImportError:
+        report_execption(chatbot, history,
+                         a=f"解析项目: {txt}",
+                         b=f"导入软件依赖失败。使用该模块需要额外依赖，安装方法```pip install --upgrade python-docx pywin32```。")
+        yield chatbot, history, '正常'
+        return
+    # Start from an empty history so the input does not overflow
+    history = []
+    # Validate the input path; bail out when nothing usable was given
+    if not os.path.exists(txt):
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
+        yield chatbot, history, '正常'
+        return
+    project_folder = txt
+    # Collect the files to process: .docx first, then .doc
+    file_manifest = glob.glob(f'{project_folder}/**/*.docx', recursive=True) + \
+                    glob.glob(f'{project_folder}/**/*.doc', recursive=True)
+    # No matching file found
+    if not file_manifest:
+        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.docx或doc文件: {txt}")
+        yield chatbot, history, '正常'
+        return
+    # Run the actual task
+    yield from 解析docx(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)

functional_crazy.py CHANGED Viewed

@@ -56,6 +56,7 @@ def get_crazy_functionals():
     if UserVisibleLevel >= 1:
         from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
         from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
         function_plugins.update({
             "[仅供开发调试] 批量总结PDF文档": {
                 "Color": "stop",
@@ -66,6 +67,10 @@ def get_crazy_functionals():
                 "Color": "stop",
                 "Function": HotReload(批量总结PDF文档pdfminer)
             },
         })
     # VisibleLevel=2 尚未充分测试的函数插件，放在这里

     if UserVisibleLevel >= 1:
         from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
         from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
+        from crazy_functions.总结word文档 import 总结word文档
         function_plugins.update({
             "[仅供开发调试] 批量总结PDF文档": {
                 "Color": "stop",
                 "Color": "stop",
                 "Function": HotReload(批量总结PDF文档pdfminer)
             },
+            "[仅供开发调试] 批量总结Word文档": {
+                "Color": "stop",
+                "Function": HotReload(总结word文档)
+            },
         })
     # VisibleLevel=2 尚未充分测试的函数插件，放在这里

requirements.txt CHANGED Viewed

@@ -2,4 +2,4 @@ gradio>=3.23
 requests[socks]
 mdtex2html
 Markdown
-latex2mathml

 requests[socks]
 mdtex2html
 Markdown
+latex2mathml

toolbox.py CHANGED Viewed

@@ -176,8 +176,32 @@ def extract_archive(file_path, dest_dir):
         with tarfile.open(file_path, 'r:*') as tarobj:
             tarobj.extractall(path=dest_dir)
             print("Successfully extracted tar archive to {}".format(dest_dir))
     else:
-        return
 def find_recent_files(directory):
     """
@@ -209,16 +233,19 @@ def on_file_uploaded(files, chatbot, txt):
     except: pass
     time_tag = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
     os.makedirs(f'private_upload/{time_tag}', exist_ok=True)
     for file in files:
         file_origin_name = os.path.basename(file.orig_name)
         shutil.copy(file.name, f'private_upload/{time_tag}/{file_origin_name}')
-        extract_archive(f'private_upload/{time_tag}/{file_origin_name}',
                         dest_dir=f'private_upload/{time_tag}/{file_origin_name}.extract')
     moved_files = [fp for fp in glob.glob('private_upload/**/*', recursive=True)]
     txt = f'private_upload/{time_tag}'
     moved_files_str = '\t\n\n'.join(moved_files)
-    chatbot.append(['我上传了文件，请查收',
-                    f'[Local Message] 收到以下文件: \n\n{moved_files_str}\n\n调用路径参数已自动修正到: \n\n{txt}\n\n现在您点击任意实验功能时，以上文件将被作为输入参数'])
     return chatbot, txt

         with tarfile.open(file_path, 'r:*') as tarobj:
             tarobj.extractall(path=dest_dir)
             print("Successfully extracted tar archive to {}".format(dest_dir))
+    # 第三方库，需要预先pip install rarfile
+    # 此外，Windows上还需要安装winrar软件，配置其Path环境变量，如"C:\Program Files\WinRAR"才可以
+    elif file_extension == '.rar':
+        try:
+            import rarfile
+            with rarfile.RarFile(file_path) as rf:
+                rf.extractall(path=dest_dir)
+                print("Successfully extracted rar archive to {}".format(dest_dir))
+        except:
+            print("Rar format requires additional dependencies to install")
+            return '\n\n需要安装pip install rarfile来解压rar文件'
+    # 第三方库，需要预先pip install py7zr
+    elif file_extension == '.7z':
+        try:
+            import py7zr
+            with py7zr.SevenZipFile(file_path, mode='r') as f:
+                f.extractall(path=dest_dir)
+                print("Successfully extracted 7z archive to {}".format(dest_dir))
+        except:
+            print("7z format requires additional dependencies to install")
+            return '\n\n需要安装pip install py7zr来解压7z文件'
     else:
+        return ''
+    return ''
 def find_recent_files(directory):
     """
     except: pass
     time_tag = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
     os.makedirs(f'private_upload/{time_tag}', exist_ok=True)
+    err_msg = ''
     for file in files:
         file_origin_name = os.path.basename(file.orig_name)
         shutil.copy(file.name, f'private_upload/{time_tag}/{file_origin_name}')
+        err_msg += extract_archive(f'private_upload/{time_tag}/{file_origin_name}',
                         dest_dir=f'private_upload/{time_tag}/{file_origin_name}.extract')
     moved_files = [fp for fp in glob.glob('private_upload/**/*', recursive=True)]
     txt = f'private_upload/{time_tag}'
     moved_files_str = '\t\n\n'.join(moved_files)
+    chatbot.append(['我上传了文件，请查收',
+                    f'[Local Message] 收到以下文件: \n\n{moved_files_str}'+
+                    f'\n\n调用路径参数已自动修正到: \n\n{txt}'+
+                    f'\n\n现在您点击任意实验功能时，以上文件将被作为输入参数'+err_msg])
     return chatbot, txt