Tuchuanhuhuhu committed on
Commit
f8a0305
·
1 Parent(s): ee55620

加入川虎助理和川虎助理Pro模型

Browse files
ChuanhuChatbot.py CHANGED
@@ -286,7 +286,6 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
286
  chatbot,
287
  use_streaming_checkbox,
288
  use_websearch_checkbox,
289
- autogpt_mode,
290
  index_files,
291
  language_select_dropdown,
292
  ],
@@ -349,7 +348,6 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
349
  chatbot,
350
  use_streaming_checkbox,
351
  use_websearch_checkbox,
352
- autogpt_mode,
353
  index_files,
354
  language_select_dropdown,
355
  ],
 
286
  chatbot,
287
  use_streaming_checkbox,
288
  use_websearch_checkbox,
 
289
  index_files,
290
  language_select_dropdown,
291
  ],
 
348
  chatbot,
349
  use_streaming_checkbox,
350
  use_websearch_checkbox,
 
351
  index_files,
352
  language_select_dropdown,
353
  ],
config_example.json CHANGED
@@ -15,6 +15,14 @@
15
  "local_embedding": false, //是否在本地编制索引
16
  "hide_history_when_not_logged_in": false, //未登录情况下是否不展示对话历史
17
  "default_model": "gpt-3.5-turbo", // 默认模型
 
 
 
 
 
 
 
 
18
  "advance_docs": {
19
  "pdf": {
20
  // 是否认为PDF是双栏的
 
15
  "local_embedding": false, //是否在本地编制索引
16
  "hide_history_when_not_logged_in": false, //未登录情况下是否不展示对话历史
17
  "default_model": "gpt-3.5-turbo", // 默认模型
18
+
19
+ //川虎助理设置
20
+ "default_chuanhu_assistant_model": "gpt-4", //川虎助理使用的模型,可选gpt-3.5-turbo或者gpt-4
21
+ "GOOGLE_CSE_ID": "", //谷歌搜索引擎ID,用于川虎助理Pro模式,获取方式请看 https://stackoverflow.com/questions/37083058/programmatically-searching-google-in-python-using-custom-search
22
+ "GOOGLE_API_KEY": "", //谷歌API Key,用于川虎助理Pro模式
23
+ "WOLFRAM_ALPHA_APPID": "", //Wolfram Alpha App ID,用于川虎助理Pro模式,获取方式请看 https://products.wolframalpha.com/api/
24
+ "SERPAPI_API_KEY": "", //SerpAPI API Key,用于川虎助理Pro模式,获取方式请看 https://serpapi.com/
25
+
26
  "advance_docs": {
27
  "pdf": {
28
  // 是否认为PDF是双栏的
modules/config.py CHANGED
@@ -24,7 +24,8 @@ __all__ = [
24
  "server_name",
25
  "server_port",
26
  "share",
27
- "hide_history_when_not_logged_in"
 
28
  ]
29
 
30
  # 添加一个统一的config文件,避免文件过多造成的疑惑(优先级最低)
@@ -102,6 +103,12 @@ api_host = os.environ.get("api_host", config.get("api_host", ""))
102
  if api_host:
103
  shared.state.set_api_host(api_host)
104
 
 
 
 
 
 
 
105
  @contextmanager
106
  def retrieve_openai_api(api_key = None):
107
  old_api_key = os.environ.get("OPENAI_API_KEY", "")
 
24
  "server_name",
25
  "server_port",
26
  "share",
27
+ "hide_history_when_not_logged_in",
28
+ "default_chuanhu_assistant_model"
29
  ]
30
 
31
  # 添加一个统一的config文件,避免文件过多造成的疑惑(优先级最低)
 
103
  if api_host:
104
  shared.state.set_api_host(api_host)
105
 
106
+ default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-4")
107
+ os.environ["GOOGLE_CSE_ID"] = config.get("GOOGLE_CSE_ID", "")
108
+ os.environ["GOOGLE_API_KEY"] = config.get("GOOGLE_API_KEY", "")
109
+ os.environ["WOLFRAM_ALPHA_APPID"] = config.get("WOLFRAM_ALPHA_APPID", "")
110
+ os.environ["SERPAPI_API_KEY"] = config.get("SERPAPI_API_KEY", "")
111
+
112
  @contextmanager
113
  def retrieve_openai_api(api_key = None):
114
  old_api_key = os.environ.get("OPENAI_API_KEY", "")
modules/models/ChuanhuAgent.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains.summarize import load_summarize_chain
2
+ from langchain import OpenAI, PromptTemplate, LLMChain
3
+ from langchain.chat_models import ChatOpenAI
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.chains.mapreduce import MapReduceChain
6
+ from langchain.prompts import PromptTemplate
7
+ from langchain.text_splitter import TokenTextSplitter
8
+ from langchain.embeddings import OpenAIEmbeddings
9
+ from langchain.vectorstores import FAISS
10
+ from langchain.chains import RetrievalQA
11
+ from langchain.agents import load_tools
12
+ from langchain.agents import initialize_agent
13
+ from langchain.agents import AgentType
14
+ from langchain.docstore.document import Document
15
+ from langchain.tools import BaseTool, StructuredTool, Tool, tool
16
+ from langchain.callbacks.stdout import StdOutCallbackHandler
17
+ from langchain.callbacks.manager import BaseCallbackManager
18
+
19
+ from pydantic import BaseModel, Field
20
+
21
+ import requests
22
+ from bs4 import BeautifulSoup
23
+
24
+ from .base_model import BaseLLMModel
25
+ from ..config import default_chuanhu_assistant_model
26
+ from ..presets import SUMMARIZE_PROMPT
27
+ import logging
28
+
29
+ class WebBrowsingInput(BaseModel):
30
+ url: str = Field(description="URL of a webpage")
31
+
32
+ class WebAskingInput(BaseModel):
33
+ url: str = Field(description="URL of a webpage")
34
+ question: str = Field(description="Question that you want to know the answer to, based on the webpage's content.")
35
+
36
+
37
+ class ChuanhuAgent_Client(BaseLLMModel):
38
+ def __init__(self, model_name, openai_api_key, user_name="") -> None:
39
+ super().__init__(model_name=model_name, user=user_name)
40
+ self.text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=30)
41
+ self.api_key = openai_api_key
42
+ self.llm = ChatOpenAI(openai_api_key=openai_api_key, temperature=0, model_name=default_chuanhu_assistant_model)
43
+ PROMPT = PromptTemplate(template=SUMMARIZE_PROMPT, input_variables=["text"])
44
+ self.summarize_chain = load_summarize_chain(self.llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
45
+ if "Pro" in self.model_name:
46
+ self.tools = load_tools(["google-search-results-json", "llm-math", "arxiv", "wikipedia", "wolfram-alpha"], llm=self.llm)
47
+ else:
48
+ self.tools = load_tools(["ddg-search", "llm-math", "arxiv", "wikipedia"], llm=self.llm)
49
+
50
+ self.tools.append(
51
+ Tool.from_function(
52
+ func=self.summary_url,
53
+ name="Summary Webpage",
54
+ description="useful when you need to know the overall content of a webpage.",
55
+ args_schema=WebBrowsingInput
56
+ )
57
+ )
58
+
59
+ self.tools.append(
60
+ StructuredTool.from_function(
61
+ func=self.ask_url,
62
+ name="Ask Webpage",
63
+ description="useful when you need to ask detailed questions about a webpage.",
64
+ args_schema=WebAskingInput
65
+ )
66
+ )
67
+
68
+ def summary(self, text):
69
+ texts = Document(page_content=text)
70
+ texts = self.text_splitter.split_documents([texts])
71
+ return self.summarize_chain({"input_documents": texts}, return_only_outputs=True)["output_text"]
72
+
73
+ def fetch_url_content(self, url):
74
+ response = requests.get(url)
75
+ soup = BeautifulSoup(response.text, 'html.parser')
76
+
77
+ # 提取所有的文本
78
+ text = ''.join(s.getText() for s in soup.find_all('p'))
79
+ logging.info(f"Extracted text from {url}")
80
+ return text
81
+
82
+ def summary_url(self, url):
83
+ text = self.fetch_url_content(url)
84
+ text_summary = self.summary(text)
85
+ url_content = "webpage content summary:\n" + text_summary
86
+
87
+ return url_content
88
+
89
+ def ask_url(self, url, question):
90
+ text = self.fetch_url_content(url)
91
+ texts = Document(page_content=text)
92
+ texts = self.text_splitter.split_documents([texts])
93
+ # use embedding
94
+ embeddings = OpenAIEmbeddings(openai_api_key=self.api_key)
95
+
96
+ # create vectorstore
97
+ db = FAISS.from_documents(texts, embeddings)
98
+ retriever = db.as_retriever()
99
+ qa = RetrievalQA.from_chain_type(llm=self.llm, chain_type="stuff", retriever=retriever)
100
+ return qa.run(f"{question} Reply in 中文")
101
+
102
+ def get_answer_at_once(self):
103
+ question = self.history[-1]["content"]
104
+ manager = BaseCallbackManager(handlers=[StdOutCallbackHandler()])
105
+ # llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
106
+ agent = initialize_agent(self.tools, self.llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True, callback_manager=manager)
107
+ reply = agent.run(input=f"{question} Reply in 简体中文")
108
+ return reply, -1
modules/models/base_model.py CHANGED
@@ -34,6 +34,7 @@ class ModelType(Enum):
34
  StableLM = 4
35
  MOSS = 5
36
  YuanAI = 6
 
37
 
38
  @classmethod
39
  def get_type(cls, model_name: str):
@@ -53,6 +54,8 @@ class ModelType(Enum):
53
  model_type = ModelType.MOSS
54
  elif "yuanai" in model_name_lower:
55
  model_type = ModelType.YuanAI
 
 
56
  else:
57
  model_type = ModelType.Unknown
58
  return model_type
@@ -259,7 +262,6 @@ class BaseLLMModel:
259
  chatbot,
260
  stream=False,
261
  use_websearch=False,
262
- autogpt_mode=False,
263
  files=None,
264
  reply_language="中文",
265
  should_check_token_count=True,
@@ -366,7 +368,6 @@ class BaseLLMModel:
366
  chatbot,
367
  stream=False,
368
  use_websearch=False,
369
- autogpt_mode=False,
370
  files=None,
371
  reply_language="中文",
372
  ):
@@ -386,7 +387,6 @@ class BaseLLMModel:
386
  chatbot,
387
  stream=stream,
388
  use_websearch=use_websearch,
389
- autogpt_mode=autogpt_mode,
390
  files=files,
391
  reply_language=reply_language,
392
  )
 
34
  StableLM = 4
35
  MOSS = 5
36
  YuanAI = 6
37
+ ChuanhuAgent = 7
38
 
39
  @classmethod
40
  def get_type(cls, model_name: str):
 
54
  model_type = ModelType.MOSS
55
  elif "yuanai" in model_name_lower:
56
  model_type = ModelType.YuanAI
57
+ elif "川虎助理" in model_name_lower:
58
+ model_type = ModelType.ChuanhuAgent
59
  else:
60
  model_type = ModelType.Unknown
61
  return model_type
 
262
  chatbot,
263
  stream=False,
264
  use_websearch=False,
 
265
  files=None,
266
  reply_language="中文",
267
  should_check_token_count=True,
 
368
  chatbot,
369
  stream=False,
370
  use_websearch=False,
 
371
  files=None,
372
  reply_language="中文",
373
  ):
 
387
  chatbot,
388
  stream=stream,
389
  use_websearch=use_websearch,
 
390
  files=files,
391
  reply_language=reply_language,
392
  )
modules/models/models.py CHANGED
@@ -557,6 +557,7 @@ def get_model(
557
  config.local_embedding = True
558
  # del current_model.model
559
  model = None
 
560
  try:
561
  if model_type == ModelType.OpenAI:
562
  logging.info(f"正在加载OpenAI模型: {model_name}")
@@ -602,10 +603,12 @@ def get_model(
602
  elif model_type == ModelType.YuanAI:
603
  from .inspurai import Yuan_Client
604
  model = Yuan_Client(model_name, api_key=access_key, user_name=user_name, system_prompt=system_prompt)
 
 
 
605
  elif model_type == ModelType.Unknown:
606
  raise ValueError(f"未知模型: {model_name}")
607
  logging.info(msg)
608
- chatbot = gr.Chatbot.update(label=model_name)
609
  except Exception as e:
610
  logging.error(e)
611
  msg = f"{STANDARD_ERROR_MSG}: {e}"
 
557
  config.local_embedding = True
558
  # del current_model.model
559
  model = None
560
+ chatbot = gr.Chatbot.update(label=model_name)
561
  try:
562
  if model_type == ModelType.OpenAI:
563
  logging.info(f"正在加载OpenAI模型: {model_name}")
 
603
  elif model_type == ModelType.YuanAI:
604
  from .inspurai import Yuan_Client
605
  model = Yuan_Client(model_name, api_key=access_key, user_name=user_name, system_prompt=system_prompt)
606
+ elif model_type == ModelType.ChuanhuAgent:
607
+ from .ChuanhuAgent import ChuanhuAgent_Client
608
+ model = ChuanhuAgent_Client(model_name, access_key, user_name=user_name)
609
  elif model_type == ModelType.Unknown:
610
  raise ValueError(f"未知模型: {model_name}")
611
  logging.info(msg)
 
612
  except Exception as e:
613
  logging.error(e)
614
  msg = f"{STANDARD_ERROR_MSG}: {e}"
modules/presets.py CHANGED
@@ -58,9 +58,9 @@ APPEARANCE_SWITCHER = """
58
  </div>
59
  """
60
 
61
- SUMMARIZE_PROMPT = "你是谁?我们刚才聊了什么?" # 总结对话时的 prompt
62
-
63
  ONLINE_MODELS = [
 
 
64
  "gpt-3.5-turbo",
65
  "gpt-3.5-turbo-0301",
66
  "gpt-4",
@@ -164,6 +164,12 @@ Reply in {reply_language}
164
  If the context isn't useful, return the original answer.
165
  """
166
 
 
 
 
 
 
 
167
  ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"
168
 
169
  small_and_beautiful_theme = gr.themes.Soft(
 
58
  </div>
59
  """
60
 
 
 
61
  ONLINE_MODELS = [
62
+ "川虎助理",
63
+ "川虎助理 Pro",
64
  "gpt-3.5-turbo",
65
  "gpt-3.5-turbo-0301",
66
  "gpt-4",
 
164
  If the context isn't useful, return the original answer.
165
  """
166
 
167
+ SUMMARIZE_PROMPT = """Write a concise summary of the following:
168
+
169
+ {text}
170
+
171
+ CONCISE SUMMARY IN 中文:"""
172
+
173
  ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"
174
 
175
  small_and_beautiful_theme = gr.themes.Soft(