from __future__ import annotations from typing import TYPE_CHECKING, List import logging import json import commentjson as cjson import os import sys import requests import urllib3 import traceback import pathlib from tqdm import tqdm import colorama from duckduckgo_search import ddg import asyncio import aiohttp from enum import Enum from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from langchain.callbacks.manager import BaseCallbackManager from typing import Any, Dict, List, Optional, Union from langchain.callbacks.base import BaseCallbackHandler from langchain.input import print_text from langchain.schema import AgentAction, AgentFinish, LLMResult from threading import Thread, Condition from collections import deque from ..presets import * from ..index_func import * from ..utils import * from .. import shared from ..config import retrieve_proxy class CallbackToIterator: def __init__(self): self.queue = deque() self.cond = Condition() self.finished = False def callback(self, result): with self.cond: self.queue.append(result) self.cond.notify() # Wake up the generator. def __iter__(self): return self def __next__(self): with self.cond: while not self.queue and not self.finished: # Wait for a value to be added to the queue. self.cond.wait() if not self.queue: raise StopIteration() return self.queue.popleft() def finish(self): with self.cond: self.finished = True self.cond.notify() # Wake up the generator if it's waiting. class ChuanhuCallbackHandler(BaseCallbackHandler): def __init__(self, callback) -> None: """Initialize callback handler.""" self.callback = callback def on_agent_action( self, action: AgentAction, color: Optional[str] = None, **kwargs: Any ) -> Any: self.callback(action.log) def on_tool_end( self, output: str, color: Optional[str] = None, observation_prefix: Optional[str] = None, llm_prefix: Optional[str] = None, **kwargs: Any, ) -> None: """If not the final action, print out observation.""" if observation_prefix is not None: self.callback(f"\n\n{observation_prefix}") self.callback(output) if llm_prefix is not None: self.callback(f"\n\n{llm_prefix}") def on_agent_finish( self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any ) -> None: self.callback(f"{finish.log}\n\n") def on_llm_new_token(self, token: str, **kwargs: Any) -> None: """Run on new LLM token. Only available when streaming is enabled.""" self.callback(token) class ModelType(Enum): Unknown = -1 OpenAI = 0 ChatGLM = 1 LLaMA = 2 XMChat = 3 StableLM = 4 MOSS = 5 YuanAI = 6 ChuanhuAgent = 7 @classmethod def get_type(cls, model_name: str): model_type = None model_name_lower = model_name.lower() if "gpt" in model_name_lower: model_type = ModelType.OpenAI elif "chatglm" in model_name_lower: model_type = ModelType.ChatGLM elif "llama" in model_name_lower or "alpaca" in model_name_lower: model_type = ModelType.LLaMA elif "xmchat" in model_name_lower: model_type = ModelType.XMChat elif "stablelm" in model_name_lower: model_type = ModelType.StableLM elif "moss" in model_name_lower: model_type = ModelType.MOSS elif "yuanai" in model_name_lower: model_type = ModelType.YuanAI elif "川虎助理" in model_name_lower: model_type = ModelType.ChuanhuAgent else: model_type = ModelType.Unknown return model_type class BaseLLMModel: def __init__( self, model_name, system_prompt="", temperature=1.0, top_p=1.0, n_choices=1, stop=None, max_generation_token=None, presence_penalty=0, frequency_penalty=0, logit_bias=None, user="", ) -> None: self.history = [] self.all_token_counts = [] self.model_name = model_name self.model_type = ModelType.get_type(model_name) try: self.token_upper_limit = MODEL_TOKEN_LIMIT[model_name] except KeyError: self.token_upper_limit = DEFAULT_TOKEN_LIMIT self.interrupted = False self.system_prompt = system_prompt self.api_key = None self.need_api_key = False self.single_turn = False self.temperature = temperature self.top_p = top_p self.n_choices = n_choices self.stop_sequence = stop self.max_generation_token = None self.presence_penalty = presence_penalty self.frequency_penalty = frequency_penalty self.logit_bias = logit_bias self.user_identifier = user def get_answer_stream_iter(self): """stream predict, need to be implemented conversations are stored in self.history, with the most recent question, in OpenAI format should return a generator, each time give the next word (str) in the answer """ logging.warning("stream predict not implemented, using at once predict instead") response, _ = self.get_answer_at_once() yield response def get_answer_at_once(self): """predict at once, need to be implemented conversations are stored in self.history, with the most recent question, in OpenAI format Should return: the answer (str) total token count (int) """ logging.warning("at once predict not implemented, using stream predict instead") response_iter = self.get_answer_stream_iter() count = 0 for response in response_iter: count += 1 return response, sum(self.all_token_counts) + count def billing_info(self): """get billing infomation, inplement if needed""" logging.warning("billing info not implemented, using default") return BILLING_NOT_APPLICABLE_MSG def count_token(self, user_input): """get token count from input, implement if needed""" # logging.warning("token count not implemented, using default") return len(user_input) def stream_next_chatbot(self, inputs, chatbot, fake_input=None, display_append=""): def get_return_value(): return chatbot, status_text status_text = i18n("开始实时传输回答……") if fake_input: chatbot.append((fake_input, "")) else: chatbot.append((inputs, "")) user_token_count = self.count_token(inputs) self.all_token_counts.append(user_token_count) logging.debug(f"输入token计数: {user_token_count}") stream_iter = self.get_answer_stream_iter() for partial_text in stream_iter: chatbot[-1] = (chatbot[-1][0], partial_text + display_append) self.all_token_counts[-1] += 1 status_text = self.token_message() yield get_return_value() if self.interrupted: self.recover() break self.history.append(construct_assistant(partial_text)) def next_chatbot_at_once(self, inputs, chatbot, fake_input=None, display_append=""): if fake_input: chatbot.append((fake_input, "")) else: chatbot.append((inputs, "")) if fake_input is not None: user_token_count = self.count_token(fake_input) else: user_token_count = self.count_token(inputs) self.all_token_counts.append(user_token_count) ai_reply, total_token_count = self.get_answer_at_once() self.history.append(construct_assistant(ai_reply)) if fake_input is not None: self.history[-2] = construct_user(fake_input) chatbot[-1] = (chatbot[-1][0], ai_reply + display_append) if fake_input is not None: self.all_token_counts[-1] += count_token(construct_assistant(ai_reply)) else: self.all_token_counts[-1] = total_token_count - sum(self.all_token_counts) status_text = self.token_message() return chatbot, status_text def handle_file_upload(self, files, chatbot, language): """if the model accepts multi modal input, implement this function""" status = gr.Markdown.update() if files: index = construct_index(self.api_key, file_src=files) status = i18n("索引构建完成") # Summarize the document logging.info(i18n("生成内容总结中……")) os.environ["OPENAI_API_KEY"] = self.api_key from langchain.chains.summarize import load_summarize_chain from langchain.prompts import PromptTemplate from langchain.chat_models import ChatOpenAI from langchain.callbacks import StdOutCallbackHandler prompt_template = "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY IN " + language + ":" PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"]) handler = StdOutCallbackHandler() llm = ChatOpenAI(callbacks=[handler]) chain = load_summarize_chain(llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT) summary = chain({"input_documents": list(index.docstore.__dict__["_dict"].values())}, return_only_outputs=True)["output_text"] print(i18n("总结") + f": {summary}") chatbot.append([i18n("总结"), summary]) return gr.Files.update(), chatbot, status def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot): fake_inputs = None display_append = [] limited_context = False fake_inputs = real_inputs if files: from langchain.embeddings.huggingface import HuggingFaceEmbeddings from langchain.vectorstores.base import VectorStoreRetriever limited_context = True msg = "加载索引中……" logging.info(msg) index = construct_index(self.api_key, file_src=files) assert index is not None, "获取索引失败" msg = "索引获取成功,生成回答中……" logging.info(msg) with retrieve_proxy(): retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity_score_threshold",search_kwargs={"k":6, "score_threshold": 0.5}) relevant_documents = retriever.get_relevant_documents(real_inputs) reference_results = [[d.page_content.strip("�"), os.path.basename(d.metadata["source"])] for d in relevant_documents] reference_results = add_source_numbers(reference_results) display_append = add_details(reference_results) display_append = "\n\n" + "".join(display_append) real_inputs = ( replace_today(PROMPT_TEMPLATE) .replace("{query_str}", real_inputs) .replace("{context_str}", "\n\n".join(reference_results)) .replace("{reply_language}", reply_language) ) elif use_websearch: limited_context = True search_results = ddg(real_inputs, max_results=5) reference_results = [] for idx, result in enumerate(search_results): logging.debug(f"搜索结果{idx + 1}:{result}") domain_name = urllib3.util.parse_url(result["href"]).host reference_results.append([result["body"], result["href"]]) display_append.append( # f"{idx+1}. [{domain_name}]({result['href']})\n" f"