Spaces:
Sleeping
Sleeping
from __future__ import annotations | |
import os | |
import llama_index | |
from llama_index import ( | |
LLMPredictor, | |
GPTTreeIndex, | |
Document, | |
GPTSimpleVectorIndex, | |
SimpleDirectoryReader, | |
RefinePrompt, | |
QuestionAnswerPrompt, | |
GPTListIndex, | |
PromptHelper, | |
) | |
from pathlib import Path | |
from docx import Document as DocxDocument | |
from tqdm import tqdm | |
import re | |
from langchain.llms import OpenAIChat, OpenAI | |
from llama_index.composability import ComposableGraph | |
from IPython.display import Markdown, display | |
import json | |
from llama_index import Prompt | |
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type | |
import logging | |
import sys | |
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type | |
import logging | |
import json | |
import gradio as gr | |
# import openai | |
import os | |
import traceback | |
import requests | |
# import markdown | |
import csv | |
import mdtex2html | |
from pypinyin import lazy_pinyin | |
from presets import * | |
from llama_func import * | |
import tiktoken | |
from tqdm import tqdm | |
import colorama | |
import os | |
from llama_index import ( | |
GPTSimpleVectorIndex, | |
GPTTreeIndex, | |
GPTKeywordTableIndex, | |
GPTListIndex, | |
) | |
from llama_index import SimpleDirectoryReader, download_loader | |
from llama_index import ( | |
Document, | |
LLMPredictor, | |
PromptHelper, | |
QuestionAnswerPrompt, | |
RefinePrompt, | |
) | |
from langchain.llms import OpenAIChat, OpenAI | |
from duckduckgo_search import ddg | |
import datetime | |
def compact_text_chunks(self, prompt: Prompt, text_chunks: List[str]) -> List[str]:
    """Merge the non-blank chunks into one numbered text and split it again.

    Each chunk is stripped and blank chunks are dropped. The remaining chunks
    get a 1-based ``[n]`` prefix and are joined with blank lines. The joined
    text is then split by the splitter that
    ``self.get_text_splitter_given_prompt`` returns for ``prompt``.
    """
    logging.debug("Compacting text chunks...๐๐๐")
    stripped_chunks = (chunk.strip() for chunk in text_chunks)
    numbered = [
        f"[{position}] {body}"
        for position, body in enumerate(filter(None, stripped_chunks), start=1)
    ]
    merged_text = "\n\n".join(numbered)
    # Re-split the merged text with the splitter obtained for this prompt.
    splitter = self.get_text_splitter_given_prompt(prompt, 1, padding=1)
    return splitter.split_text(merged_text)
def postprocess(
    self, y: List[Tuple[str | None, str | None]]
) -> List[Tuple[str | None, str | None]]:
    """Render the response half of every chat pair as HTML.

    Parameters:
        y: List of (message, response) pairs. Either element may be None;
            a response may be in Markdown format.
    Returns:
        A new list of (message, response) tuples. Messages are passed through
        unchanged; each non-None response is converted with
        ``mdtex2html.convert``. A ``None`` input yields an empty list.
    """
    if y is None:
        return []
    # Build a fresh list instead of overwriting y[i]: the caller's list stays
    # untouched, so handing the same history in again cannot convert an
    # already-converted response a second time.
    return [
        (
            message,
            None
            if response is None
            else mdtex2html.convert(
                response, extensions=["fenced_code", "codehilite", "tables"]
            ),
        )
        for message, response in y
    ]