|
""" |
|
|
|
""" |
|
|
|
|
|
import requests |
|
import streamlit as st |
|
import openai |
|
|
|
import os |
|
from dotenv import load_dotenv |
|
import numpy as np |
|
import pandas as pd |
|
import csv |
|
import tempfile |
|
from tempfile import NamedTemporaryFile |
|
import pathlib |
|
from pathlib import Path |
|
import re |
|
from re import sub |
|
import matplotlib.pyplot as plt |
|
from itertools import product |
|
from tqdm import tqdm_notebook, tqdm, trange |
|
import time |
|
from time import sleep |
|
|
|
import seaborn as sns |
|
from matplotlib.pyplot import style |
|
from rich import print |
|
import warnings |
|
import PyPDF2 |
|
from openai import OpenAI |
|
# Module-level OpenAI client; picks up OPENAI_API_KEY from the environment
# (set below from 'user_token' after load_dotenv()).
client = OpenAI()

# Suppress all warnings globally (noisy third-party libs); note this also
# hides potentially useful deprecation warnings.
warnings.filterwarnings('ignore')
|
|
|
# Core logic for loading the local knowledge base follows.
|
|
|
|
|
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader |
|
from langchain_community.document_loaders import PyPDFLoader |
|
|
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.embeddings.huggingface import HuggingFaceEmbeddings |
|
from langchain.vectorstores import FAISS |
|
|
|
# Load variables from a local .env file into os.environ.
load_dotenv()

# Map the project's 'user_token' variable onto the OpenAI API key for both
# the new client-style API and the legacy module-level API.
# NOTE(review): raises KeyError if 'user_token' is unset — confirm the .env
# file always provides it.
os.environ["OPENAI_API_KEY"] = os.environ['user_token']

openai.api_key = os.environ['user_token']
|
|
|
|
|
|
|
def langchain_localKB_construct(filepath, username):
    """Build a local FAISS knowledge base from an uploaded PDF.

    Args:
        filepath: an uploaded-file object exposing a ``.name`` attribute
            (e.g. a tempfile/Streamlit upload) that points to a PDF on disk.
        username: per-user namespace; the index is persisted under
            ``./{username}/faiss_index``.

    Returns:
        The FAISS vector store built from the document chunks.
    """
    print('开始构建Langchain知识库...')

    print('now filepath:', filepath.name)

    # Load the PDF into LangChain Document objects (one per page).
    loader = PyPDFLoader(filepath.name)
    docs = loader.load()

    # Split pages into overlapping chunks so retrieval granularity stays
    # manageable for similarity search.
    docs = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)

    # OpenAIEmbeddings is already imported at module level; the original's
    # redundant in-function re-import has been removed.
    embeddings = OpenAIEmbeddings(disallowed_special=())
    print('langchain embeddings:', embeddings)

    # Embed every chunk, build the FAISS index, and persist it per-user.
    vector_store = FAISS.from_documents(docs, embeddings)
    vector_store.save_local(f'./{username}/faiss_index')

    return vector_store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def langchain_RAG(prompt, username):
    """Retrieve relevant chunks from the user's FAISS index and build a RAG prompt.

    Args:
        prompt: the user's question.
        username: per-user namespace; the index is loaded from
            ``./{username}/faiss_index``.

    Returns:
        (total_prompt, docs): the augmented prompt containing the retrieved
        context followed by the original question, plus the retrieved
        Document objects.
    """
    embeddings = OpenAIEmbeddings(disallowed_special=())
    # allow_dangerous_deserialization is needed because FAISS.load_local
    # unpickles; only load indexes this app created itself.
    vector_store = FAISS.load_local(f'./{username}/faiss_index', embeddings, allow_dangerous_deserialization=True)

    # Top-5 most similar chunks for the question.
    docs = vector_store.similarity_search(prompt, k=5)

    # Bug fix: the original interpolated the Python *list* of strings into the
    # f-string, producing "['...', '...']" repr (brackets/quotes) inside the
    # prompt. Join the chunk texts into clean plain text instead.
    context = "\n".join(doc.page_content for doc in docs)
    total_prompt = f"已知信息:\n{context}\n 根据这些已知信息来回答问题:\n{prompt}"

    return total_prompt, docs
|
|
|
|