Spaces:
Runtime error
Runtime error
Commit
·
ca413dd
1
Parent(s):
ae359fc
Update app_utils.py
Browse files — app_utils.py: +16 −9
app_utils.py
CHANGED
|
@@ -15,6 +15,8 @@ from langchain.document_loaders import DirectoryLoader #
|
|
| 15 |
from langchain.embeddings.openai import OpenAIEmbeddings # OpenAIGPTEmbeddings
|
| 16 |
from langchain.text_splitter import CharacterTextSplitter # CharacterTextSplitter is a class in the langchain.text_splitter module that can be used to split text into chunks.
|
| 17 |
#import streamlit as st
|
|
|
|
|
|
|
| 18 |
from tenacity import (
|
| 19 |
retry,
|
| 20 |
stop_after_attempt,
|
|
@@ -53,24 +55,29 @@ def initialize_knowledge_base():
|
|
| 53 |
|
| 54 |
loader = DirectoryLoader('profiles', glob='**/*.txt') #文件夹加载器 profiles文件夹下的所有txt文件
|
| 55 |
docs = loader.load()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
char_text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) #文本分割器 chunk_size=1000, chunk_overlap=0
|
| 58 |
doc_texts = char_text_splitter.split_documents(docs) #文档分割器,作用是将文档分割成小块
|
| 59 |
|
| 60 |
# Embed each chunk of text
|
| 61 |
-
|
| 62 |
-
openAI_embeddings = OpenAIEmbeddings()
|
| 63 |
-
for doc in doc_texts:
|
| 64 |
-
|
| 65 |
#embedding = openAI_embeddings.embed_documents(text)
|
| 66 |
#embeddings.append(embedding)
|
| 67 |
-
|
| 68 |
-
|
| 69 |
|
| 70 |
-
vStore = np.concatenate(embeddings, axis=0)
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
|
| 75 |
conv_model = RetrievalQA.from_chain_type(
|
| 76 |
llm=OpenAI(model_name="gpt-3.5-turbo-16k"),
|
|
|
|
| 15 |
from langchain.embeddings.openai import OpenAIEmbeddings # OpenAIGPTEmbeddings
|
| 16 |
from langchain.text_splitter import CharacterTextSplitter # CharacterTextSplitter is a class in the langchain.text_splitter module that can be used to split text into chunks.
|
| 17 |
#import streamlit as st
|
| 18 |
+
from langchain.indexes import VectorstoreIndexCreator #导入向量存储索引创建器
|
| 19 |
+
from langchain.vectorstores import DocArrayInMemorySearch #向量存储
|
| 20 |
from tenacity import (
|
| 21 |
retry,
|
| 22 |
stop_after_attempt,
|
|
|
|
| 55 |
|
| 56 |
loader = DirectoryLoader('profiles', glob='**/*.txt') #文件夹加载器 profiles文件夹下的所有txt文件
|
| 57 |
docs = loader.load()
|
| 58 |
+
|
| 59 |
+
#index = VectorstoreIndexCreator(
|
| 60 |
+
# vectorstore_cls=DocArrayInMemorySearch
|
| 61 |
+
#).from_loaders([loader])
|
| 62 |
+
|
| 63 |
|
| 64 |
char_text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) #文本分割器 chunk_size=1000, chunk_overlap=0
|
| 65 |
doc_texts = char_text_splitter.split_documents(docs) #文档分割器,作用是将文档分割成小块
|
| 66 |
|
| 67 |
# Embed each chunk of text
|
| 68 |
+
#embeddings = []
|
| 69 |
+
#openAI_embeddings = OpenAIEmbeddings()
|
| 70 |
+
#for doc in doc_texts:
|
| 71 |
+
# text = str(doc)
|
| 72 |
#embedding = openAI_embeddings.embed_documents(text)
|
| 73 |
#embeddings.append(embedding)
|
| 74 |
+
# embedding = embedding_from_string(text, "text-embedding-ada-002")
|
| 75 |
+
# embeddings.append(embedding)
|
| 76 |
|
| 77 |
+
#vStore = np.concatenate(embeddings, axis=0)
|
| 78 |
|
| 79 |
+
openAI_embeddings = OpenAIEmbeddings()
|
| 80 |
+
vStore = Chroma.from_documents(doc_texts, openAI_embeddings) #Chroma是一个类,用于存储向量,from_documents是一个方法,用于从文档中创建向量存储器,openAI_embeddings是一个类,用于将文本转换为向量
|
| 81 |
|
| 82 |
conv_model = RetrievalQA.from_chain_type(
|
| 83 |
llm=OpenAI(model_name="gpt-3.5-turbo-16k"),
|