Commit ca413dd · Parent: ae359fc

Update app_utils.py

app_utils.py CHANGED (+16 -9)

@@ -15,6 +15,8 @@ from langchain.document_loaders import DirectoryLoader #
 from langchain.embeddings.openai import OpenAIEmbeddings # OpenAIGPTEmbeddings
 from langchain.text_splitter import CharacterTextSplitter # CharacterTextSplitter is a class in the langchain.text_splitter module that can be used to split text into chunks.
 #import streamlit as st
+from langchain.indexes import VectorstoreIndexCreator # vector store index creator
+from langchain.vectorstores import DocArrayInMemorySearch # in-memory vector store
 from tenacity import (
     retry,
     stop_after_attempt,
@@ -53,24 +55,29 @@ def initialize_knowledge_base():
 
     loader = DirectoryLoader('profiles', glob='**/*.txt') # folder loader: every .txt file under the profiles folder
     docs = loader.load()
+
+    #index = VectorstoreIndexCreator(
+    #    vectorstore_cls=DocArrayInMemorySearch
+    #).from_loaders([loader])
+
 
     char_text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) # text splitter: chunk_size=1000, chunk_overlap=0
     doc_texts = char_text_splitter.split_documents(docs) # document splitter: splits the documents into small chunks
 
     # Embed each chunk of text
-
-    openAI_embeddings = OpenAIEmbeddings()
-    for doc in doc_texts:
-
+    #embeddings = []
+    #openAI_embeddings = OpenAIEmbeddings()
+    #for doc in doc_texts:
+    #    text = str(doc)
     #embedding = openAI_embeddings.embed_documents(text)
     #embeddings.append(embedding)
-
-
+    #    embedding = embedding_from_string(text, "text-embedding-ada-002")
+    #    embeddings.append(embedding)
 
-    vStore = np.concatenate(embeddings, axis=0)
+    #vStore = np.concatenate(embeddings, axis=0)
 
-
-
+    openAI_embeddings = OpenAIEmbeddings()
+    vStore = Chroma.from_documents(doc_texts, openAI_embeddings) # Chroma stores the vectors; from_documents builds a vector store from the documents, using openAI_embeddings to turn the text into vectors
 
     conv_model = RetrievalQA.from_chain_type(
         llm=OpenAI(model_name="gpt-3.5-turbo-16k"),