DiamondYin committed on
Commit ca413dd · 1 Parent(s): ae359fc

Update app_utils.py

Files changed (1)
  1. app_utils.py +16 -9
app_utils.py CHANGED
@@ -15,6 +15,8 @@ from langchain.document_loaders import DirectoryLoader #
 from langchain.embeddings.openai import OpenAIEmbeddings # OpenAIGPTEmbeddings
 from langchain.text_splitter import CharacterTextSplitter # CharacterTextSplitter is a class in the langchain.text_splitter module that can be used to split text into chunks.
 #import streamlit as st
+from langchain.indexes import VectorstoreIndexCreator # vector store index creator
+from langchain.vectorstores import DocArrayInMemorySearch # in-memory vector store
 from tenacity import (
     retry,
     stop_after_attempt,
@@ -53,24 +55,29 @@ def initialize_knowledge_base():
 
     loader = DirectoryLoader('profiles', glob='**/*.txt') # directory loader: every .txt file under the profiles folder
     docs = loader.load()
+
+    #index = VectorstoreIndexCreator(
+    #    vectorstore_cls=DocArrayInMemorySearch
+    #).from_loaders([loader])
+
 
     char_text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) # text splitter: chunk_size=1000, chunk_overlap=0
     doc_texts = char_text_splitter.split_documents(docs) # split the documents into smaller chunks
 
     # Embed each chunk of text
-    embeddings = []
-    openAI_embeddings = OpenAIEmbeddings()
-    for doc in doc_texts:
-        text = str(doc)
+    #embeddings = []
+    #openAI_embeddings = OpenAIEmbeddings()
+    #for doc in doc_texts:
+    #    text = str(doc)
         #embedding = openAI_embeddings.embed_documents(text)
         #embeddings.append(embedding)
-        embedding = embedding_from_string(text, "text-embedding-ada-002")
-        embeddings.append(embedding)
+    #    embedding = embedding_from_string(text, "text-embedding-ada-002")
+    #    embeddings.append(embedding)
 
-    vStore = np.concatenate(embeddings, axis=0)
+    #vStore = np.concatenate(embeddings, axis=0)
 
-    #openAI_embeddings = OpenAIEmbeddings()
-    #vStore = Chroma.from_documents(doc_texts, openAI_embeddings) # Chroma stores the vectors; from_documents builds a vector store from the documents; openAI_embeddings turns text into vectors
+    openAI_embeddings = OpenAIEmbeddings()
+    vStore = Chroma.from_documents(doc_texts, openAI_embeddings) # Chroma stores the vectors; from_documents builds a vector store from the documents; openAI_embeddings turns text into vectors
 
     conv_model = RetrievalQA.from_chain_type(
         llm=OpenAI(model_name="gpt-3.5-turbo-16k"),
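For context, here is a minimal sketch of what initialize_knowledge_base amounts to after this commit: the manual embedding_from_string loop and np.concatenate are dropped, and the vector store is built directly with Chroma.from_documents. This assumes the legacy (pre-0.1) langchain import paths used in the file, that chromadb and the loader's dependencies are installed, and that OPENAI_API_KEY is set. The chain_type="stuff", the vStore.as_retriever() wiring, and the return value are assumptions, since the diff only shows the start of the RetrievalQA.from_chain_type(...) call.

# Sketch of the post-commit flow (assumptions noted above), not the full app_utils.py.
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

def initialize_knowledge_base():
    # Load every .txt profile and split it into 1000-character chunks with no overlap.
    docs = DirectoryLoader('profiles', glob='**/*.txt').load()
    doc_texts = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(docs)

    # Embedding is now delegated to Chroma.from_documents instead of the
    # removed per-chunk embedding loop and np.concatenate.
    vStore = Chroma.from_documents(doc_texts, OpenAIEmbeddings())

    # Assumed wiring: expose the Chroma store as a retriever for the QA chain;
    # the diff only shows the first lines of this call.
    return RetrievalQA.from_chain_type(
        llm=OpenAI(model_name="gpt-3.5-turbo-16k"),
        chain_type="stuff",
        retriever=vStore.as_retriever(),
    )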