Spaces:
Runtime error
Runtime error
DiamondYin
committed on
Commit
·
f71f3ee
1
Parent(s):
b185613
Update app_utils.py
Browse files
- app_utils.py +14 -4
app_utils.py
CHANGED
@@ -6,6 +6,7 @@ import boto3 # AWS Polly
|
|
6 |
from pydub import AudioSegment # AudioSegment is a class in the pydub module that can be used to manipulate audio files.
|
7 |
from pydub.playback import play # play is a function in the pydub.playback module that can be used to play audio files.
|
8 |
import logging
|
|
|
9 |
|
10 |
from langchain import OpenAI
|
11 |
from langchain.chains import RetrievalQA # RetrievalQA is a class in the langchain.chains module that can be used to build a retrieval-based question answering system.
|
@@ -38,11 +39,20 @@ def initialize_knowledge_base():
|
|
38 |
loader = DirectoryLoader('profiles', glob='**/*.txt') #文件夹加载器 profiles文件夹下的所有txt文件
|
39 |
docs = loader.load()
|
40 |
|
41 |
-
char_text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
42 |
-
doc_texts = char_text_splitter.split_documents(docs)
|
43 |
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
conv_model = RetrievalQA.from_chain_type(
|
48 |
llm=OpenAI(model_name="gpt-3.5-turbo-16k"),
|
|
|
6 |
from pydub import AudioSegment # AudioSegment is a class in the pydub module that can be used to manipulate audio files.
|
7 |
from pydub.playback import play # play is a function in the pydub.playback module that can be used to play audio files.
|
8 |
import logging
|
9 |
+
import numpy as np
|
10 |
|
11 |
from langchain import OpenAI
|
12 |
from langchain.chains import RetrievalQA # RetrievalQA is a class in the langchain.chains module that can be used to build a retrieval-based question answering system.
|
|
|
39 |
loader = DirectoryLoader('profiles', glob='**/*.txt') #文件夹加载器 profiles文件夹下的所有txt文件
|
40 |
docs = loader.load()
|
41 |
|
42 |
+
char_text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) #文本分割器 chunk_size=1000, chunk_overlap=0
|
43 |
+
doc_texts = char_text_splitter.split_documents(docs) #文档分割器,作用是将文档分割成小块
|
44 |
|
45 |
+
# Embed each chunk of text
|
46 |
+
embeddings = []
|
47 |
+
embedder = OpenAIEmbeddings()
|
48 |
+
for doc in doc_texts:
|
49 |
+
embedding = embedder.embed(doc)
|
50 |
+
embeddings.append(embedding)
|
51 |
+
|
52 |
+
vStore = np.concatenate(embeddings, axis=0)
|
53 |
+
|
54 |
+
#openAI_embeddings = OpenAIEmbeddings()
|
55 |
+
#vStore = Chroma.from_documents(doc_texts, openAI_embeddings) #Chroma是一个类,用于存储向量,from_documents是一个方法,用于从文档中创建向量存储器,openAI_embeddings是一个类,用于将文本转换为向量
|
56 |
|
57 |
conv_model = RetrievalQA.from_chain_type(
|
58 |
llm=OpenAI(model_name="gpt-3.5-turbo-16k"),
|