Spaces:
Sleeping
Sleeping
finish
Browse files
app.py
CHANGED
@@ -13,7 +13,7 @@ from langchain.llms import HuggingFaceHub, LlamaCpp, CTransformers # For loadin
|
|
13 |
from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
|
14 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
15 |
import os
|
16 |
-
|
17 |
|
18 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
19 |
def get_pdf_text(pdf_docs):
|
@@ -25,9 +25,6 @@ def get_pdf_text(pdf_docs):
|
|
25 |
pdf_doc = pdf_loader.load() # 텍스트를 추출합니다.
|
26 |
return pdf_doc # 추출한 텍스트를 반환합니다.
|
27 |
|
28 |
-
# 과제
|
29 |
-
# 아래 텍스트 추출 함수를 작성
|
30 |
-
|
31 |
|
32 |
def get_text_file(txt_docs):
|
33 |
temp_dir = tempfile.TemporaryDirectory()
|
@@ -78,7 +75,24 @@ def get_vectorstore(text_chunks):
|
|
78 |
# OpenAI 임베딩 모델을 로드합니다. (Embedding models - Ada v2)
|
79 |
|
80 |
embeddings = OpenAIEmbeddings()
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
return vectorstore # 생성된 벡터 스토어를 반환합니다.
|
84 |
|
|
|
13 |
from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
|
14 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
15 |
import os
|
16 |
+
import numpy as np
|
17 |
|
18 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
19 |
def get_pdf_text(pdf_docs):
|
|
|
25 |
pdf_doc = pdf_loader.load() # 텍스트를 추출합니다.
|
26 |
return pdf_doc # 추출한 텍스트를 반환합니다.
|
27 |
|
|
|
|
|
|
|
28 |
|
29 |
def get_text_file(txt_docs):
|
30 |
temp_dir = tempfile.TemporaryDirectory()
|
|
|
75 |
# OpenAI 임베딩 모델을 로드합니다. (Embedding models - Ada v2)
|
76 |
|
77 |
embeddings = OpenAIEmbeddings()
|
78 |
+
|
79 |
+
chunk_embeddings = []
|
80 |
+
for chunk in text_chunks:
|
81 |
+
chunk_embedding = embeddings.encode(chunk)
|
82 |
+
chunk_embeddings.append(chunk_embedding)
|
83 |
+
|
84 |
+
# FAISS์ ์ ํฉํ ํํ๋ก ๋ฒกํฐ๋ค์ ์ฌ๊ตฌ์ฑํฉ๋๋ค.
|
85 |
+
flat_embeddings = [emb for chunk in chunk_embeddings for emb in chunk]
|
86 |
+
|
87 |
+
# 임베딩 차원 수를 확인합니다.
|
88 |
+
num_dims = len(flat_embeddings[0]) if flat_embeddings else 0
|
89 |
+
|
90 |
+
# FAISS์ ์ ๋ฌํ ์ ์๋ ํํ๋ก ๋ฒกํฐ๋ค์ ์ฌ์ ๋ ฌํฉ๋๋ค.
|
91 |
+
vectors = np.array(flat_embeddings).astype('float32')
|
92 |
+
vectors = vectors.reshape(len(flat_embeddings), num_dims)
|
93 |
+
|
94 |
+
# FAISS 인덱스를 생성합니다.
|
95 |
+
vectorstore = FAISS.from_numpy(vectors)
|
96 |
|
97 |
return vectorstore # 생성된 벡터 스토어를 반환합니다.
|
98 |
|