Spaces:
Sleeping
Sleeping
finish
Browse files
app.py
CHANGED
@@ -13,7 +13,7 @@ from langchain.llms import HuggingFaceHub, LlamaCpp, CTransformers # For loadin
|
|
13 |
from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
|
14 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
15 |
import os
|
16 |
-
|
17 |
|
18 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
19 |
def get_pdf_text(pdf_docs):
|
@@ -75,24 +75,7 @@ def get_vectorstore(text_chunks):
|
|
75 |
# OpenAI 임베딩 모델을 로드합니다. (Embedding models - Ada v2)
|
76 |
|
77 |
embeddings = OpenAIEmbeddings()
|
78 |
-
|
79 |
-
chunk_embeddings = []
|
80 |
-
for chunk in text_chunks:
|
81 |
-
chunk_embedding = embeddings.encode(chunk)
|
82 |
-
chunk_embeddings.append(chunk_embedding)
|
83 |
-
|
84 |
-
# FAISSμ μ ν©ν ννλ‘ λ²‘ν°λ€μ μ¬κ΅¬μ±ν©λλ€.
|
85 |
-
flat_embeddings = [emb for chunk in chunk_embeddings for emb in chunk]
|
86 |
-
|
87 |
-
# 임베딩 차원 수를 확인합니다.
|
88 |
-
num_dims = len(flat_embeddings[0]) if flat_embeddings else 0
|
89 |
-
|
90 |
-
# FAISSμ μ λ¬ν μ μλ ννλ‘ λ²‘ν°λ€μ μ¬μ λ ¬ν©λλ€.
|
91 |
-
vectors = np.array(flat_embeddings).astype('float32')
|
92 |
-
vectors = vectors.reshape(len(flat_embeddings), num_dims)
|
93 |
-
|
94 |
-
# FAISS 인덱스를 생성합니다.
|
95 |
-
vectorstore = FAISS.from_numpy(vectors)
|
96 |
|
97 |
return vectorstore # 생성된 벡터 스토어를 반환합니다.
|
98 |
|
|
|
13 |
from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
|
14 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
15 |
import os
|
16 |
+
|
17 |
|
18 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
19 |
def get_pdf_text(pdf_docs):
|
|
|
75 |
# OpenAI 임베딩 모델을 로드합니다. (Embedding models - Ada v2)
|
76 |
|
77 |
embeddings = OpenAIEmbeddings()
|
78 |
+
vectorstore = FAISS.from_documents(text_chunks, embeddings) # FAISS 벡터 스토어를 생성합니다.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
return vectorstore # 생성된 벡터 스토어를 반환합니다.
|
81 |
|