gaf7921 committed on
Commit
a3a02d3
·
1 Parent(s): 32e1913

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -4
app.py CHANGED
@@ -1,6 +1,10 @@
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from PyPDF2 import PdfReader
 
 
 
 
4
  from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
5
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
  from langchain.vectorstores import FAISS, Chroma
@@ -29,14 +33,34 @@ def get_pdf_text(pdf_docs):
29
  # Write the text-extraction functions below.
30
 
31
  def get_text_file(docs):
32
- pass
33
-
 
 
 
 
 
34
 
35
  def get_csv_file(docs):
36
- pass
 
 
 
 
 
 
37
 
38
  def get_json_file(docs):
39
- pass
 
 
 
 
 
 
 
 
 
40
 
41
 
42
  # This function processes the documents and splits them into text chunks.
 
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from PyPDF2 import PdfReader
4
+ from langchain.document_loaders import DirectoryLoader
5
+ from langchain.document_loaders import TextLoader
6
+ from langchain.document_loaders.csv_loader import CSVLoader
7
+ from langchain.document_loaders import JSONLoader
8
  from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
9
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
10
  from langchain.vectorstores import FAISS, Chroma
 
33
  # Write the text-extraction functions below.
34
 
35
  def get_text_file(docs):
36
+ temp_dir = tempfile.TemporaryDirectory() # μž„μ‹œ 디렉토리λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
37
+ temp_filepath = os.path.join(temp_dir.name, docs.name) # μž„μ‹œ 파일 경둜λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
38
+ with open(temp_filepath, "wb") as f: # μž„μ‹œ νŒŒμΌμ„ λ°”μ΄λ„ˆλ¦¬ μ“°κΈ° λͺ¨λ“œλ‘œ μ—½λ‹ˆλ‹€.
39
+ f.write(docs.getvalue()) # PDF λ¬Έμ„œμ˜ λ‚΄μš©μ„ μž„μ‹œ νŒŒμΌμ— μ”λ‹ˆλ‹€.
40
+ loader = DirectoryLoader(temp_dir.name, glob=docs.name, loader_cls=TextLoader)
41
+ data = loader.load()
42
+ return date
43
 
44
  def get_csv_file(docs):
45
+ temp_dir = tempfile.TemporaryDirectory() # μž„μ‹œ 디렉토리λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
46
+ temp_filepath = os.path.join(temp_dir.name, docs.name) # μž„μ‹œ 파일 경둜λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
47
+ with open(temp_filepath, "wb") as f: # μž„μ‹œ νŒŒμΌμ„ λ°”μ΄λ„ˆλ¦¬ μ“°κΈ° λͺ¨λ“œλ‘œ μ—½λ‹ˆλ‹€.
48
+ f.write(docs.getvalue()) # PDF λ¬Έμ„œμ˜ λ‚΄μš©μ„ μž„μ‹œ νŒŒμΌμ— μ”λ‹ˆλ‹€.
49
+ loader = CSVLoader(file_path=temp_filepath)
50
+ data = loader.load()
51
+ return date
52
 
53
  def get_json_file(docs):
54
+ temp_dir = tempfile.TemporaryDirectory() # μž„μ‹œ 디렉토리λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
55
+ temp_filepath = os.path.join(temp_dir.name, docs.name) # μž„μ‹œ 파일 경둜λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
56
+ with open(temp_filepath, "wb") as f: # μž„μ‹œ νŒŒμΌμ„ λ°”μ΄λ„ˆλ¦¬ μ“°κΈ° λͺ¨λ“œλ‘œ μ—½λ‹ˆλ‹€.
57
+ f.write(docs.getvalue()) # PDF λ¬Έμ„œμ˜ λ‚΄μš©μ„ μž„μ‹œ νŒŒμΌμ— μ”λ‹ˆλ‹€.
58
+ loader = JSONLoader(
59
+ file_path=temp_filepath,
60
+ jq_schema='.messages[].content',
61
+ text_content=False)
62
+ data = loader.load()
63
+ return date
64
 
65
 
66
  # This function processes the documents and splits them into text chunks.