DDingcheol committed on
Commit
ba2ba98
·
1 Parent(s): d32b31c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -3
app.py CHANGED
@@ -13,6 +13,7 @@ import os
13
  from huggingface_hub import hf_hub_download # Hugging Face Hub์—์„œ ๋ชจ๋ธ์„ ๋‹ค์šด๋กœ๋“œํ•˜๊ธฐ ์œ„ํ•œ ํ•จ์ˆ˜์ž…๋‹ˆ๋‹ค.
14
  from transformers import pipeline
15
  from io import BytesIO
 
16
 
17
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
18
  def get_pdf_text(pdf_docs):
@@ -30,16 +31,34 @@ def get_pdf_text(pdf_docs):
30
  def get_text_file(docs):
31
  text_list = []
32
  for doc in docs:
33
- text = doc.decode('utf-8') # 'bytes' 객체를 'utf-8'로 디코딩하여 텍스트를 추출합니다.
34
  text_list.append(text)
35
  return text_list
36
 
37
 
 
38
  def get_csv_file(docs):
39
- pass
 
 
 
 
 
40
 
41
  def get_json_file(docs):
42
- pass
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  def get_text_chunks(documents):
45
  text_splitter = RecursiveCharacterTextSplitter(
 
13
  from huggingface_hub import hf_hub_download # Hugging Face Hub์—์„œ ๋ชจ๋ธ์„ ๋‹ค์šด๋กœ๋“œํ•˜๊ธฐ ์œ„ํ•œ ํ•จ์ˆ˜์ž…๋‹ˆ๋‹ค.
14
  from transformers import pipeline
15
  from io import BytesIO
16
+ import pandas as pd
17
 
18
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
19
  def get_pdf_text(pdf_docs):
 
31
  def get_text_file(docs):
32
  text_list = []
33
  for doc in docs:
34
+ text = doc.getvalue().decode('utf-8') # 'BytesIO' 객체에서 텍스트를 추출합니다.
35
  text_list.append(text)
36
  return text_list
37
 
38
 
39
+
40
  def get_csv_file(docs):
41
+ text_list = []
42
+ for doc in docs:
43
+ df = pd.read_csv(doc)
44
+ for column in df.columns:
45
+ text_list.extend(df[column].astype(str).tolist())
46
+ return text_list
47
 
48
  def get_json_file(docs):
49
+ text_list = []
50
+ for doc in docs:
51
+ json_data = doc.read().decode('utf-8')
52
+ data = json.loads(json_data)
53
+ if isinstance(data, dict):
54
+ text_list.extend(list(data.values()))
55
+ elif isinstance(data, list):
56
+ for item in data:
57
+ if isinstance(item, str):
58
+ text_list.append(item)
59
+ elif isinstance(item, dict):
60
+ text_list.extend(list(item.values()))
61
+ return text_list
62
 
63
  def get_text_chunks(documents):
64
  text_splitter = RecursiveCharacterTextSplitter(