Spaces:
Sleeping
Sleeping
File size: 3,005 Bytes
8b16906 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import streamlit as st
import common
import os
import pickle
from llama_hub.file.cjk_pdf.base import CJKPDFReader
from llama_hub.file.pptx.base import PptxReader
from llama_hub.file.pandas_excel.base import PandasExcelReader
from llama_hub.file.docx.base import DocxReader
from llama_index import Document, SimpleDirectoryReader
from pathlib import Path
from log import logger
# Storage locations come from the environment; a missing variable raises
# KeyError as soon as the page script runs.
INDEX_NAME = os.environ["INDEX_NAME"]
PKL_NAME = os.environ["PKL_NAME"]

# Project-defined login check, executed before any page content is rendered.
common.check_login()

# The uploader widget is keyed by a counter stored in session state. The
# import handler further down increments it after a successful import
# (presumably so Streamlit recreates the widget empty -- confirm).
if "file_uploader_key" not in st.session_state:
    st.session_state["file_uploader_key"] = 0

st.title("📝 ImportAllFile")

accepted_types = ("txt", "md", "pdf", "xlsx", "docx", "pptx")
uploader_key = st.session_state["file_uploader_key"]
uploaded_file = st.file_uploader(
    "Upload an article",
    type=accepted_types,
    key=uploader_key,
)
# Handle a click on the "import" button:
#   1. write the uploaded bytes to a temporary file under documents/,
#   2. parse that file into a Document with a reader chosen by extension,
#   3. insert the Document into the index and persist the index,
#   4. record the file name, refresh the chat engine, pickle the name list,
#   5. bump the uploader key and rerun the script.
# The temporary file is always removed, whether the import succeeds or fails.
if st.button("import", use_container_width=True):
    if uploaded_file is None:
        # The button can be clicked before a file is chosen; without this
        # guard `uploaded_file.name` raises AttributeError.
        st.warning("Please select a file before importing.")
    else:
        filename = os.path.basename(uploaded_file.name)
        filepath = os.path.join('documents', filename)
        # Compare case-insensitively so e.g. "REPORT.PDF" picks the PDF reader.
        extension = os.path.splitext(filepath)[1].lower()
        try:
            with open(filepath, 'wb') as f:
                f.write(uploaded_file.getvalue())
            logger.info(filepath)

            if extension in (".txt", ".md"):
                logger.info("extension")
                document = SimpleDirectoryReader(
                    input_files=[filepath], filename_as_id=True
                ).load_data()[0]
            else:
                logger.info("else")
                loader = None
                if extension == ".pdf":
                    logger.info("CJKPDFReader")
                    loader = CJKPDFReader()
                elif extension == ".pptx":
                    logger.info("PptxReader")
                    loader = PptxReader()
                elif extension == ".xlsx":
                    logger.info("PandasExcelReader")
                    loader = PandasExcelReader(pandas_config={"header": 0})
                elif extension == ".docx":
                    logger.info("DocxReader")
                    loader = DocxReader()
                if loader is None:
                    # Fail with a clear message instead of an AttributeError
                    # from calling load_data on None.
                    raise ValueError("Can`t read file:" + uploaded_file.name)
                document = loader.load_data(file=Path(filepath))[0]

            document.metadata = {'filename': filename}
            st.session_state.index.insert(document=document)
            st.session_state.index.storage_context.persist(persist_dir=INDEX_NAME)

            # Record the name only after the insert succeeded, so a failed
            # import does not leave a phantom entry in the displayed list.
            st.session_state.stored_docs.append(uploaded_file.name)
            logger.info(st.session_state.stored_docs)

            common.setChatEngine()
            with open(PKL_NAME, "wb") as f:
                logger.info("pickle")
                pickle.dump(st.session_state.stored_docs, f)
            st.session_state["file_uploader_key"] += 1
        except Exception as e:
            logger.error(e)
            # Surface the failure in the UI as well as in the log.
            st.error(f"Import failed: {e}")
        else:
            # Rerun only after a fully successful import.
            st.experimental_rerun()
        finally:
            # Single cleanup point for the temporary file on every path.
            if os.path.exists(filepath):
                os.remove(filepath)
# Render the names of the documents recorded in session state, one per line.
st.subheader("Import File List")
if "stored_docs" in st.session_state:
    imported_names = st.session_state.stored_docs
    logger.info(imported_names)
    for imported_name in imported_names:
        st.write(imported_name)
|