# Spaces: Sleeping (page-status text captured along with the source; not part of the program)
import streamlit as st | |
import common | |
import os | |
import pickle | |
from llama_hub.file.cjk_pdf.base import CJKPDFReader | |
from llama_hub.file.pptx.base import PptxReader | |
from llama_hub.file.pandas_excel.base import PandasExcelReader | |
from llama_hub.file.docx.base import DocxReader | |
from llama_index import Document, SimpleDirectoryReader | |
from pathlib import Path | |
from log import logger | |
# Page setup: read storage locations from the environment, run the login
# check, and render the title plus the file-upload widget.

# Both variables are required; a missing one raises KeyError at import time.
INDEX_NAME = os.environ["INDEX_NAME"]
PKL_NAME = os.environ["PKL_NAME"]

# Shared login check from the project's `common` module
# (presumably blocks unauthenticated visitors -- confirm against `common`).
common.check_login()

# Seed the uploader's widget key on the first run of the session. The import
# handler further down increments it after a successful import, which hands
# the uploader a fresh key (presumably to clear the previously chosen file).
if "file_uploader_key" not in st.session_state:
    st.session_state["file_uploader_key"] = 0

st.title("📝 ImportAllFile")

accepted_types = ("txt", "md", "pdf", "xlsx", "docx", "pptx")
uploader_key = st.session_state["file_uploader_key"]
uploaded_file = st.file_uploader(
    "Upload an article",
    type=accepted_types,
    key=uploader_key,
)
# Handle a click on the "import" button: save the upload to a temporary file
# under `documents/`, parse it into a Document with the reader matching its
# extension, insert it into the session's index, persist the index and the
# list of imported file names, then rerun the page with a fresh uploader key.
if st.button("import", use_container_width=True):
    if uploaded_file is None:
        # Clicking the button before choosing a file would otherwise crash on
        # `uploaded_file.name`; tell the user what is missing instead.
        st.warning("Please upload a file before importing.")
    else:
        # basename() drops any directory components from the client-supplied
        # name so the temp file always lands inside `documents/`.
        filename = os.path.basename(uploaded_file.name)
        filepath = os.path.join("documents", filename)
        imported = False
        try:
            # The temp directory may not exist on a fresh deployment.
            os.makedirs("documents", exist_ok=True)
            with open(filepath, "wb") as f:
                f.write(uploaded_file.getvalue())
            logger.info(filepath)

            # Compare case-insensitively so "REPORT.PDF" is handled like ".pdf".
            extension = os.path.splitext(filepath)[1].lower()
            if extension in (".txt", ".md"):
                logger.info("extension")
                document = SimpleDirectoryReader(
                    input_files=[filepath], filename_as_id=True
                ).load_data()[0]
            else:
                logger.info("else")
                if extension == ".pdf":
                    logger.info("CJKPDFReader")
                    loader = CJKPDFReader()
                elif extension == ".pptx":
                    logger.info("PptxReader")
                    loader = PptxReader()
                elif extension == ".xlsx":
                    logger.info("PandasExcelReader")
                    loader = PandasExcelReader(pandas_config={"header": 0})
                elif extension == ".docx":
                    logger.info("DocxReader")
                    loader = DocxReader()
                else:
                    logger.error("Can`t read file:" + uploaded_file.name)
                    # Fail with a clear reason instead of calling load_data()
                    # on a missing loader.
                    raise ValueError("Unsupported file type: " + extension)
                # Only the first Document returned by the reader is indexed.
                document = loader.load_data(file=Path(filepath))[0]

            # Replaces whatever metadata the reader attached with just the name.
            document.metadata = {"filename": filename}

            st.session_state.index.insert(document=document)
            st.session_state.index.storage_context.persist(persist_dir=INDEX_NAME)

            # Record the name only once the document is actually in the index,
            # so a failed import never leaves a phantom entry in the list.
            st.session_state.stored_docs.append(uploaded_file.name)
            logger.info(st.session_state.stored_docs)

            common.setChatEngine()

            with open(PKL_NAME, "wb") as f:
                logger.info("pickle")
                pickle.dump(st.session_state.stored_docs, f)
            imported = True
        except Exception as e:
            # Top-level boundary of the page: log the failure and surface it
            # to the user rather than failing silently.
            logger.error(e)
            st.error(f"Import failed: {e}")
        finally:
            # The uploaded copy is only a parsing scratch file; remove it on
            # both the success and the failure path.
            if os.path.exists(filepath):
                os.remove(filepath)

        if imported:
            # A new key makes Streamlit treat the uploader as a new widget.
            st.session_state["file_uploader_key"] += 1
            # Kept outside the try block: Streamlit signals a rerun by raising
            # a control-flow exception, which the broad handler above must
            # never be able to intercept.
            st.experimental_rerun()
# Show the names of the files imported so far, one per line, under a
# subheader. Nothing is listed when the session has no `stored_docs` entry.
st.subheader("Import File List")
if "stored_docs" in st.session_state:
    imported_names = st.session_state.stored_docs
    logger.info(imported_names)
    for imported_name in imported_names:
        st.write(imported_name)