# Streamlit page: upload a single file, parse it into a document, insert it
# into the index held in the session state, persist the index, and record the
# file name in the pickled list of imported documents.

import os
import pickle
from pathlib import Path

import streamlit as st
from llama_hub.file.cjk_pdf.base import CJKPDFReader
from llama_hub.file.docx.base import DocxReader
from llama_hub.file.pandas_excel.base import PandasExcelReader
from llama_hub.file.pptx.base import PptxReader
from llama_index import Document, SimpleDirectoryReader

import common
from log import logger

INDEX_NAME = os.environ["INDEX_NAME"]
PKL_NAME = os.environ["PKL_NAME"]

# Directory where the upload is staged on disk while the readers parse it.
DOCUMENTS_DIR = "documents"

# Extensions handled by SimpleDirectoryReader (plain-text formats).
_TEXT_EXTENSIONS = (".txt", ".md")

# Extension -> zero-argument factory building the reader for that format.
_LOADER_FACTORIES = {
    ".pdf": CJKPDFReader,
    ".pptx": PptxReader,
    ".xlsx": lambda: PandasExcelReader(pandas_config={"header": 0}),
    ".docx": DocxReader,
}


def _load_document(filepath: str, display_name: str) -> Document:
    """Parse the staged file into a single document.

    Args:
        filepath: Path of the staged copy on disk.
        display_name: Original upload name, used only in log/error messages.

    Returns:
        The first document produced by the reader for this file type.

    Raises:
        ValueError: If the extension is unsupported or the reader returned
            no documents.
    """
    # Lower-case so that e.g. "REPORT.PDF" is treated like "report.pdf".
    extension = os.path.splitext(filepath)[1].lower()

    if extension in _TEXT_EXTENSIONS:
        logger.info("extension")
        reader = SimpleDirectoryReader(input_files=[filepath], filename_as_id=True)
        documents = reader.load_data()
    else:
        logger.info("else")
        factory = _LOADER_FACTORIES.get(extension)
        if factory is None:
            logger.error("Can`t read file:" + display_name)
            raise ValueError("Unsupported file type: " + display_name)
        loader = factory()
        logger.info(type(loader).__name__)
        documents = loader.load_data(file=Path(filepath))

    if not documents:
        raise ValueError("No content could be read from: " + display_name)
    return documents[0]


def _import_uploaded_file(uploaded_file) -> bool:
    """Stage, parse, index and record one uploaded file.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``; only its
            ``name`` attribute and ``getvalue()`` method are used.

    Returns:
        True when the document was indexed and the document list was saved,
        False when any step failed (the failure is logged).
    """
    # Fail before touching anything if the session was not prepared; this
    # page does not create either entry itself.
    missing = [key for key in ("index", "stored_docs") if key not in st.session_state]
    if missing:
        logger.error("Session state is missing: " + ", ".join(missing))
        return False

    filename = os.path.basename(uploaded_file.name)
    filepath = os.path.join(DOCUMENTS_DIR, filename)
    try:
        os.makedirs(DOCUMENTS_DIR, exist_ok=True)
        with open(filepath, "wb") as staged:
            staged.write(uploaded_file.getvalue())
        logger.info(filepath)

        document = _load_document(filepath, uploaded_file.name)
        document.metadata = {"filename": filename}

        st.session_state.index.insert(document=document)
        # Record the name only once the insert succeeded, so the list never
        # shows a file that is not in the index.
        st.session_state.stored_docs.append(uploaded_file.name)
        logger.info(st.session_state.stored_docs)
        st.session_state.index.storage_context.persist(persist_dir=INDEX_NAME)

        common.setChatEngine()
        with open(PKL_NAME, "wb") as pkl:
            logger.info("pickle")
            pickle.dump(st.session_state.stored_docs, pkl)
    except Exception as e:
        # Page-level boundary: log and report instead of crashing the app.
        logger.error(e)
        return False
    finally:
        # The staged copy is temporary; remove it on success and on failure.
        if os.path.exists(filepath):
            os.remove(filepath)
    return True


common.check_login()

# Changing the widget key after a successful import makes Streamlit create a
# fresh, empty uploader on the next run.
if "file_uploader_key" not in st.session_state:
    st.session_state["file_uploader_key"] = 0

st.title("📝 ImportAllFile")
uploaded_file = st.file_uploader(
    "Upload an article",
    type=("txt", "md", "pdf", "xlsx", "docx", "pptx"),
    key=st.session_state["file_uploader_key"],
)

if st.button("import", use_container_width=True):
    if uploaded_file is None:
        # Nothing selected yet: the uploader returns None in that case.
        st.warning("Please upload a file before importing.")
    else:
        imported = _import_uploaded_file(uploaded_file)
        if imported:
            st.session_state["file_uploader_key"] += 1
            st.experimental_rerun()
        else:
            st.error("Failed to import " + uploaded_file.name)

st.subheader("Import File List")
if "stored_docs" in st.session_state:
    logger.info(st.session_state.stored_docs)
    for docname in st.session_state.stored_docs:
        st.write(docname)