mdasad3617's picture
Update app.py
717ad69 verified
raw
history blame
3.74 kB
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import logging
from PyPDF2 import PdfReader
# Setup logging
def setup_logging():
    """Configure root logging for the app.

    Sets the root logger to INFO and attaches a single ``StreamHandler``
    whose records are formatted as ``<time> - <level> - <message>``.
    """
    log_format = '%(asctime)s - %(levelname)s - %(message)s'
    stream_handler = logging.StreamHandler()
    logging.basicConfig(
        level=logging.INFO,
        format=log_format,
        handlers=[stream_handler],
    )
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Anything ``PdfReader`` accepts (a path or a binary
            file-like object such as an uploaded file).

    Returns:
        The text of all pages joined in page order, with no separator
        added between pages. Pages that yield no text contribute nothing.
    """
    reader = PdfReader(pdf_file)
    # ``or ""`` keeps the join safe if a page's extractor yields nothing
    # (e.g. an image-only page) instead of raising TypeError on None.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to summarize text using the specified model and tokenizer
def summarize_text(model, tokenizer, text):
    """Summarize ``text`` with the given seq2seq model and tokenizer.

    The text is prefixed with ``"summarize: "``, encoded with truncation
    enabled and ``max_length=1024``, and passed to ``model.generate`` using
    4 beams, ``min_length=30`` and ``max_length=130``.

    Args:
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Object exposing ``encode`` and ``decode``.
        text: The text to summarize.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    prompt = "summarize: " + text
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )
    generated = model.generate(
        input_ids,
        max_length=130,
        min_length=30,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)
def _load_summarizer():
    """Load the summarization tokenizer and model; returns ``(tokenizer, model)``."""
    tokenizer = AutoTokenizer.from_pretrained("Falconsai/medical_summarization")
    model = AutoModelForSeq2SeqLM.from_pretrained("Falconsai/medical_summarization")
    return tokenizer, model


def _extract_text_from_docx(docx_file):
    """Return the plain text of a DOCX file, one line per paragraph.

    A DOCX file is a ZIP archive whose main body lives in
    ``word/document.xml``; this reads the text runs (``w:t``) of every
    paragraph (``w:p``) using only the standard library. Formatting,
    headers/footers and footnotes are not included.
    """
    # Imported locally so this helper does not depend on the file's
    # top-level import block.
    import zipfile
    import xml.etree.ElementTree as ET

    ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
    with zipfile.ZipFile(docx_file) as archive:
        root = ET.fromstring(archive.read("word/document.xml"))
    paragraphs = (
        "".join(run.text or "" for run in para.iter(f"{ns}t"))
        for para in root.iter(f"{ns}p")
    )
    return "\n".join(paragraphs)


def main():
    """Render the Streamlit UI and summarize the user's input on request.

    The user picks an input type (typed text, text file, PDF, DOCX or
    audio), supplies the input, and presses "Report Result". Text is
    extracted from the chosen source and passed to ``summarize_text``; the
    summary (or an explanatory message) is shown in a text area. Audio is
    accepted by the UI but not processed yet. Any exception raised while
    extracting or summarizing is logged with its traceback and reported to
    the user as a generic error.
    """
    setup_logging()
    logging.info("Starting the Streamlit app.")

    # Load the model and tokenizer.
    # Streamlit re-executes this script on every widget interaction, so the
    # loader is wrapped in st.cache_resource to load the model once per
    # server process instead of once per rerun.
    tokenizer, model = st.cache_resource(_load_summarizer)()

    # Streamlit UI
    st.title("GenAI Lab Report Analyzer")
    st.write("Upload a file, record audio, or type text to generate a summary. Select the appropriate input type and provide the input.")

    input_type = st.radio(
        "Select Input Type:",
        options=["Text", "Text File", "PDF", "DOCX", "Audio"],
        index=0
    )

    file = None
    text = None
    audio = None

    if input_type == "Text":
        text = st.text_area("Enter your text here:", placeholder="Type your text here...")
    elif input_type == "Text File":
        file = st.file_uploader("Upload your text file:", type=["txt"])
    elif input_type == "PDF":
        file = st.file_uploader("Upload your PDF file:", type=["pdf"])
    elif input_type == "DOCX":
        file = st.file_uploader("Upload your DOCX file:", type=["docx"])
    elif input_type == "Audio":
        audio = st.file_uploader("Upload your audio file:", type=["wav", "mp3", "m4a"])

    if st.button("Report Result"):
        try:
            summary = None
            if input_type == "Text" and text:
                logging.info("Processing text input.")
                summary = summarize_text(model, tokenizer, text)
                logging.info("Text input processed successfully.")
            elif input_type == "Text File" and file:
                logging.info(f"Processing text file: {file.name}")
                text = file.read().decode("utf-8")  # Assuming UTF-8 encoding
                summary = summarize_text(model, tokenizer, text)
            elif input_type == "PDF" and file:
                logging.info(f"Processing PDF file: {file.name}")
                text = extract_text_from_pdf(file)
                summary = summarize_text(model, tokenizer, text)
            elif input_type == "DOCX" and file:
                # Previously a DOCX upload fell through to the
                # "Invalid input" branch even though the UI offers it.
                logging.info(f"Processing DOCX file: {file.name}")
                text = _extract_text_from_docx(file)
                summary = summarize_text(model, tokenizer, text)
            elif input_type == "Audio" and audio:
                logging.info("Processing audio input.")
                # Add audio processing logic here
                summary = "Audio processing not implemented yet."
            else:
                summary = "Invalid input. Please provide a valid file or text."
                logging.warning("Invalid input type provided.")

            st.text_area("Report Result:", summary, height=200)
        except Exception as e:
            # logging.exception records the traceback, which the user-facing
            # message tells people to look for in the logs.
            logging.exception(f"Error during summarization: {e}")
            st.error("An error occurred during summarization. Please check the logs for more details.")

    logging.info("Closing the Streamlit app.")
# Entry point: build and run the app only when this module is executed as
# the main script, not when it is imported.
if __name__ == "__main__":
    main()