# Spaces:
# Running
# Running
import logging

import streamlit as st
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Setup logging
def setup_logging():
    """Configure root logging at INFO level with a single stream handler.

    Each record is rendered as ``<asctime> - <levelname> - <message>``.
    ``logging.basicConfig`` leaves the root logger untouched when it
    already has handlers, so calling this more than once is harmless.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[logging.StreamHandler()],
    )
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts as its source; ``main``
            passes the object returned by ``st.file_uploader``.

    Returns:
        The per-page text joined with no separator. A page that yields
        no text contributes an empty string.
    """
    pdf_reader = PdfReader(pdf_file)
    # `or ""` guards against a page whose extraction yields None (e.g. a
    # page with no text layer), which would otherwise break the join.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Function to summarize text using the specified model and tokenizer
def summarize_text(model, tokenizer, text):
    """Summarize *text* with the given seq2seq model and tokenizer.

    Args:
        model: Object exposing ``generate(inputs, **kwargs)``.
        tokenizer: Object exposing ``encode(text, **kwargs)`` and
            ``decode(ids, **kwargs)``.
        text: The text to summarize.

    Returns:
        The decoded summary with special tokens stripped, or ``""`` when
        *text* is empty or contains only whitespace.
    """
    # With nothing to summarize the model would only see the bare
    # "summarize: " prompt and invent content, so skip generation.
    if not text or not text.strip():
        return ""

    # The prompt is truncated to at most 1024 tokens.
    input_ids = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )
    generated = model.generate(
        input_ids,
        max_length=130,
        min_length=30,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    # Only the first generated sequence is returned.
    return tokenizer.decode(generated[0], skip_special_tokens=True)
_MODEL_NAME = "Falconsai/medical_summarization"
_NO_TEXT_MESSAGE = "No text found to summarize. Please provide input that contains text."


@st.cache_resource
def _load_model_and_tokenizer():
    """Load the tokenizer and model once and reuse them across reruns.

    Streamlit re-executes the script on every widget interaction; without
    caching, both objects would be reloaded each time.
    NOTE(review): ``st.cache_resource`` needs a Streamlit release that
    provides it -- confirm against the pinned version.
    """
    tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
    model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_NAME)
    return tokenizer, model


def _summarize_nonblank(model, tokenizer, text):
    """Summarize *text*, or return a notice when it has no visible content."""
    if not text or not text.strip():
        logging.warning("No text available to summarize.")
        return _NO_TEXT_MESSAGE
    return summarize_text(model, tokenizer, text)


def main():
    """Render the app: collect one input, summarize it, and show the result.

    The user picks an input type, supplies text or a file, and presses
    "Report Result". Text, text files and PDFs are summarized; DOCX and
    audio uploads report that they are not implemented yet. Any exception
    raised while processing is logged with its traceback and reported in
    the UI as a generic error.
    """
    setup_logging()
    logging.info("Starting the Streamlit app.")

    # Load the model and tokenizer (cached across reruns)
    tokenizer, model = _load_model_and_tokenizer()

    # Streamlit UI
    st.title("GenAI Lab Report Analyzer")
    st.write("Upload a file, record audio, or type text to generate a summary. Select the appropriate input type and provide the input.")
    input_type = st.radio(
        "Select Input Type:",
        options=["Text", "Text File", "PDF", "DOCX", "Audio"],
        index=0,
    )

    file = None
    text = None
    audio = None
    if input_type == "Text":
        text = st.text_area("Enter your text here:", placeholder="Type your text here...")
    elif input_type == "Text File":
        file = st.file_uploader("Upload your text file:", type=["txt"])
    elif input_type == "PDF":
        file = st.file_uploader("Upload your PDF file:", type=["pdf"])
    elif input_type == "DOCX":
        file = st.file_uploader("Upload your DOCX file:", type=["docx"])
    elif input_type == "Audio":
        audio = st.file_uploader("Upload your audio file:", type=["wav", "mp3", "m4a"])

    if st.button("Report Result"):
        try:
            summary = None
            if input_type == "Text" and text:
                logging.info("Processing text input.")
                summary = _summarize_nonblank(model, tokenizer, text)
                logging.info("Text input processed successfully.")
            elif input_type == "Text File" and file:
                logging.info("Processing text file: %s", file.name)
                text = file.read().decode("utf-8")  # Assuming UTF-8 encoding
                summary = _summarize_nonblank(model, tokenizer, text)
            elif input_type == "PDF" and file:
                logging.info("Processing PDF file: %s", file.name)
                text = extract_text_from_pdf(file)
                summary = _summarize_nonblank(model, tokenizer, text)
            elif input_type == "DOCX" and file:
                # The UI offers DOCX, so say plainly that it is unsupported
                # instead of falling through to "Invalid input".
                logging.info("Processing DOCX file: %s", file.name)
                summary = "DOCX processing not implemented yet."
            elif input_type == "Audio" and audio:
                logging.info("Processing audio input.")
                # Add audio processing logic here
                summary = "Audio processing not implemented yet."
            else:
                summary = "Invalid input. Please provide a valid file or text."
                logging.warning("Invalid input type provided.")
            st.text_area("Report Result:", summary, height=200)
        except Exception as e:
            # Top-level UI boundary: keep the traceback in the logs and
            # show the user a generic message.
            logging.exception("Error during summarization: %s", e)
            st.error("An error occurred during summarization. Please check the logs for more details.")
    logging.info("Closing the Streamlit app.")
# Entry point: build the app only when this file is executed as a script.
if __name__ == "__main__":
    main()