Spaces:

mdasad3617
/

lab-report-analyzer

Running

File size: 3,742 Bytes

ae7d660
717ad69
d2271c1
7be0cb3
d2271c1
 
 
 
 
 
0927160
 
 
d2271c1
ae7d660
7be0cb3
 
 
 
 
 
 
 
717ad69
 
 
 
 
 
ddb299c
d2271c1
 
ae7d660
717ad69
 
 
ae7d660
d2271c1
 
 
 
 
 
 
 
 
 
 
 
 
ae7d660
d2271c1
 
7be0cb3
 
 
 
 
 
d2271c1
 
 
0927160
d2271c1
7be0cb3
d2271c1
 
717ad69
d2271c1
7be0cb3
 
 
717ad69
7be0cb3
 
 
717ad69
d2271c1
 
 
 
 
 
 
 
717ad69
d2271c1
 
 
 
 
ae7d660
 
7be0cb3

import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import logging
from PyPDF2 import PdfReader

# Setup logging
def setup_logging():
    """Configure the root logger for the app.

    Records at INFO level and above are formatted as
    ``<timestamp> - <level> - <message>`` and sent to a single
    ``logging.StreamHandler``.
    """
    log_format = '%(asctime)s - %(levelname)s - %(message)s'
    console_handler = logging.StreamHandler()
    logging.basicConfig(
        level=logging.INFO,
        format=log_format,
        handlers=[console_handler],
    )

# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: The PDF source handed straight to ``PdfReader``; ``main``
            passes the object returned by the Streamlit file uploader.

    Returns:
        A single string made of each page's extracted text with no separator
        between pages. Pages that yield no text contribute nothing, so the
        result may be an empty string.
    """
    pdf_reader = PdfReader(pdf_file)
    # Join once instead of growing a string with ``+=`` per page, and treat a
    # falsy extraction result (e.g. None) as empty text so that one unreadable
    # page cannot abort the whole document with a TypeError.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

# Function to summarize text using the specified model and tokenizer
def summarize_text(model, tokenizer, text):
    """Generate a summary of ``text`` with the given model and tokenizer.

    The text is prefixed with ``"summarize: "``, encoded with truncation at
    1024 tokens, passed to ``model.generate`` with fixed beam-search settings,
    and the first generated sequence is decoded without special tokens.

    Args:
        model: Object exposing ``generate(inputs, **options)``.
        tokenizer: Object exposing ``encode(...)`` and ``decode(...)``.
        text: The text to summarize.

    Returns:
        The decoded form of the first sequence returned by ``model.generate``.
    """
    prompt = "summarize: " + text
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )

    generation_options = {
        "max_length": 130,
        "min_length": 30,
        "length_penalty": 2.0,
        "num_beams": 4,
        "early_stopping": True,
    }
    generated = model.generate(input_ids, **generation_options)

    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)

def _extract_text_from_docx(docx_file):
    """Return the body text of a DOCX file using only the standard library.

    A .docx file is a ZIP archive whose main document part is
    ``word/document.xml``; visible text is stored in ``w:t`` elements that
    live inside ``w:p`` (paragraph) elements.

    Args:
        docx_file: A path or binary file-like object readable by
            ``zipfile.ZipFile``.

    Returns:
        The document text with a newline at each paragraph start, stripped of
        leading and trailing whitespace.

    Raises:
        zipfile.BadZipFile: If the input is not a ZIP archive.
        KeyError: If the archive has no ``word/document.xml`` member.
    """
    # Imported locally so this helper does not depend on the file's import block.
    import zipfile
    import xml.etree.ElementTree as ET

    # NOTE: this parses XML taken from an uploaded file with the stdlib parser;
    # see the ``xml`` package security notes if uploads are untrusted.
    word_ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
    with zipfile.ZipFile(docx_file) as archive:
        root = ET.fromstring(archive.read("word/document.xml"))

    pieces = []
    # Single document-order walk: break the line at each paragraph start and
    # collect every text run, so nested paragraphs are never emitted twice.
    for node in root.iter():
        if node.tag == word_ns + "p":
            pieces.append("\n")
        elif node.tag == word_ns + "t":
            pieces.append(node.text or "")
    return "".join(pieces).strip()


def _summarize_extracted_text(model, tokenizer, text):
    """Summarize ``text``, or return an explanatory message when it is blank."""
    if not text or not text.strip():
        # Typical for scanned PDFs or empty files: there is nothing to feed the
        # model, and summarizing the bare prompt would produce a bogus result.
        logging.warning("No text could be extracted from the input.")
        return "No readable text was found in the input, so there is nothing to summarize."
    return summarize_text(model, tokenizer, text)


def main():
    """Render the Streamlit UI and summarize the input the user provides.

    The user picks an input type (typed text, text file, PDF, DOCX, or audio),
    supplies the input, and presses "Report Result". Text is extracted from the
    chosen source and summarized, and the result is shown in a text area.
    Audio is accepted by the UI but not processed yet. Any exception raised
    while processing is logged with its traceback and reported to the user as
    a generic error.
    """
    setup_logging()
    logging.info("Starting the Streamlit app.")

    # Load the model and tokenizer
    # NOTE(review): these calls run on every execution of main(); if the
    # installed Streamlit version offers a resource cache, wrapping the load in
    # it would avoid re-instantiating the model on each rerun — TODO confirm.
    tokenizer = AutoTokenizer.from_pretrained("Falconsai/medical_summarization")
    model = AutoModelForSeq2SeqLM.from_pretrained("Falconsai/medical_summarization")

    # Streamlit UI
    st.title("GenAI Lab Report Analyzer")
    st.write("Upload a file, record audio, or type text to generate a summary. Select the appropriate input type and provide the input.")

    input_type = st.radio(
        "Select Input Type:",
        options=["Text", "Text File", "PDF", "DOCX", "Audio"],
        index=0
    )

    file = None
    text = None
    audio = None

    if input_type == "Text":
        text = st.text_area("Enter your text here:", placeholder="Type your text here...")
    elif input_type == "Text File":
        file = st.file_uploader("Upload your text file:", type=["txt"])
    elif input_type == "PDF":
        file = st.file_uploader("Upload your PDF file:", type=["pdf"])
    elif input_type == "DOCX":
        file = st.file_uploader("Upload your DOCX file:", type=["docx"])
    elif input_type == "Audio":
        audio = st.file_uploader("Upload your audio file:", type=["wav", "mp3", "m4a"])

    if st.button("Report Result"):
        try:
            summary = None
            # Whitespace-only typed text is treated the same as no text.
            if input_type == "Text" and text and text.strip():
                logging.info("Processing text input.")
                summary = summarize_text(model, tokenizer, text)
                logging.info("Text input processed successfully.")
            elif input_type == "Text File" and file:
                logging.info(f"Processing text file: {file.name}")
                # UTF-8 is assumed; undecodable bytes are replaced rather than
                # failing the whole upload.
                text = file.read().decode("utf-8", errors="replace")
                summary = _summarize_extracted_text(model, tokenizer, text)
            elif input_type == "PDF" and file:
                logging.info(f"Processing PDF file: {file.name}")
                text = extract_text_from_pdf(file)
                summary = _summarize_extracted_text(model, tokenizer, text)
            elif input_type == "DOCX" and file:
                # The UI offers DOCX uploads, so they need a processing branch;
                # without one a valid upload is reported as invalid input.
                logging.info(f"Processing DOCX file: {file.name}")
                text = _extract_text_from_docx(file)
                summary = _summarize_extracted_text(model, tokenizer, text)
            elif input_type == "Audio" and audio:
                logging.info("Processing audio input.")
                # Add audio processing logic here
                summary = "Audio processing not implemented yet."
            else:
                summary = "Invalid input. Please provide a valid file or text."
                logging.warning("Invalid input type provided.")

            st.text_area("Report Result:", summary, height=200)
        except Exception as e:
            # Top-level UI boundary: log the full traceback, show a generic error.
            logging.exception("Error during summarization: %s", e)
            st.error("An error occurred during summarization. Please check the logs for more details.")

    logging.info("Closing the Streamlit app.")

# Run the app only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()