Spaces:

mdasad3617
/

lab-report-analyzer

Running

App Files Community

mdasad3617 committed on Nov 29, 2024

Commit

2e7c2af

verified ·

1 Parent(s): 717ad69

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -14

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import streamlit as st
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import logging
 from PyPDF2 import PdfReader
@@ -21,19 +21,12 @@ def extract_text_from_pdf(pdf_file):
         text += page.extract_text()
     return text
-# Function to summarize text using the specified model and tokenizer
-def summarize_text(model, tokenizer, text):
-    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=1024, truncation=True)
-    outputs = model.generate(inputs, max_length=130, min_length=30, length_penalty=2.0, num_beams=4, early_stopping=True)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 def main():
     setup_logging()
     logging.info("Starting the Streamlit app.")
-    # Load the model and tokenizer
-    tokenizer = AutoTokenizer.from_pretrained("Falconsai/medical_summarization")
-    model = AutoModelForSeq2SeqLM.from_pretrained("Falconsai/medical_summarization")
     # Streamlit UI
     st.title("GenAI Lab Report Analyzer")
@@ -65,16 +58,16 @@ def main():
             summary = None
             if input_type == "Text" and text:
                 logging.info("Processing text input.")
-                summary = summarize_text(model, tokenizer, text)
                 logging.info("Text input processed successfully.")
             elif input_type == "Text File" and file:
                 logging.info(f"Processing text file: {file.name}")
                 text = file.read().decode("utf-8")  # Assuming UTF-8 encoding
-                summary = summarize_text(model, tokenizer, text)
             elif input_type == "PDF" and file:
                 logging.info(f"Processing PDF file: {file.name}")
                 text = extract_text_from_pdf(file)
-                summary = summarize_text(model, tokenizer, text)
             elif input_type == "Audio" and audio:
                 logging.info("Processing audio input.")
                 # Add audio processing logic here
@@ -83,7 +76,7 @@ def main():
                 summary = "Invalid input. Please provide a valid file or text."
                 logging.warning("Invalid input type provided.")
-            st.text_area("Report Result:", summary, height=200)
         except Exception as e:
             logging.error(f"Error during summarization: {e}")
             st.error("An error occurred during summarization. Please check the logs for more details.")

 import streamlit as st
+from transformers import pipeline
 import logging
 from PyPDF2 import PdfReader
         text += page.extract_text()
     return text
 def main():
     setup_logging()
     logging.info("Starting the Streamlit app.")
+    # Initialize the summarization pipeline with the specified model
+    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
     # Streamlit UI
     st.title("GenAI Lab Report Analyzer")
             summary = None
             if input_type == "Text" and text:
                 logging.info("Processing text input.")
+                summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
                 logging.info("Text input processed successfully.")
             elif input_type == "Text File" and file:
                 logging.info(f"Processing text file: {file.name}")
                 text = file.read().decode("utf-8")  # Assuming UTF-8 encoding
+                summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
             elif input_type == "PDF" and file:
                 logging.info(f"Processing PDF file: {file.name}")
                 text = extract_text_from_pdf(file)
+                summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
             elif input_type == "Audio" and audio:
                 logging.info("Processing audio input.")
                 # Add audio processing logic here
                 summary = "Invalid input. Please provide a valid file or text."
                 logging.warning("Invalid input type provided.")
+            st.text_area("Report Result:", summary[0]['summary_text'] if isinstance(summary, list) else summary, height=200)
         except Exception as e:
             logging.error(f"Error during summarization: {e}")
             st.error("An error occurred during summarization. Please check the logs for more details.")