Spaces:
Running
Running
File size: 3,742 Bytes
ae7d660 717ad69 d2271c1 7be0cb3 d2271c1 0927160 d2271c1 ae7d660 7be0cb3 717ad69 ddb299c d2271c1 ae7d660 717ad69 ae7d660 d2271c1 ae7d660 d2271c1 7be0cb3 d2271c1 0927160 d2271c1 7be0cb3 d2271c1 717ad69 d2271c1 7be0cb3 717ad69 7be0cb3 717ad69 d2271c1 717ad69 d2271c1 ae7d660 7be0cb3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import logging
from PyPDF2 import PdfReader
# Setup logging
def setup_logging():
    """Configure root logging at INFO level with a single stream handler.

    Records are rendered as "<timestamp> - <level name> - <message>".
    """
    console_handler = logging.StreamHandler()
    record_format = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(
        level=logging.INFO,
        format=record_format,
        handlers=[console_handler],
    )
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the text of every page in a PDF, one page per line block.

    Args:
        pdf_file: Anything ``PdfReader`` accepts (here, the uploaded file
            object from the Streamlit uploader).

    Returns:
        The extracted page texts joined with newlines. Pages that yield no
        text contribute an empty string, so a PDF without a text layer
        produces a blank (whitespace-only) result.
    """
    pdf_reader = PdfReader(pdf_file)
    # `or ""` keeps a page without extractable text from breaking the join
    # (NOTE(review): presumably only some PyPDF2 releases return None here —
    # confirm against the pinned version).
    # The newline separator stops the last word of one page from fusing with
    # the first word of the next.
    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
# Function to summarize text using the specified model and tokenizer
def summarize_text(
    model,
    tokenizer,
    text,
    *,
    max_input_tokens=1024,
    max_summary_tokens=130,
    min_summary_tokens=30,
    length_penalty=2.0,
    num_beams=4,
):
    """Summarize ``text`` with a seq2seq model and its tokenizer.

    The text is prefixed with "summarize: ", encoded (truncated to
    ``max_input_tokens``), passed to ``model.generate`` and the first
    generated sequence is decoded back to a string.

    Args:
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Object exposing ``encode(text, **kwargs)`` and
            ``decode(ids, **kwargs)``.
        text: The text to summarize.
        max_input_tokens: Truncation limit passed to the tokenizer as
            ``max_length``.
        max_summary_tokens: ``max_length`` passed to ``model.generate``.
        min_summary_tokens: ``min_length`` passed to ``model.generate``.
        length_penalty: ``length_penalty`` passed to ``model.generate``.
        num_beams: ``num_beams`` passed to ``model.generate``.

    Returns:
        The decoded summary with special tokens removed.
    """
    # NOTE(review): "summarize: " looks like the T5-style task prefix the
    # checkpoint expects — confirm against the model card.
    input_ids = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=max_input_tokens,
        truncation=True,
    )
    generated = model.generate(
        input_ids,
        max_length=max_summary_tokens,
        min_length=min_summary_tokens,
        length_penalty=length_penalty,
        num_beams=num_beams,
        early_stopping=True,
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
# Hugging Face Hub identifier of the summarization checkpoint.
_MODEL_NAME = "Falconsai/medical_summarization"

# XML namespace of WordprocessingML elements inside a .docx package.
_DOCX_NS = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"


def _load_model_and_tokenizer():
    """Load and return ``(tokenizer, model)`` for the summarization checkpoint."""
    tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
    model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_NAME)
    return tokenizer, model


def _extract_text_from_docx(docx_file):
    """Return the paragraph text of a .docx file, one paragraph per line."""
    # Function-scope imports keep this helper self-contained; both modules
    # are part of the standard library.
    import zipfile
    from xml.etree import ElementTree

    # A .docx file is a ZIP archive whose main body is word/document.xml.
    with zipfile.ZipFile(docx_file) as archive:
        document_xml = archive.read("word/document.xml")
    # NOTE(review): the upload is untrusted; ElementTree is not hardened
    # against maliciously constructed XML — consider a size limit.
    root = ElementTree.fromstring(document_xml)
    paragraphs = (
        "".join(run.text or "" for run in paragraph.iter(_DOCX_NS + "t"))
        for paragraph in root.iter(_DOCX_NS + "p")
    )
    return "\n".join(paragraphs)


def _summarize_nonblank(model, tokenizer, text):
    """Summarize ``text``, or return an explanatory message when it is blank."""
    if not text or not text.strip():
        # Without this guard the model would be asked to summarize only the
        # task prefix (e.g. for a scanned PDF with no text layer).
        logging.warning("No text could be extracted from the input.")
        return "No text found in the input. Please provide text or a document that contains selectable text."
    return summarize_text(model, tokenizer, text)


def main():
    """Render the Streamlit UI and summarize the input the user provides.

    The user picks an input type (typed text, text file, PDF, DOCX or audio),
    supplies the input and presses "Report Result". The summary, or a message
    explaining why none could be produced, is shown in a text area. Audio is
    accepted by the uploader but not processed yet.
    """
    setup_logging()
    logging.info("Starting the Streamlit app.")

    # Streamlit re-executes this script on every interaction, so the model is
    # cached across reruns. The wrapper is applied here rather than as a
    # decorator so that defining the helper needs no Streamlit call at import.
    tokenizer, model = st.cache_resource(_load_model_and_tokenizer)()

    # Streamlit UI
    st.title("GenAI Lab Report Analyzer")
    st.write("Upload a file, record audio, or type text to generate a summary. Select the appropriate input type and provide the input.")

    input_type = st.radio(
        "Select Input Type:",
        options=["Text", "Text File", "PDF", "DOCX", "Audio"],
        index=0,
    )

    file = None
    text = None
    audio = None

    if input_type == "Text":
        text = st.text_area("Enter your text here:", placeholder="Type your text here...")
    elif input_type == "Text File":
        file = st.file_uploader("Upload your text file:", type=["txt"])
    elif input_type == "PDF":
        file = st.file_uploader("Upload your PDF file:", type=["pdf"])
    elif input_type == "DOCX":
        file = st.file_uploader("Upload your DOCX file:", type=["docx"])
    elif input_type == "Audio":
        audio = st.file_uploader("Upload your audio file:", type=["wav", "mp3", "m4a"])

    if st.button("Report Result"):
        try:
            summary = None
            if input_type == "Text" and text:
                logging.info("Processing text input.")
                summary = _summarize_nonblank(model, tokenizer, text)
                logging.info("Text input processed successfully.")
            elif input_type == "Text File" and file:
                logging.info(f"Processing text file: {file.name}")
                # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM.
                text = file.read().decode("utf-8-sig")
                summary = _summarize_nonblank(model, tokenizer, text)
            elif input_type == "PDF" and file:
                logging.info(f"Processing PDF file: {file.name}")
                text = extract_text_from_pdf(file)
                summary = _summarize_nonblank(model, tokenizer, text)
            elif input_type == "DOCX" and file:
                # The UI offers DOCX, so it needs a handler; previously a
                # valid upload fell through to the "Invalid input" branch.
                logging.info(f"Processing DOCX file: {file.name}")
                text = _extract_text_from_docx(file)
                summary = _summarize_nonblank(model, tokenizer, text)
            elif input_type == "Audio" and audio:
                logging.info("Processing audio input.")
                # Add audio processing logic here
                summary = "Audio processing not implemented yet."
            else:
                summary = "Invalid input. Please provide a valid file or text."
                logging.warning("Invalid input type provided.")

            st.text_area("Report Result:", summary, height=200)
        except UnicodeDecodeError:
            logging.exception("Uploaded text file is not valid UTF-8.")
            st.error("The uploaded text file could not be decoded as UTF-8. Please re-save it as UTF-8 and try again.")
        except Exception:
            # Top-level UI boundary: log the full traceback, show a generic
            # message to the user.
            logging.exception("Error during summarization")
            st.error("An error occurred during summarization. Please check the logs for more details.")

    logging.info("Closing the Streamlit app.")
# Script entry point: build the UI only when this file is executed directly
# (as `streamlit run` does), not when it is imported as a module.
if __name__ == "__main__":
    main()
|