Spaces:

mdasad3617
/

lab-report-analyzer

Running

App Files Files Community

lab-report-analyzer / app.py

mdasad3617

Update app.py

717ad69 verified 6 months ago

raw

history blame

3.74 kB

	import streamlit as st
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
	import logging
	from PyPDF2 import PdfReader

	# Setup logging
	def setup_logging():
		"""Configure root logging: INFO level, timestamped format, one stream handler."""
		console_handler = logging.StreamHandler()
		logging.basicConfig(
			format='%(asctime)s - %(levelname)s - %(message)s',
			level=logging.INFO,
			handlers=[console_handler],
		)

	# Function to extract text from a PDF file
	def extract_text_from_pdf(pdf_file):
		"""Return the text of every page in a PDF, one page per line group.

		Args:
			pdf_file: Anything ``PdfReader`` accepts (a path or a binary
				file-like object such as an uploaded file).

		Returns:
			The extracted text of all pages joined with newlines. Pages that
			yield no text contribute an empty string, so the result may be
			empty or whitespace-only.
		"""
		pdf_reader = PdfReader(pdf_file)
		# Join with a newline so the last word of one page does not fuse with
		# the first word of the next. The `or ""` guard keeps a page without
		# extractable text from breaking the join.
		return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

	# Function to summarize text using the specified model and tokenizer
	def summarize_text(model, tokenizer, text):
		"""Summarize ``text`` with the given seq2seq model and tokenizer.

		The text is prefixed with "summarize: ", encoded with truncation enabled
		and ``max_length=1024``, passed to ``model.generate`` using beam search,
		and the first generated sequence is decoded without special tokens.
		"""
		prompt = "summarize: " + text
		input_ids = tokenizer.encode(
			prompt,
			return_tensors="pt",
			max_length=1024,
			truncation=True,
		)

		generation_options = {
			"max_length": 130,
			"min_length": 30,
			"length_penalty": 2.0,
			"num_beams": 4,
			"early_stopping": True,
		}
		generated = model.generate(input_ids, **generation_options)

		first_sequence = generated[0]
		return tokenizer.decode(first_sequence, skip_special_tokens=True)

	@st.cache_resource
	def _load_summarizer(model_name="Falconsai/medical_summarization"):
		"""Load the model and tokenizer once and reuse them across Streamlit reruns."""
		tokenizer = AutoTokenizer.from_pretrained(model_name)
		model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
		return model, tokenizer


	def _extract_text_from_docx(docx_file):
		"""Return the paragraph text of a DOCX file, one paragraph per line."""
		# Function-scope imports keep this helper usable without touching the
		# file-level import block.
		import zipfile
		from xml.etree import ElementTree

		# NOTE(review): ElementTree is not hardened against maliciously crafted
		# XML; consider a hardened parser if uploads are fully untrusted.
		word_ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
		with zipfile.ZipFile(docx_file) as archive:
			# Assumes the main document part lives at the conventional path.
			document_xml = archive.read("word/document.xml")

		root = ElementTree.fromstring(document_xml)
		paragraphs = (
			"".join(run.text or "" for run in paragraph.iter(word_ns + "t"))
			for paragraph in root.iter(word_ns + "p")
		)
		return "\n".join(paragraphs)


	def main():
		"""Render the Streamlit UI and summarize the input the user provides.

		The user picks an input type (text, text file, PDF, DOCX or audio),
		supplies the input, and presses "Report Result". Text-bearing inputs
		are converted to a string and passed to ``summarize_text``; audio is
		reported as not implemented. Any exception raised while processing is
		logged with its traceback and shown to the user as a generic error.
		"""
		setup_logging()
		logging.info("Starting the Streamlit app.")

		# Streamlit re-executes the script on every interaction; the cached
		# loader avoids re-instantiating the model each time.
		model, tokenizer = _load_summarizer()

		# Streamlit UI
		st.title("GenAI Lab Report Analyzer")
		st.write("Upload a file, record audio, or type text to generate a summary. Select the appropriate input type and provide the input.")

		input_type = st.radio(
			"Select Input Type:",
			options=["Text", "Text File", "PDF", "DOCX", "Audio"],
			index=0
		)

		file = None
		text = None
		audio = None

		if input_type == "Text":
			text = st.text_area("Enter your text here:", placeholder="Type your text here...")
		elif input_type == "Text File":
			file = st.file_uploader("Upload your text file:", type=["txt"])
		elif input_type == "PDF":
			file = st.file_uploader("Upload your PDF file:", type=["pdf"])
		elif input_type == "DOCX":
			file = st.file_uploader("Upload your DOCX file:", type=["docx"])
		elif input_type == "Audio":
			audio = st.file_uploader("Upload your audio file:", type=["wav", "mp3", "m4a"])

		if st.button("Report Result"):
			try:
				summary = None
				# Text to summarize; stays None for branches that set
				# `summary` directly (audio, invalid input).
				source_text = None

				if input_type == "Text" and text:
					logging.info("Processing text input.")
					source_text = text
				elif input_type == "Text File" and file:
					logging.info(f"Processing text file: {file.name}")
					source_text = file.read().decode("utf-8")  # Assuming UTF-8 encoding
				elif input_type == "PDF" and file:
					logging.info(f"Processing PDF file: {file.name}")
					source_text = extract_text_from_pdf(file)
				elif input_type == "DOCX" and file:
					# Previously this option fell through to "Invalid input".
					logging.info(f"Processing DOCX file: {file.name}")
					source_text = _extract_text_from_docx(file)
				elif input_type == "Audio" and audio:
					logging.info("Processing audio input.")
					# Add audio processing logic here
					summary = "Audio processing not implemented yet."
				else:
					summary = "Invalid input. Please provide a valid file or text."
					logging.warning("Invalid input type provided.")

				if source_text is not None:
					if source_text.strip():
						summary = summarize_text(model, tokenizer, source_text)
						if input_type == "Text":
							logging.info("Text input processed successfully.")
					else:
						# E.g. a scanned PDF with no text layer: there is
						# nothing meaningful to feed the model.
						summary = "No text could be extracted from the provided input."
						logging.warning("No extractable text in the provided input.")

				st.text_area("Report Result:", summary, height=200)
			except Exception as e:
				# logging.exception records the traceback as well as the message.
				logging.exception("Error during summarization: %s", e)
				st.error("An error occurred during summarization. Please check the logs for more details.")

		logging.info("Closing the Streamlit app.")

	# Invoke main() only when this module is executed as a script.
	if __name__ == "__main__":
		main()