# Page residue captured with this file (not part of the program): "Spaces: Sleeping"
import io

import easyocr
import pdfplumber
import streamlit as st
from langdetect import detect
from PIL import Image
from transformers import pipeline
# Initialize Models
def initialize_models():
    """Build every Hugging Face pipeline the app uses.

    Returns:
        dict: ``"report_check_model"`` (zero-shot classifier),
        ``"sentiment_model"``, ``"summarize_model"`` and
        ``"translation_model"`` (a dict of translation pipelines keyed by
        the language codes ``"en"``, ``"hi"`` and ``"ur"``).
    """
    return {
        # The classifier is called with ``candidate_labels`` and its result is
        # read through ``result["labels"]``; that is the zero-shot
        # classification interface, not plain text-classification.
        "report_check_model": pipeline(
            "zero-shot-classification", model="facebook/bart-large-mnli"
        ),
        "sentiment_model": pipeline("sentiment-analysis"),
        "summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
        "translation_model": {
            "en": pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en"),
            "hi": pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi"),
            "ur": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur"),
        },
    }
# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of all pages of a PDF as one string.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; ``main`` passes the
            Streamlit upload object.

    Returns:
        str: Page texts joined with newlines and stripped; ``""`` when no
        page yields any text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # A page without a text layer (e.g. a scan) can yield None or "";
        # ``or ""`` keeps one such page from raising TypeError for the whole file.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Newline separator so the last word of one page does not fuse with the
    # first word of the next.
    return "\n".join(page_texts).strip()
# Extract text from Image using EasyOCR
def extract_text_from_image(image_file):
    """Run English OCR on an image and return the recognised text.

    Args:
        image_file: Path or binary file-like object that ``PIL.Image.open``
            can read; ``main`` passes the Streamlit upload object.

    Returns:
        str: Recognised text fragments joined by single spaces and stripped.
    """
    reader = easyocr.Reader(['en'])  # Add more languages if needed
    # EasyOCR documents file path/URL, bytes and numpy array as inputs, so the
    # image is re-encoded to PNG bytes rather than passing the PIL object.
    # Converting to RGB first normalises palette/alpha modes.
    with Image.open(image_file) as image:
        buffer = io.BytesIO()
        image.convert("RGB").save(buffer, format="PNG")
    # `detail=0` returns only the text
    fragments = reader.readtext(buffer.getvalue(), detail=0)
    return " ".join(fragments).strip()
# Check if content is a lab report
def is_lab_report(text, model):
    """Tell whether ``model`` ranks "lab report" as the best label for ``text``.

    Args:
        text: Content to classify.
        model: Callable invoked as ``model(text, candidate_labels=[...])`` that
            returns a mapping whose ``"labels"`` entry is indexable.

    Returns:
        bool: True when the first entry of ``"labels"`` is ``"lab report"``.
    """
    target_label = "lab report"
    outcome = model(text, candidate_labels=[target_label, "not lab report"])
    top_label = outcome["labels"][0]
    return top_label == target_label
# Analyze sentiment
def analyze_sentiment(text, sentiment_model):
    """Classify ``text`` as "Positive" or "Negative".

    Args:
        text: Content to analyse.
        sentiment_model: Callable invoked as
            ``sentiment_model(text, truncation=True)`` that returns a sequence
            whose first item has ``"label"`` and ``"score"`` entries.

    Returns:
        tuple: ``(sentiment, score)`` where ``sentiment`` is ``"Positive"``
        when the label is "POSITIVE" (compared case-insensitively) and
        ``"Negative"`` for any other label.
    """
    # Reports can be longer than the model's maximum input; truncation keeps
    # the pipeline from failing on over-long text.
    result = sentiment_model(text, truncation=True)[0]
    is_positive = str(result["label"]).upper() == "POSITIVE"
    sentiment = "Positive" if is_positive else "Negative"
    return sentiment, result["score"]
# Summarize content
def summarize_content(text, summarize_model):
    """Return a deterministic summary of ``text``.

    Args:
        text: Content to summarise.
        summarize_model: Callable that accepts the text plus the keyword
            arguments ``max_length``, ``min_length``, ``do_sample`` and
            ``truncation``, and returns a sequence whose first item has a
            ``"summary_text"`` entry.

    Returns:
        The ``"summary_text"`` value of the first result.
    """
    summary = summarize_model(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        # Full reports can exceed the summariser's input limit; truncate
        # instead of letting the pipeline fail on over-long input.
        truncation=True,
    )
    return summary[0]["summary_text"]
# Translate content
def translate_content(text, translation_models):
    """Produce Hindi and Urdu translations of ``text`` next to the original.

    Args:
        text: Text to translate; returned unchanged under ``"English"``.
        translation_models: Mapping with ``"hi"`` and ``"ur"`` callables, each
            returning a sequence whose first item has a ``"translation_text"``
            entry.

    Returns:
        dict: Keys ``"English"``, ``"Hindi"`` and ``"Urdu"``.
    """

    def _translate(lang_code):
        # Run the pipeline for one language code and unwrap its first result.
        output = translation_models[lang_code](text)
        return output[0]["translation_text"]

    hindi_text = _translate("hi")
    urdu_text = _translate("ur")
    return {"English": text, "Hindi": hindi_text, "Urdu": urdu_text}
# Streamlit App
def main():
    """Render the "Lab Test Analyzer" Streamlit page.

    Flow: accept an upload (PDF, image or text), extract its text, check that
    it looks like a lab report, then show sentiment, a summary, and the
    summary in English, Hindi and Urdu. Each failure path reports a single
    error through ``st.error`` and stops.
    """
    st.title("Lab Test Analyzer")
    uploaded_file = st.file_uploader(
        "Upload a Lab Report (PDF, Image, or Text)",
        type=["pdf", "png", "jpg", "jpeg", "txt"],
    )
    if not uploaded_file:
        return

    file_type = uploaded_file.name.split(".")[-1].lower()
    if file_type == "pdf":
        st.write("Processing PDF file...")
        extracted_text = extract_text_from_pdf(uploaded_file)
    elif file_type in ("png", "jpg", "jpeg"):
        st.write("Processing Image file...")
        extracted_text = extract_text_from_image(uploaded_file)
    elif file_type == "txt":
        st.write("Processing Text file...")
        # errors="replace": a text file that is not valid UTF-8 is shown with
        # replacement characters instead of raising UnicodeDecodeError.
        extracted_text = uploaded_file.read().decode("utf-8", errors="replace")
    else:
        # Return here so the user does not also get the "could not extract"
        # error below for the same upload.
        st.error("Unsupported file type.")
        return

    # Whitespace-only content counts as "nothing extracted".
    extracted_text = extracted_text.strip()
    if not extracted_text:
        st.error("Could not extract text from the uploaded file.")
        return

    st.subheader("Extracted Content")
    st.text_area("Extracted Text", extracted_text, height=200)

    # Load the models only once there is text to analyse, and cache them
    # across Streamlit reruns so they are not rebuilt on every interaction.
    models = st.cache_resource(initialize_models)()

    # Check if it's a lab report
    if not is_lab_report(extracted_text, models["report_check_model"]):
        st.error("The uploaded file does not appear to be a lab report.")
        return
    st.success("The uploaded file is a valid lab report.")

    # Sentiment Analysis
    sentiment, confidence = analyze_sentiment(extracted_text, models["sentiment_model"])
    st.subheader("Sentiment Analysis")
    st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")

    # Summarization
    summary = summarize_content(extracted_text, models["summarize_model"])
    st.subheader("Summary")
    st.text_area("Summary", summary, height=150)

    # Translation (of the summary, not the full extracted text)
    translations = translate_content(summary, models["translation_model"])
    st.subheader("Translations")
    st.write("**English**: ", translations["English"])
    st.write("**Hindi**: ", translations["Hindi"])
    st.write("**Urdu**: ", translations["Urdu"])
# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()