File size: 4,336 Bytes
7f334ec
8cf2395
 
7f334ec
4d72778
8cf2395
cc10da2
7f334ec
8cf2395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d72778
8cf2395
4d72778
8cf2395
4d72778
 
8cf2395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f334ec
 
8cf2395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import streamlit as st
from transformers import pipeline
import pdfplumber
from PIL import Image
import easyocr
from langdetect import detect

# Initialize Models
@st.cache_resource
def initialize_models():
    """Load every Hugging Face pipeline the app uses and return them by name.

    Decorated with ``st.cache_resource`` so the pipelines are shared across
    Streamlit reruns rather than being rebuilt on each one.

    Returns:
        dict: ``"report_check_model"``, ``"sentiment_model"`` and
        ``"summarize_model"`` map to pipelines; ``"translation_model"`` maps
        to a dict of translation pipelines keyed by ``"en"``, ``"hi"`` and
        ``"ur"``.
    """
    return {
        # This must be a zero-shot pipeline: is_lab_report() calls it with
        # `candidate_labels` and reads `result["labels"]`, which is the
        # zero-shot-classification interface, not the text-classification one.
        "report_check_model": pipeline("zero-shot-classification", model="facebook/bart-large-mnli"),
        "sentiment_model": pipeline("sentiment-analysis"),
        "summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
        "translation_model": {
            "en": pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en"),
            "hi": pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi"),
            "ur": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
        }
    }

# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, one page per line group.

    Args:
        pdf_file: Path or binary file-like object that ``pdfplumber.open``
            accepts.

    Returns:
        str: The text of all pages that produced any, joined with newlines
        and stripped; ``""`` when no page yields text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # A page without a text layer (e.g. a scanned image) may give back
        # nothing -- None in some pdfplumber versions -- so normalise to "".
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Join with a newline so the last word of one page is not fused with the
    # first word of the next; skip pages that contributed no text.
    return "\n".join(chunk for chunk in page_texts if chunk).strip()

# Extract text from Image using EasyOCR
def extract_text_from_image(image_file):
    """Run English OCR on an image and return the recognized text.

    Args:
        image_file: Path or binary file-like object that ``PIL.Image.open``
            can read.

    Returns:
        str: The recognized text fragments joined with single spaces and
        stripped of surrounding whitespace.
    """
    import io  # local import: this function is the only user of the module

    # NOTE(review): the reader is rebuilt on every call; if uploads feel slow,
    # consider caching it the same way initialize_models() caches pipelines.
    reader = easyocr.Reader(['en'])  # Add more languages if needed

    # EasyOCR documents file paths, URLs, raw bytes and numpy arrays as valid
    # inputs -- not PIL image objects -- so re-encode the image as PNG bytes.
    # Converting to RGB first normalises palette/alpha modes before encoding.
    with Image.open(image_file) as image:
        png_buffer = io.BytesIO()
        image.convert("RGB").save(png_buffer, format="PNG")

    result = reader.readtext(png_buffer.getvalue(), detail=0)  # `detail=0` returns only the text
    return " ".join(result).strip()

# Check if content is a lab report
def is_lab_report(text, model):
    """Tell whether ``text`` is classified as a lab report.

    Args:
        text: The content to classify.
        model: Callable invoked as ``model(text, candidate_labels=[...])``
            that returns a mapping with a ``"labels"`` sequence. Presumably
            the labels are ordered best match first -- confirm against the
            pipeline in use.

    Returns:
        bool: ``True`` when the first returned label is ``"lab report"``.
    """
    candidate_labels = ["lab report", "not lab report"]
    top_label = model(text, candidate_labels=candidate_labels)["labels"][0]
    return top_label == "lab report"

# Analyze sentiment
def analyze_sentiment(text, sentiment_model):
    """Classify the sentiment of ``text`` as positive or negative.

    Args:
        text: The content to analyze.
        sentiment_model: Callable invoked as
            ``sentiment_model(text, truncation=True)`` that returns a list
            whose first item has ``"label"`` and ``"score"`` entries.

    Returns:
        tuple: ``(sentiment, score)`` where ``sentiment`` is ``"Positive"``
        when the label is ``"POSITIVE"`` and ``"Negative"`` for any other
        label, and ``score`` is the model's score for that label.
    """
    # Extracted reports are routinely longer than the model's maximum input
    # length; ask the pipeline to truncate instead of letting it raise.
    result = sentiment_model(text, truncation=True)[0]
    sentiment = "Positive" if result["label"] == "POSITIVE" else "Negative"
    return sentiment, result["score"]

# Summarize content
def summarize_content(text, summarize_model):
    """Produce a short deterministic summary of ``text``.

    Args:
        text: The content to summarize.
        summarize_model: Callable invoked with ``text`` plus the generation
            settings below; it returns a list whose first item has a
            ``"summary_text"`` entry.

    Returns:
        str: The ``"summary_text"`` of the first result.
    """
    summary = summarize_model(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        # Long reports exceed the summarizer's maximum input length; truncate
        # the input rather than letting the pipeline fail on it.
        truncation=True,
    )
    return summary[0]['summary_text']

# Translate content
def translate_content(text, translation_models):
    """Return ``text`` alongside its Hindi and Urdu translations.

    Args:
        text: The content to translate; returned unchanged as the English
            entry.
        translation_models: Mapping with ``"hi"`` and ``"ur"`` callables,
            each returning a list whose first item has a
            ``"translation_text"`` entry.

    Returns:
        dict: Keys ``"English"``, ``"Hindi"`` and ``"Urdu"``, in that order.
    """
    translations = {"English": text}
    for language, code in (("Hindi", "hi"), ("Urdu", "ur")):
        translated = translation_models[code](text)
        translations[language] = translated[0]["translation_text"]
    return translations

# Streamlit App
def main():
    """Render the Streamlit page: upload a file, extract its text, analyze it.

    The flow is: load the models, wait for an upload, extract text according
    to the file extension, check that the text is a lab report, then show the
    sentiment, a summary, and the summary's Hindi and Urdu translations. Each
    failure is reported once with ``st.error`` and ends the run early.
    """
    st.title("Lab Test Analyzer")

    models = initialize_models()

    uploaded_file = st.file_uploader("Upload a Lab Report (PDF, Image, or Text)", type=["pdf", "png", "jpg", "jpeg", "txt"])

    # Nothing has been uploaded yet on this run.
    if not uploaded_file:
        return

    file_type = uploaded_file.name.split(".")[-1].lower()

    if file_type == "pdf":
        st.write("Processing PDF file...")
        extracted_text = extract_text_from_pdf(uploaded_file)
    elif file_type in ["png", "jpg", "jpeg"]:
        st.write("Processing Image file...")
        extracted_text = extract_text_from_image(uploaded_file)
    elif file_type == "txt":
        st.write("Processing Text file...")
        # Replace undecodable bytes instead of crashing on files that are not
        # UTF-8, and strip so a whitespace-only file counts as empty, matching
        # what the PDF and image extractors return.
        extracted_text = uploaded_file.read().decode("utf-8", errors="replace").strip()
    else:
        st.error("Unsupported file type.")
        # Stop here so the user is not also told that extraction failed.
        return

    if not extracted_text:
        st.error("Could not extract text from the uploaded file.")
        return

    st.subheader("Extracted Content")
    st.text_area("Extracted Text", extracted_text, height=200)

    # Check if it's a lab report
    if not is_lab_report(extracted_text, models["report_check_model"]):
        st.error("The uploaded file does not appear to be a lab report.")
        return

    st.success("The uploaded file is a valid lab report.")

    # Sentiment Analysis
    sentiment, confidence = analyze_sentiment(extracted_text, models["sentiment_model"])
    st.subheader("Sentiment Analysis")
    st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")

    # Summarization
    summary = summarize_content(extracted_text, models["summarize_model"])
    st.subheader("Summary")
    st.text_area("Summary", summary, height=150)

    # Translation (of the summary, not of the full extracted text)
    translations = translate_content(summary, models["translation_model"])
    st.subheader("Translations")
    st.write("**English**: ", translations["English"])
    st.write("**Hindi**: ", translations["Hindi"])
    st.write("**Urdu**: ", translations["Urdu"])

# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()