File size: 4,210 Bytes
7f334ec
8cf2395
 
7f334ec
8cf2395
 
cc10da2
7f334ec
8cf2395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f334ec
 
8cf2395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import streamlit as st
from transformers import pipeline
import pdfplumber
from PIL import Image
import pytesseract
from langdetect import detect

# Initialize Models
@st.cache_resource
def initialize_models():
    """Build the Hugging Face pipelines used by the app.

    The function is wrapped in ``st.cache_resource`` so the pipelines are
    constructed once and reused rather than rebuilt on every call.

    Returns:
        dict: A mapping with these keys:

        * ``"report_check_model"`` -- zero-shot classifier, called with
          ``candidate_labels`` by ``is_lab_report``.
        * ``"sentiment_model"`` -- the default ``sentiment-analysis`` pipeline.
        * ``"summarize_model"`` -- summarization pipeline.
        * ``"translation_model"`` -- dict of translation pipelines keyed by
          ``"en"``, ``"hi"`` and ``"ur"``.
    """
    return {
        # Must be the zero-shot task: the caller passes ``candidate_labels``
        # and reads ``result["labels"]``, which a plain "text-classification"
        # pipeline neither accepts nor returns.
        "report_check_model": pipeline(
            "zero-shot-classification", model="facebook/bart-large-mnli"
        ),
        "sentiment_model": pipeline("sentiment-analysis"),
        "summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
        "translation_model": {
            # NOTE(review): the "en" pipeline is not referenced anywhere in
            # the visible code; kept so the returned mapping is unchanged.
            "en": pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en"),
            "hi": pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi"),
            "ur": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur"),
        },
    }

# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of all pages of a PDF as a single string.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; ``main`` passes the
            Streamlit upload object.

    Returns:
        str: The text of each page joined with newlines, with leading and
        trailing whitespace removed. Empty when no page yields any text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # A page without a text layer (e.g. a scanned image) may yield None
        # or ""; fall back to "" so one such page cannot raise TypeError.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next.
    return "\n".join(page_texts).strip()

# Extract text from Image
def extract_text_from_image(image_file):
    """OCR an image and return the recognized text.

    Args:
        image_file: Whatever ``PIL.Image.open`` accepts; ``main`` passes the
            Streamlit upload object.

    Returns:
        str: The output of ``pytesseract.image_to_string`` with leading and
        trailing whitespace removed.
    """
    ocr_output = pytesseract.image_to_string(Image.open(image_file))
    return ocr_output.strip()

# Check if content is a lab report
def is_lab_report(text, model):
    """Tell whether *text* is classified as a lab report.

    Args:
        text: The text to classify.
        model: Callable invoked as ``model(text, candidate_labels=[...])``
            that returns a mapping whose ``"labels"`` entry is a sequence;
            the first element is taken as the predicted label.

    Returns:
        bool: ``True`` when the first returned label is ``"lab report"``.
    """
    prediction = model(text, candidate_labels=["lab report", "not lab report"])
    best_label = prediction["labels"][0]
    return best_label == "lab report"

# Analyze sentiment
def analyze_sentiment(text, sentiment_model):
    """Classify *text* as positive or negative.

    Args:
        text: The text to classify.
        sentiment_model: Callable invoked as
            ``sentiment_model(text, truncation=True)`` that returns a
            sequence whose first item is a mapping with ``"label"`` and
            ``"score"`` entries.

    Returns:
        tuple: ``(sentiment, score)``. ``sentiment`` is ``"Positive"`` when
        the label is exactly ``"POSITIVE"`` and ``"Negative"`` otherwise;
        ``score`` is passed through unchanged.
    """
    # The whole extracted report is passed in; ask the pipeline to truncate
    # so text longer than the model's maximum input does not make it fail.
    top_result = sentiment_model(text, truncation=True)[0]
    sentiment = "Positive" if top_result["label"] == "POSITIVE" else "Negative"
    return sentiment, top_result["score"]

# Summarize content
def summarize_content(text, summarize_model):
    """Summarize *text* with the given summarization model.

    Args:
        text: The text to summarize.
        summarize_model: Callable invoked with *text* plus the keyword
            arguments ``max_length=130``, ``min_length=30``,
            ``do_sample=False`` and ``truncation=True``. It must return a
            sequence whose first item is a mapping with a
            ``"summary_text"`` entry.

    Returns:
        The ``"summary_text"`` value of the first result.
    """
    # truncation=True lets the pipeline clip over-long reports to the
    # model's maximum input instead of failing on them.
    results = summarize_model(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return results[0]["summary_text"]

# Translate content
def translate_content(text, translation_models):
    """Translate *text* into Hindi and Urdu.

    Args:
        text: The text to translate; returned unchanged under ``"English"``.
        translation_models: Mapping with ``"hi"`` and ``"ur"`` entries, each
            a callable that takes the text and returns a sequence whose
            first item is a mapping with a ``"translation_text"`` entry.

    Returns:
        dict: ``{"English": ..., "Hindi": ..., "Urdu": ...}``.
    """
    translated = {"English": text}
    for language, model_key in (("Hindi", "hi"), ("Urdu", "ur")):
        output = translation_models[model_key](text)
        translated[language] = output[0]["translation_text"]
    return translated

# Streamlit App
def main():
    """Render the Streamlit page and run the analysis on an uploaded file.

    Steps: load the models, let the user upload a PDF, image or text file,
    extract its text, check that it is classified as a lab report, then
    show sentiment, a summary, and Hindi/Urdu translations of the summary.
    Each failure is reported with ``st.error`` and ends the run early.
    """
    st.title("Lab Test Analyzer")

    models = initialize_models()

    uploaded_file = st.file_uploader(
        "Upload a Lab Report (PDF, Image, or Text)",
        type=["pdf", "png", "jpg", "jpeg", "txt"],
    )
    if not uploaded_file:
        return

    file_type = uploaded_file.name.split(".")[-1].lower()

    if file_type == "pdf":
        st.write("Processing PDF file...")
        extracted_text = extract_text_from_pdf(uploaded_file)
    elif file_type in ("png", "jpg", "jpeg"):
        st.write("Processing Image file...")
        extracted_text = extract_text_from_image(uploaded_file)
    elif file_type == "txt":
        st.write("Processing Text file...")
        # errors="replace" keeps a non-UTF-8 upload from crashing the app;
        # strip() matches the PDF/image paths so a whitespace-only file is
        # treated as empty.
        extracted_text = uploaded_file.read().decode("utf-8", errors="replace").strip()
    else:
        # Stop here so the unrelated "could not extract" error is not also
        # shown for an unsupported extension.
        st.error("Unsupported file type.")
        return

    if not extracted_text:
        st.error("Could not extract text from the uploaded file.")
        return

    st.subheader("Extracted Content")
    st.text_area("Extracted Text", extracted_text, height=200)

    # Check if it's a lab report
    if not is_lab_report(extracted_text, models["report_check_model"]):
        st.error("The uploaded file does not appear to be a lab report.")
        return

    st.success("The uploaded file is a valid lab report.")

    # Sentiment Analysis
    sentiment, confidence = analyze_sentiment(extracted_text, models["sentiment_model"])
    st.subheader("Sentiment Analysis")
    st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")

    # Summarization
    summary = summarize_content(extracted_text, models["summarize_model"])
    st.subheader("Summary")
    st.text_area("Summary", summary, height=150)

    # Translation (of the summary, not the full text)
    translations = translate_content(summary, models["translation_model"])
    st.subheader("Translations")
    st.write("**English**: ", translations["English"])
    st.write("**Hindi**: ", translations["Hindi"])
    st.write("**Urdu**: ", translations["Urdu"])

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()