# App
# Import required libraries
import os
import requests
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer, util

# Set up the Hugging Face API token (read from the HF_API_KEY environment variable)
api_key = os.getenv("HF_API_KEY")


# Load the CSV dataset (Colab-style path, as used in the original environment)
data = pd.read_csv('/content/genetic_diseases_dataset.csv')


# Initialize the Sentence Transformer model for RAG-based retrieval
retriever_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Drop leftover index/empty columns from the CSV export
data = data.drop(columns=['Unnamed: 0', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13'], errors='ignore')

# Combine relevant columns into one combined description field
data['combined_description'] = (
    data['Symptoms'].fillna('') + " " +
    data['Severity Level'].fillna('') + " " +
    data['Risk Assessment'].fillna('') + " " +
    data['Treatment Options'].fillna('') + " " +
    data['Suggested Medical Tests'].fillna('') + " " +
    data['Minimum Values for Medical Tests'].fillna('') + " " +
    data['Emergency Treatment'].fillna('')
)


# Safely generate an embedding for each row; empty descriptions get a zero
# vector so the cosine-similarity step below never receives an empty list
def generate_embedding(description):
    if description:  # skip empty strings (NaNs were already filled above)
        return retriever_model.encode(description).tolist()  # numpy array -> plain list
    return [0.0] * retriever_model.get_sentence_embedding_dimension()

# Generate embeddings for the combined description
data['embeddings'] = data['combined_description'].apply(generate_embedding)
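
# Note: row-by-row encoding works but can be slow on large CSVs; an equivalent
# batched alternative (not in the original) is a single encode call:
#   data['embeddings'] = retriever_model.encode(
#       data['combined_description'].tolist(), batch_size=64
#   ).tolist()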

# Function to retrieve relevant information from CSV dataset based on user query
def get_relevant_info(query, top_k=3):
    query_embedding = retriever_model.encode(query)
    similarities = [util.cos_sim(query_embedding, doc_emb)[0][0].item() for doc_emb in data['embeddings']]
    top_indices = sorted(range(len(similarities)), key=lambda i: similarities[i], reverse=True)[:top_k]
    return data.iloc[top_indices]
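
# Example (illustrative, with a hypothetical query): the call below returns the
# three dataset rows whose embeddings are closest to the query by cosine similarity.
#   top_rows = get_relevant_info("persistent muscle weakness and fatigue", top_k=3)
#   print(top_rows['combined_description'].tolist())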

# Generate a response by calling the Hugging Face Inference API
def generate_response(input_text):
    api_url = "https://api-inference.huggingface.co/models/m42-health/Llama3-Med42-8B"
    headers = {"Authorization": f"Bearer {api_key}"}  # token loaded from HF_API_KEY above
    payload = {"inputs": input_text}

    response = requests.post(api_url, headers=headers, json=payload)
    response.raise_for_status()  # fail loudly on HTTP errors instead of during parsing
    return response.json()[0]["generated_text"]
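
# Note (illustrative): the text-generation payload also accepts a "parameters"
# field, e.g.
#   payload = {"inputs": input_text,
#              "parameters": {"max_new_tokens": 256, "temperature": 0.7}}
# The values shown are assumptions, not settings from the original app.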

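# The report-upload branch of main() below was left as a placeholder in the
# original. This is a minimal, illustrative sketch of a text-extraction helper;
# the pypdf import is an assumed optional dependency, not an original one.
def extract_report_text(uploaded_file):
    """Best-effort text extraction for .txt, .csv, and .pdf uploads."""
    name = uploaded_file.name.lower()
    if name.endswith(".txt"):
        return uploaded_file.read().decode("utf-8", errors="ignore")
    if name.endswith(".csv"):
        return pd.read_csv(uploaded_file).to_string(index=False)
    if name.endswith(".pdf"):
        from pypdf import PdfReader  # assumed dependency: pip install pypdf
        reader = PdfReader(uploaded_file)
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    return ""
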
# Streamlit UI for the Chatbot
def main():
    st.title("Medical Report and Analysis Chatbot")
    st.sidebar.header("Upload Medical Report or Enter Query")

    # Text input for user queries
    user_query = st.sidebar.text_input("Type your question or query")

    # File uploader for medical report
    uploaded_file = st.sidebar.file_uploader("Upload a medical report (optional)", type=["txt", "pdf", "csv"])

    # Process the query if provided
    if user_query:
        st.write("### Query Response:")

        # Retrieve relevant information from dataset
        relevant_info = get_relevant_info(user_query)
        st.write("#### Relevant Medical Information:")
        for _, row in relevant_info.iterrows():
            st.write(f"- {row['combined_description']}")

        # Generate a response from the Llama3-Med42-8B model
        response = generate_response(user_query)
        st.write("#### Model's Response:")
        st.write(response)

    # Process the uploaded file (if any)
    if uploaded_file:
        # Display analysis of the uploaded report file
        st.write("### Uploaded Report Analysis:")
        report_text = extract_report_text(uploaded_file)  # see the sketch helper above
        st.write(report_text)

if __name__ == "__main__":
    main()
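
# To run locally (illustrative, assuming this file is saved as app.py):
#   HF_API_KEY=<your token> streamlit run app.py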