import os
import logging
from pathlib import Path

from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI

# Load environment variables from .env file
load_dotenv()

# Retrieve the OpenAI API key from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
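
# Added guard: fail fast if the key is missing so configuration problems
# surface immediately, rather than as a less direct error when ChatOpenAI
# is constructed later.
if not OPENAI_API_KEY:
    raise EnvironmentError("OPENAI_API_KEY is not set; add it to your .env file.")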

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def load_faiss_index(folder_path: str, model_name: str) -> FAISS:
    """
    Load a FAISS index with a specific embedding model.

    Args:
        folder_path: Path to the FAISS index folder
        model_name: Name of the HuggingFace embedding model

    Returns:
        FAISS: Loaded FAISS index object

    Raises:
        ValueError: If the folder path doesn't exist
    """
    try:
        if not os.path.exists(folder_path):
            raise ValueError(f"FAISS index folder not found: {folder_path}")

        logger.info(f"Loading FAISS index from {folder_path}")
        embeddings = HuggingFaceEmbeddings(model_name=model_name)
        # allow_dangerous_deserialization is required because FAISS index
        # metadata is pickled; only enable it for index files you created.
        return FAISS.load_local(
            folder_path=folder_path,
            embeddings=embeddings,
            allow_dangerous_deserialization=True
        )
    except Exception as e:
        logger.error(f"Error loading FAISS index: {str(e)}")
        raise
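
# Illustrative call, mirroring the index path and embedding model that
# generate_answer() passes below:
#   index = load_faiss_index(
#       "vectors_data/faiss_v4",
#       "sentence-transformers/all-MiniLM-L12-v2",
#   )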


def generate_answer(query: str) -> str:
    """
    Generate an answer for the given query using RAG.

    Args:
        query: User's question

    Returns:
        str: Generated answer

    Raises:
        ValueError: If the query is empty or required files are missing
    """
    try:
        if not query or not query.strip():
            raise ValueError("Query cannot be empty")

        # Resolve paths relative to this file
        current_dir = Path(__file__).parent
        vectors_dir = current_dir / "vectors_data"

        # Validate that the vectors directory exists
        if not vectors_dir.exists():
            raise ValueError(f"Vectors directory not found at {vectors_dir}")

        # Load the FAISS index
        logger.info("Loading FAISS index...")
        data_vec = load_faiss_index(
            str(vectors_dir / "faiss_v4"),
            "sentence-transformers/all-MiniLM-L12-v2"
        )

        # Create the LLM instance (temperature 0 keeps answers deterministic)
        llm = ChatOpenAI(
            model="gpt-4o-mini",
            temperature=0,
            openai_api_key=OPENAI_API_KEY
        )
template = """You are a knowledgeable and approachable medical information assistant. Use the context provided to answer the medical question at the end. Follow these guidelines to ensure a clear, user-friendly, and professional response:
Important Guidelines:
1. **Clarity and Accessibility:**
- Write in simple, understandable language suitable for a general audience.
- Explain any technical terms briefly, if used.
2. **Structure:**
- Use clear paragraphs or bullet points for organization.
- Start with a concise summary of the issue before diving into details.
3. **Accuracy and Reliability:**
- Base your response strictly on the context provided.
- If you cannot provide an answer based on the context, state this honestly.
4. **Medical Safety and Disclaimers:**
- Include a disclaimer emphasizing the need to consult a healthcare professional for a personalized diagnosis or treatment plan.
5. **Treatment Information (if applicable):**
- Clearly outline treatment options, including:
- Drug name
- Drug class
- Dosage
- Frequency and duration
- Potential side effects
- Risks and additional recommendations
- Specify that these options are general and should be discussed with a healthcare provider.
6. **Encourage Engagement:**
- Invite users to ask clarifying questions or provide additional details for a more tailored response.
Context: {context}
Question: {question}
Medical Information Assistant:"""
QA_CHAIN_PROMPT = PromptTemplate(
input_variables=["context", "question"],
template=template
)

        # Set up the retriever over the loaded index
        logger.info("Setting up retrieval chain...")
        data_retriever = data_vec.as_retriever()
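
        # If answers should draw on several indices, LangChain's MergerRetriever
        # can combine multiple retrievers into one. A minimal sketch, assuming a
        # second, hypothetical index at vectors_data/faiss_extra:
        #
        #   from langchain.retrievers import MergerRetriever
        #   extra_vec = load_faiss_index(
        #       str(vectors_dir / "faiss_extra"),
        #       "sentence-transformers/all-MiniLM-L12-v2",
        #   )
        #   data_retriever = MergerRetriever(
        #       retrievers=[data_retriever, extra_vec.as_retriever()]
        #   )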

        # Initialize the RetrievalQA chain with the custom prompt
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=data_retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
        )

        # Run the chain
        logger.info("Generating answer...")
        result = qa_chain.invoke({"query": query})
        logger.info("Answer generated successfully")

        # Log how many source documents backed the answer (avoids dumping
        # full document contents into the logs)
        extracted_docs = result.get("source_documents", [])
        logger.info(f"Retrieved {len(extracted_docs)} source documents")

        return result["result"]
    except Exception as e:
        logger.error(f"Error generating answer: {str(e)}")
        raise


def main():
    """
    Main function to demonstrate the usage of the RAG system.
    """
    try:
        # Example usage
        query = "suggest me some medicine for bronchitis"
        logger.info(f"Processing query: {query}")
        response = generate_answer(query)

        print("\nQuery:", query)
        print("\nResponse:", response)
    except Exception as e:
        logger.error(f"Error in main function: {str(e)}")
        print(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()