# Divrey-Yoel-RAG / validation_service_openai.py
# Uploaded by sivan22 ("Upload 16 files", commit 7f683f9, verified)
# validation_service_openai.py
# Works with LangSmith, OpenAI async, built for RAG validation
import os
import traceback
import openai
import asyncio
import json
from typing import Dict, Optional
from langsmith import traceable
# ----- ENVIRONMENT SETUP (Replit secret-based) -----
# Force the LangSmith endpoint and tracing flag for this process.
os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGSMITH_TRACING"] = "true"

# The remaining secrets are expected to be provided by the host environment.
# Re-assigning them onto themselves was a no-op that only raised KeyError when
# one was absent, so report missing values instead of crashing at import time.
for _secret_name in ("LANGSMITH_API_KEY", "LANGSMITH_PROJECT"):
    if not os.environ.get(_secret_name):
        print(f"Warning: {_secret_name} not found. LangSmith tracing may not work.")
# ---------------------------------------------------

# Use .get() so a missing key yields None; this keeps the warning branch below
# (and the "API Key missing" status check) reachable.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
VALIDATION_MODEL = "gpt-4o"

# Initialize OpenAI Async Client (stays None when the key is missing or
# construction fails; callers must check before use).
async_openai_client = None
if OPENAI_API_KEY:
    try:
        # (no need for wrap_openai here unless you want call-level traces)
        async_openai_client = openai.AsyncOpenAI(api_key=OPENAI_API_KEY)
        print("OpenAI ASYNC client initialized for validator service.")
    except Exception as e:
        print(f"Error initializing OpenAI ASYNC client for validator: {e}")
        traceback.print_exc()
else:
    print("Warning: OPENAI_API_KEY not found. Validator service (GPT-4o) requires it.")
def check_openai_validator_status():
    """
    Report whether the OpenAI validator service is usable.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is False when the API key is missing
        and/or the async client was not created; ``message`` joins every
        detected problem with spaces, or is a readiness notice naming the
        validation model when nothing is wrong.
    """
    problems = []
    if not OPENAI_API_KEY:
        problems.append("OpenAI API Key missing.")
    if not async_openai_client:
        problems.append("OpenAI Async client (for Validator) initialization failed.")

    # Any recorded problem means the service is not ready.
    if problems:
        return False, " ".join(problems)
    return True, f"OpenAI Validator service ready (Model: {VALIDATION_MODEL})."
@traceable
async def validate_paragraph_relevance_gpt4o(
    paragraph_data: Dict,
    user_question: str,
    paragraph_index: int
) -> Optional[Dict]:
    """
    Uses GPT-4o to validate if a SINGLE paragraph (HE+EN text) contains relevant info.

    Args:
        paragraph_data: A dictionary for the paragraph (needs 'hebrew_text', 'english_text').
            Missing, None, or non-string text values are treated as empty text.
        user_question: The original user question in Hebrew.
        paragraph_index: The 0-based index of this paragraph in the list being
            validated (reported as index + 1 in the prompt and log messages).

    Returns:
        A dictionary with two keys: 'validation' (a dict holding the boolean
        'contains_relevant_info' and a 'justification') and 'paragraph_data'
        (the original input, or {} when the input itself was empty).
        Returns None if the client is unavailable, the API call fails, or the
        model's reply is missing, not valid JSON, or not in the expected shape.
    """
    if not async_openai_client:
        print(f"Error (Paragraph {paragraph_index+1}): OpenAI async client not available.")
        return None

    if not paragraph_data:
        return {
            "validation": {
                "contains_relevant_info": False,
                "justification": "Input paragraph data was empty."
            },
            "paragraph_data": {}
        }

    # A key may be present with a None (or otherwise non-string) value; calling
    # .strip() on that would raise outside any try block, so coerce to ''.
    raw_hebrew = paragraph_data.get('hebrew_text')
    raw_english = paragraph_data.get('english_text')
    hebrew_text = raw_hebrew.strip() if isinstance(raw_hebrew, str) else ''
    english_text = raw_english.strip() if isinstance(raw_english, str) else ''

    if not hebrew_text and not english_text:
        return {
            "validation": {
                "contains_relevant_info": False,
                "justification": "Paragraph text is empty."
            },
            "paragraph_data": paragraph_data
        }

    prompt_content = f"""User Question (Hebrew):
"{user_question}"
Text Paragraph (Paragraph {paragraph_index+1}):
Hebrew:
---
{hebrew_text if hebrew_text else "(No Hebrew text provided)"}
---
English:
---
{english_text if english_text else "(No English text provided)"}
---
Instruction:
Analyze the Text Paragraph provided above (considering both Hebrew and English versions if available). Determine if any information within this specific paragraph directly answers, or provides significant relevant details contributing to an answer for, the User Question (which is in Hebrew).
Respond ONLY with a valid JSON object containing exactly two keys:
1. 'contains_relevant_info': A boolean value (`true` if relevant information is found, `false` otherwise).
2. 'justification': A brief, 1-sentence explanation (in Hebrew) for your decision, especially if 'true'.
Example valid JSON output:
{{ "contains_relevant_info": true, "justification": "הפסקה דנה ישירות בסיבת העיכוב בקריעת הים." }}
OR
{{ "contains_relevant_info": false, "justification": "הפסקה עוסקת בעניין אחר ואינה רלוונטית לשאלה." }}
Output only the JSON object, nothing else.
"""

    # Phase 1: call the model. Any failure here is reported and mapped to None.
    try:
        response = await async_openai_client.chat.completions.create(
            model=VALIDATION_MODEL,
            messages=[{"role": "user", "content": prompt_content}],
            temperature=0.1,
            max_tokens=150,
            response_format={"type": "json_object"}
        )
        json_string = response.choices[0].message.content
    except openai.APIError as e:
        print(f"Error (Paragraph {paragraph_index+1}): OpenAI API Error during validation: {e}")
        return None
    except Exception as e:
        print(f"Error (Paragraph {paragraph_index+1}): Unexpected error during GPT-4o validation API call: {e}")
        traceback.print_exc()
        return None

    # The message content can be absent; report that explicitly rather than
    # letting json.loads fail with an unrelated-looking TypeError.
    if json_string is None:
        print(f"Error (Paragraph {paragraph_index+1}): Model returned no content to parse.")
        return None

    # Phase 2: parse the reply.
    try:
        validation_result = json.loads(json_string)
    except json.JSONDecodeError as json_err:
        print(f"Error (Paragraph {paragraph_index+1}): Failed to decode JSON response: {json_err}. Response was: {json_string}")
        return None
    except Exception as parse_err:
        print(f"Error (Paragraph {paragraph_index+1}): Unexpected error parsing validation structure: {parse_err}")
        return None

    # Require a dict with a real boolean flag and a justification entry.
    has_expected_shape = (
        isinstance(validation_result, dict)
        and isinstance(validation_result.get('contains_relevant_info'), bool)
        and 'justification' in validation_result
    )
    if not has_expected_shape:
        print(f"Error (Paragraph {paragraph_index+1}): Parsed JSON has incorrect structure: {validation_result}")
        return None

    return {
        "validation": validation_result,
        "paragraph_data": paragraph_data
    }