# Spaces: Sleeping (status text captured from the hosting page; not part of the module)
# validation_service_openai.py
# Works with LangSmith, OpenAI async, built for RAG validation
import os | |
import traceback | |
import openai | |
import asyncio | |
import json | |
from typing import Dict, Optional | |
from langsmith import traceable | |
# ----- ENVIRONMENT SETUP (Replit secret-based) -----
# The LangSmith endpoint and tracing flag are always forced to these values.
os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGSMITH_TRACING"] = "true"

# The remaining settings must already be present in the environment (secrets).
# Re-assigning a variable to itself is a no-op when it is set and raises
# KeyError at import time when it is not, which would make the "key missing"
# warning below (and the status check) unreachable. Report missing values
# instead so the module can still be imported and queried for its status.
for _required_var in ("LANGSMITH_API_KEY", "LANGSMITH_PROJECT"):
    if not os.environ.get(_required_var):
        print(f"Warning: {_required_var} not found. LangSmith tracing may not work.")
# ---------------------------------------------------

# None (or empty) when the secret is not configured; checked before use.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
VALIDATION_MODEL = "gpt-4o"

# Initialize OpenAI Async Client
# Stays None when the key is missing or construction fails, so callers can
# test it for truthiness.
async_openai_client = None
if OPENAI_API_KEY:
    try:
        # (no need for wrap_openai here unless you want call-level traces)
        async_openai_client = openai.AsyncOpenAI(api_key=OPENAI_API_KEY)
        print("OpenAI ASYNC client initialized for validator service.")
    except Exception as e:
        print(f"Error initializing OpenAI ASYNC client for validator: {e}")
        traceback.print_exc()
else:
    print("Warning: OPENAI_API_KEY not found. Validator service (GPT-4o) requires it.")
def check_openai_validator_status():
    """
    Report whether the OpenAI validator service is usable.

    Returns:
        A (status, message) tuple. status is False when the API key or the
        async client is missing, and message then joins every detected problem
        with a single space. Otherwise status is True and message names the
        configured validation model.
    """
    problems = []
    if not OPENAI_API_KEY:
        problems.append("OpenAI API Key missing.")
    if not async_openai_client:
        problems.append("OpenAI Async client (for Validator) initialization failed.")

    if problems:
        return False, " ".join(problems)
    return True, f"OpenAI Validator service ready (Model: {VALIDATION_MODEL})."
async def validate_paragraph_relevance_gpt4o(
    paragraph_data: Dict,
    user_question: str,
    paragraph_index: int
) -> Optional[Dict]:
    """
    Uses GPT-4o to validate if a SINGLE paragraph (HE+EN text) contains relevant info.

    Args:
        paragraph_data: A dictionary for the paragraph (needs 'hebrew_text', 'english_text').
            A missing key or a None value is treated as empty text.
        user_question: The original user question in Hebrew.
        paragraph_index: The index of this paragraph in the list being validated
            (displayed as index + 1 in the prompt).

    Returns:
        A dictionary with a 'validation' entry (holding 'contains_relevant_info'
        and 'justification') and a 'paragraph_data' entry.
        Empty input is answered with a negative verdict without calling the API:
        an empty paragraph_data yields 'paragraph_data': {}, and a paragraph whose
        texts are both blank yields the original paragraph_data.
        Returns None if the client is unavailable, the API call fails, or the
        response is not JSON with the expected structure.
    """
    # Only read here, so no `global` declaration is needed.
    if not async_openai_client:
        print(f"Error (Paragraph {paragraph_index}): OpenAI async client not available.")
        return None

    if not paragraph_data:
        return {
            "validation": {
                "contains_relevant_info": False,
                "justification": "Input paragraph data was empty."
            },
            "paragraph_data": {}
        }

    # `.get(key, '')` only covers a missing key; a key stored with a None value
    # would crash on `.strip()`, so fall back to '' for any falsy value.
    hebrew_text = (paragraph_data.get('hebrew_text') or '').strip()
    english_text = (paragraph_data.get('english_text') or '').strip()

    if not hebrew_text and not english_text:
        return {
            "validation": {
                "contains_relevant_info": False,
                "justification": "Paragraph text is empty."
            },
            "paragraph_data": paragraph_data
        }

    prompt_content = f"""User Question (Hebrew):
"{user_question}"
Text Paragraph (Paragraph {paragraph_index+1}):
Hebrew:
---
{hebrew_text if hebrew_text else "(No Hebrew text provided)"}
---
English:
---
{english_text if english_text else "(No English text provided)"}
---
Instruction:
Analyze the Text Paragraph provided above (considering both Hebrew and English versions if available). Determine if any information within this specific paragraph directly answers, or provides significant relevant details contributing to an answer for, the User Question (which is in Hebrew).
Respond ONLY with a valid JSON object containing exactly two keys:
1. 'contains_relevant_info': A boolean value (`true` if relevant information is found, `false` otherwise).
2. 'justification': A brief, 1-sentence explanation (in Hebrew) for your decision, especially if 'true'.
Example valid JSON output:
{{ "contains_relevant_info": true, "justification": "הפסקה דנה ישירות בסיבת העיכוב בקריעת הים." }}
OR
{{ "contains_relevant_info": false, "justification": "הפסקה עוסקת בעניין אחר ואינה רלוונטית לשאלה." }}
Output only the JSON object, nothing else.
"""

    # Step 1: call the model. Any failure here is logged and reported as None.
    try:
        response = await async_openai_client.chat.completions.create(
            model=VALIDATION_MODEL,
            messages=[{"role": "user", "content": prompt_content}],
            temperature=0.1,
            max_tokens=150,
            response_format={"type": "json_object"}
        )
        json_string = response.choices[0].message.content
    except openai.APIError as e:
        print(f"Error (Paragraph {paragraph_index+1}): OpenAI API Error during validation: {e}")
        return None
    except Exception as e:
        print(f"Error (Paragraph {paragraph_index+1}): Unexpected error during GPT-4o validation API call: {e}")
        traceback.print_exc()
        return None

    # The message content may be absent; handle that explicitly rather than
    # letting json.loads fail with an unrelated-looking TypeError.
    if json_string is None:
        print(f"Error (Paragraph {paragraph_index+1}): Model response contained no content to parse.")
        return None

    # Step 2: parse and validate the JSON verdict.
    try:
        validation_result = json.loads(json_string)
    except json.JSONDecodeError as json_err:
        print(f"Error (Paragraph {paragraph_index+1}): Failed to decode JSON response: {json_err}. Response was: {json_string}")
        return None
    except Exception as parse_err:
        print(f"Error (Paragraph {paragraph_index+1}): Unexpected error parsing validation structure: {parse_err}")
        return None

    # Must be an object with a real boolean verdict and a justification.
    has_expected_shape = (
        isinstance(validation_result, dict)
        and isinstance(validation_result.get('contains_relevant_info'), bool)
        and 'justification' in validation_result
    )
    if not has_expected_shape:
        print(f"Error (Paragraph {paragraph_index+1}): Parsed JSON has incorrect structure: {validation_result}")
        return None

    return {
        "validation": validation_result,
        "paragraph_data": paragraph_data
    }