# app.py - LangSmith enabled, designed for Replit + Anthropic + OpenAI
import os
import streamlit as st
import time
import traceback
import json
import asyncio
import nest_asyncio
from typing import List, Dict
from dotenv import load_dotenv

load_dotenv()

# ----- SETUP SECRETS AND ENV -----
# Hardcoded (safe to commit): these never need to live in secrets.
os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGSMITH_TRACING"] = "true"
# The following must exist in your Replit secrets:
# OPENAI_API_KEY, ANTHROPIC_API_KEY, LANGSMITH_API_KEY, LANGSMITH_PROJECT
# The self-assignment is a fail-fast existence check: it raises KeyError
# immediately if a required secret is missing.
for _key in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "LANGSMITH_API_KEY", "LANGSMITH_PROJECT"):
    os.environ[_key] = os.environ[_key]
# ----------------------------------

from langsmith import traceable

# Streamlit scripts may run inside an already-running event loop;
# nest_asyncio lets run_until_complete() be called from within it.
nest_asyncio.apply()

from retriever_pinecone import find_similar_paragraphs, check_retriever_status
from analysis_service_anthropic import (
    analyze_source_relevance_async,
    check_analyzer_status,
    ANALYSIS_MODEL as ANTHROPIC_ANALYSIS_MODEL,
)
from generation_service_anthropic import (
    generate_response_stream_async as generate_anthropic,
    check_generator_status as check_anthropic_generator,
    GENERATION_MODEL as ANTHROPIC_GENERATION_MODEL,
)
from generation_service_gemini import (
    generate_response_stream_gemini as generate_gemini,
    check_gemini_generator_status,
    GENERATION_MODEL as GEMINI_GENERATION_MODEL,
)
from validation_service_openai import (
    validate_paragraph_relevance_gpt4o,
    check_openai_validator_status,
    VALIDATION_MODEL as GPT4O_VALIDATION_MODEL,
)

try:
    from generation_service_anthropic import format_context_for_prompt
    print("Format context function potentially available.")
except ImportError:
    print("Warning: format_context_for_prompt not imported.")

st.set_page_config(page_title="Divrey Yoel AI Chat", layout="wide")
# Custom page CSS (currently empty).
st.markdown("""""", unsafe_allow_html=True)
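
# Added helper (not part of the original service modules): every async call
# site in this file repeats the same dance, so it is captured once here.
# Grab the current loop; if it is already running (as under Streamlit),
# nest_asyncio (applied above) makes run_until_complete() legal anyway.
def run_async(coro):
    """Drive `coro` to completion from Streamlit's script thread."""
    loop = asyncio.get_event_loop_policy().get_event_loop()
    if loop.is_running():
        nest_asyncio.apply()
        loop = asyncio.get_event_loop_policy().get_event_loop()
    return loop.run_until_complete(coro)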
# Page header and subtitle (RTL; exact wrapper markup assumed).
st.markdown("<h1 dir='rtl'>Divrey Yoel AI Chat</h1>", unsafe_allow_html=True)
st.markdown("<h3 dir='rtl'>חיפוש בטקסטים חסידיים באמצעות RAG</h3>", unsafe_allow_html=True)

# --- Status Checks & Sidebar ---
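# Service status probes. Each check_* function is assumed (from the unpacking
# below) to return a (ready: bool, message: str) tuple; the booleans gate the
# UI and the message is surfaced only on failure.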
retriever_ready, retriever_msg = check_retriever_status()
anthropic_analyzer_ready, anthropic_analyzer_msg = check_analyzer_status()
anthropic_generator_ready, anthropic_generator_msg = check_anthropic_generator()
gemini_generator_ready, gemini_generator_msg = check_gemini_generator_status()
openai_validator_ready, openai_validator_msg = check_openai_validator_status()

st.sidebar.markdown("<h3 dir='rtl'>מצב המערכת</h3>", unsafe_allow_html=True)
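
# One status row per backing service; ✅/❌ mirrors the probe result, and the
# suffix notes which pipeline(s) actually require that service.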
st.sidebar.markdown(
    f"<div dir='rtl'>מאחזר (Pinecone): {'✅' if retriever_ready else '❌'}</div>",
    unsafe_allow_html=True,
)
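
# Hard gate: without the Pinecone retriever there is nothing to search, so
# the app stops here. Every other service merely disables its own pipeline.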
if not retriever_ready:
    # Surface the probe's failure message (assumed: the original empty
    # f-string displayed retriever_msg).
    st.sidebar.markdown(f"<div dir='rtl'>{retriever_msg}</div>", unsafe_allow_html=True)
    st.markdown(
        "<div dir='rtl'>שירות האחזור (Pinecone) אינו זמין. לא ניתן להמשיך.</div>",
        unsafe_allow_html=True,
    )
    st.stop()

st.sidebar.markdown("<hr>", unsafe_allow_html=True)
st.sidebar.markdown(
    f"<div dir='rtl'>מנתח (Anthropic): {'✅' if anthropic_analyzer_ready else '❌'} (נדרש לשיטת Anthropic)</div>",
    unsafe_allow_html=True,
)
st.sidebar.markdown(
    f"<div dir='rtl'>מאמת (GPT-4o): {'✅' if openai_validator_ready else '❌'} (נדרש לשיטת GPT-4o)</div>",
    unsafe_allow_html=True,
)
st.sidebar.markdown(
    f"<div dir='rtl'>מחולל (Anthropic): {'✅' if anthropic_generator_ready else '❌'} (נדרש לשיטות Anthropic/GPT-4o)</div>",
    unsafe_allow_html=True,
)
st.sidebar.markdown(
    f"<div dir='rtl'>מחולל (Gemini): {'✅' if gemini_generator_ready else '❌'} (נדרש לשיטת Gemini)</div>",
    unsafe_allow_html=True,
)
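
# Requirement summary, per the labels above: the Anthropic flow needs the
# analyzer plus the Claude generator; the GPT-4o flow needs the validator plus
# the Claude generator; the Gemini flow needs only the Gemini generator.
# services_ready (defined below) encodes the same rule.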
st.sidebar.markdown("<hr>", unsafe_allow_html=True)
st.sidebar.markdown("<h3 dir='rtl'>הגדרות RAG</h3>", unsafe_allow_html=True)
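
# Three processing pipelines:
#   1. Anthropic: Claude scores each retrieved paragraph 1-10, the scores are
#      thresholded, and the best survivors go to the Claude generator.
#   2. Gemini: no per-paragraph filtering; the top retrieved paragraphs go
#      straight to the Gemini generator.
#   3. GPT-4o: GPT-4o makes a binary keep/drop call per paragraph and Claude
#      synthesizes the final answer from the survivors.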
pipeline_method = st.sidebar.selectbox(
    "בחר שיטת עיבוד:",
    options=[
        "Anthropic (ניתוח וסינון פרטני)",
        "Gemini (אחזור ויצירה ישירה)",
        "GPT-4o Paragraph Validator + Claude Synthesizer",
    ],
    index=2,
)
is_anthropic_pipeline = pipeline_method == "Anthropic (ניתוח וסינון פרטני)"
is_gemini_pipeline = pipeline_method == "Gemini (אחזור ויצירה ישירה)"
is_gpt4o_para_pipeline = pipeline_method == "GPT-4o Paragraph Validator + Claude Synthesizer"

n_retrieve = st.sidebar.slider(
    "מספר פסקאות לאחזור (Retrieve)", 1, 300, 100,
    help="כמה פסקאות לאחזר ראשונית (משותף לכל השיטות).",
)
n_analyze = st.sidebar.slider(
    "מספר פסקאות לניתוח (Anthropic בלבד)", 1, min(n_retrieve, 50), min(21, n_retrieve, 50),
    help="כמה פסקאות יישלחו לניתוח רלוונטיות פרטני ע״י Claude.",
    disabled=not is_anthropic_pipeline,
)
relevance_thresh = st.sidebar.slider(
    "סף רלוונטיות (Anthropic בלבד)", 1, 10, 5,
    help="הציון המינימלי (1-10) שפסקה צריכה לקבל מ-Claude כדי להיחשב רלוונטית.",
    disabled=not is_anthropic_pipeline,
)
n_validate = st.sidebar.slider(
    "מספר פסקאות לאימות (GPT-4o בלבד)", 1, min(n_retrieve, 100), min(50, n_retrieve),
    help="כמה מהפסקאות שאוחזרו יישלחו לאימות רלוונטיות פרטני ע״י GPT-4o.",
    disabled=not is_gpt4o_para_pipeline,
)
n_final_context = st.sidebar.slider(
    "פסקאות מקסימום להקשר סופי (Gemini/Anthropic)", 1, n_retrieve, min(21, n_retrieve),
    help="Gemini/Anthropic: כמה מהפסקאות הטובות ביותר יישלחו ליצירה. GPT-4o: לא בשימוש ישיר (הקשר נקבע ע״י האימות).",
    disabled=is_gpt4o_para_pipeline,
)

services_ready = (
    retriever_ready
    and ((anthropic_analyzer_ready and anthropic_generator_ready) if is_anthropic_pipeline else True)
    and (gemini_generator_ready if is_gemini_pipeline else True)
    and ((openai_validator_ready and anthropic_generator_ready) if is_gpt4o_para_pipeline else True)
)
if not services_ready and retriever_ready:
    # Wording assumed: the original warning string was empty.
    st.markdown(
        f"<div dir='rtl'>⚠️ חלק מהשירותים הדרושים לשיטה '{pipeline_method}' אינם זמינים.</div>",
        unsafe_allow_html=True,
    )


@traceable
def run_rag_pipeline(pipeline_prompt: str, selected_pipeline_method: str, status_container=None):
    """Run retrieval, optional filtering/validation, and generation; traced in LangSmith."""
    is_anthropic_pipeline = selected_pipeline_method == "Anthropic (ניתוח וסינון פרטני)"
    is_gemini_pipeline = selected_pipeline_method == "Gemini (אחזור ויצירה ישירה)"
    is_gpt4o_para_pipeline = selected_pipeline_method == "GPT-4o Paragraph Validator + Claude Synthesizer"
    result = {
        "full_response": "",
        "final_docs_data": [],
        "status_updates": [],
        "error": None,
        "analysis_flow": selected_pipeline_method,
    }
    current_status_label = "מתחיל עיבוד..."
    message_placeholder = st.empty()
    try:
        # --- Step 1: retrieval ---
        current_status_label = f"1. מאחזר עד {n_retrieve} פסקאות מ-Pinecone..."
        start_retrieval = time.time()
        if status_container:
            status_container.update(label=current_status_label)
        # Assumed shape (inferred from the .get() calls below): each doc is a
        # dict with at least 'hebrew_text', 'source_name' and 'original_id'.
        retrieved_docs = find_similar_paragraphs(query_text=pipeline_prompt, n_results=n_retrieve)
        retrieval_time = time.time() - start_retrieval
        status_msg = f"אוחזרו {len(retrieved_docs)} פסקאות ב-{retrieval_time:.2f} שניות."
        result["status_updates"].append(f"1. {status_msg}")
        current_status_label = f"1. {status_msg}"
        if status_container:
            status_container.update(label=current_status_label)
        if not retrieved_docs:
            result["full_response"] = "<div dir='rtl'>לא אותרו מקורות רלוונטיים לשאילתה.</div>"
            if status_container:
                status_container.update(label="לא נמצאו מסמכים.", state="complete")
            message_placeholder.markdown(result["full_response"], unsafe_allow_html=True)
            return result

        docs_for_generator = []
        generator_name = ""

        if is_anthropic_pipeline:
            # --- Steps 2-3 (Anthropic): per-paragraph relevance scoring, then filtering ---
            generator_name = "Anthropic"
            analysis_count = min(len(retrieved_docs), n_analyze)
            current_status_label = f"2. [Anthropic] מנתח רלוונטיות פרטנית ({analysis_count} פסקאות)..."
            analysis_start_time = time.time()
            if status_container:
                status_container.update(label=current_status_label)

            async def run_anthropic_analysis():
                docs_to_analyze_local = retrieved_docs[:analysis_count]
                tasks = [
                    analyze_source_relevance_async(d.get('hebrew_text', ''), '', pipeline_prompt)
                    for d in docs_to_analyze_local
                ]
                # One concurrent request per paragraph; exceptions are returned
                # in-place instead of aborting the whole batch.
                analysis_results = await asyncio.gather(*tasks, return_exceptions=True)
                return docs_to_analyze_local, analysis_results

            docs_analyzed, analysis_raw_results = run_async(run_anthropic_analysis())

            processed_for_filter = []
            analysis_success_count = 0
            analysis_fail_count = 0
            for doc, res in zip(docs_analyzed, analysis_raw_results):
                # A successful analysis is a dict carrying a 'relevance' payload;
                # anything else (exception or malformed reply) counts as a failure.
                if isinstance(res, dict) and 'relevance' in res:
                    doc['analysis'] = res
                    processed_for_filter.append(doc)
                    analysis_success_count += 1
                else:
                    analysis_fail_count += 1

            analysis_time = time.time() - analysis_start_time
            status_msg = (
                f"ניתוח Anthropic פרטני הושלם ({analysis_success_count} הצלחות, "
                f"{analysis_fail_count} כשלונות) ב-{analysis_time:.2f} שניות."
            )
            result["status_updates"].append(f"2. {status_msg}")
            current_status_label = f"2. {status_msg}"
            if status_container:
                status_container.update(label=current_status_label)

            current_status_label = "3. [Anthropic] סינון לפי ציון רלוונטיות..."
            if status_container:
                status_container.update(label=current_status_label)
            filtered_docs = []
            for doc in processed_for_filter:
                try:
                    # relevance_score arrives as a string, e.g. '7'.
                    score = int(doc.get('analysis', {}).get('relevance', {}).get('relevance_score', '0'))
                    doc['analysis']['relevance']['numeric_score'] = score
                    if score >= relevance_thresh:
                        filtered_docs.append(doc)
                except (TypeError, ValueError):
                    # Missing or non-numeric score: treat the paragraph as irrelevant.
                    pass
            filtered_docs.sort(
                key=lambda d: d.get('analysis', {}).get('relevance', {}).get('numeric_score', 0),
                reverse=True,
            )
            docs_for_generator = filtered_docs[:n_final_context]
            status_msg = (
                f"נבחרו {len(docs_for_generator)} פסקאות לאחר סינון Anthropic "
                f"(סף: {relevance_thresh}, מקס': {n_final_context})."
            )
            result["status_updates"].append(f"3. {status_msg}")
            current_status_label = f"3. {status_msg}"
            if status_container:
                status_container.update(label=current_status_label)
            if not docs_for_generator:
                result["full_response"] = "<div dir='rtl'>לא נמצאו פסקאות רלוונטיות מספיק לאחר סינון Anthropic פרטני.</div>"
                if status_container:
                    status_container.update(label="לא נמצאו פסקאות מסוננות.", state="complete")
                message_placeholder.markdown(result["full_response"], unsafe_allow_html=True)
                return result

        elif is_gemini_pipeline:
            # --- Steps 2-3 (Gemini): no filtering; retrieval order is the ranking ---
            generator_name = "Gemini"
            status_msg = "2. דילוג על שלב ניתוח/סינון (שיטת Gemini)."
            result["status_updates"].append(status_msg)
            current_status_label = status_msg
            if status_container:
                status_container.update(label=current_status_label)
            docs_for_generator = retrieved_docs[:n_final_context]
            status_msg = (
                f"3. נבחרו {len(docs_for_generator)} פסקאות מובילות (לפי אחזור) "
                f"להקשר עבור Gemini (מקס': {n_final_context})."
            )
            result["status_updates"].append(status_msg)
            current_status_label = status_msg
            if status_container:
                status_container.update(label=current_status_label)
            if not docs_for_generator:
                # Defensive: retrieved_docs was non-empty, so this should not happen.
                result["full_response"] = "<div dir='rtl'>לא אותרו מסמכים כלל (שגיאה פנימית).</div>"
                if status_container:
                    status_container.update(label="שגיאה בבחירת הקשר.", state="error")
                message_placeholder.markdown(result["full_response"], unsafe_allow_html=True)
                return result

        elif is_gpt4o_para_pipeline:
            # --- Steps 2-4 (GPT-4o): binary keep/drop validation per paragraph,
            # then Claude synthesis in step 5 ---
            generator_name = "Anthropic"
            docs_to_validate = retrieved_docs[:n_validate]
            num_to_validate = len(docs_to_validate)
            if not docs_to_validate:
                result["full_response"] = "<div dir='rtl'>שגיאה: אין מסמכים לאימות (לאחר אחזור).</div>"
                if status_container:
                    status_container.update(label="שגיאה לפני אימות.", state="error")
                message_placeholder.markdown(result["full_response"], unsafe_allow_html=True)
                return result

            status_msg = f"2. נבחרו {num_to_validate} פסקאות מובילות לאימות פרטני (מתוך {len(retrieved_docs)})."
            result["status_updates"].append(status_msg)
            current_status_label = status_msg
            if status_container:
                status_container.update(label=current_status_label)

            current_status_label = f"3. [GPT-4o] מתחיל אימות מקבילי של {num_to_validate} פסקאות..."
            validation_start_time = time.time()
            if status_container:
                status_container.update(label=current_status_label)
            tasks = [
                validate_paragraph_relevance_gpt4o(doc, pipeline_prompt, i)
                for i, doc in enumerate(docs_to_validate)
            ]
            validation_results = []
            try:
                validation_results = run_async(asyncio.gather(*tasks, return_exceptions=True))
            except Exception as gather_err:
                result["error"] = f"שגיאה בביצוע האימות המקבילי: {gather_err}"
                result["full_response"] = "<div dir='rtl'>אירעה שגיאה קריטית בשלב אימות המידע.</div>"
                if status_container:
                    status_container.update(label="שגיאה באימות!", state="error")
                message_placeholder.markdown(result["full_response"], unsafe_allow_html=True)
                return result

            validation_time = time.time() - validation_start_time
            passed_count = 0
            failed_count = 0
            filtered_paragraphs = []
            current_status_label = "4. [GPT-4o] סינון פסקאות לפי תוצאות האימות..."
            if status_container:
                status_container.update(label=current_status_label)
            for res in validation_results:
                # Expected result shape (from the lookups below):
                # {"validation": {"contains_relevant_info": bool, ...},
                #  "paragraph_data": {...the original doc...}}
                if isinstance(res, Exception):
                    failed_count += 1
                elif isinstance(res, dict) and res.get("validation"):
                    if res["validation"].get("contains_relevant_info") is True:
                        passed_count += 1
                        filtered_paragraphs.append(res.get("paragraph_data", {}))
                else:
                    failed_count += 1
            filtered_paragraphs = [p for p in filtered_paragraphs if p]
            # passed + rejected + failed == num_to_validate, so the rejected
            # count is derived below rather than tallied separately.
            status_msg_val = (
                f"אימות GPT-4o פרטני הושלם ({passed_count} עברו, "
                f"{num_to_validate - passed_count - failed_count} נדחו, "
                f"{failed_count} נכשלו) ב-{validation_time:.2f} שניות."
            )
            result["status_updates"].append(f"3. {status_msg_val}")
            status_msg_filter = f"נאספו {len(filtered_paragraphs)} פסקאות רלוונטיות לאחר אימות."
            result["status_updates"].append(f"4. {status_msg_filter}")
            current_status_label = f"4. {status_msg_filter}"
            if status_container:
                status_container.update(label=current_status_label)
            if not filtered_paragraphs:
                result["full_response"] = "<div dir='rtl'>לא נמצא מידע רלוונטי בפסקאות שנבדקו ע״י GPT-4o.</div>"
                if status_container:
                    status_container.update(label="לא נמצא מידע רלוונטי.", state="complete")
                message_placeholder.markdown(result["full_response"], unsafe_allow_html=True)
                return result
            docs_for_generator = filtered_paragraphs

        else:
            raise ValueError(f"שיטת עיבוד לא ידועה: {selected_pipeline_method}")

        # --- Step 5: final generation ---
        current_status_label = (
            f"5. מכין הקשר ({len(docs_for_generator)} פסקאות) ומחולל תשובה סופית ({generator_name})..."
        )
        result["status_updates"].append(f"5. מכין הקשר ומחולל תשובה ({generator_name})...")
        if status_container:
            status_container.update(label=current_status_label)
        start_generation = time.time()
        final_response_text = ""
        generation_error_details = None
        result["final_docs_data"] = docs_for_generator
        try:
            if generator_name == "Gemini":
                generator_stream = generate_gemini(query=pipeline_prompt, context_documents=docs_for_generator)
                response_chunks = []
                for chunk in generator_stream:
                    # The generator signals failures in-band with a chunk that
                    # starts with "--- שגיאה".
                    if isinstance(chunk, str) and chunk.strip().startswith("--- שגיאה"):
                        generation_error_details = chunk.strip()
                        break
                    response_chunks.append(str(chunk))
                    # Streaming UX: re-render the accumulated text with a
                    # cursor glyph after every chunk.
                    temp_stream_response = "".join(response_chunks)
                    message_placeholder.markdown(
                        f"<div dir='rtl'>{temp_stream_response}▌</div>", unsafe_allow_html=True
                    )
                if generation_error_details is None:
                    final_response_text = "".join(response_chunks)
            elif generator_name == "Anthropic":
                # The Anthropic generator is an async stream, so it has to be
                # consumed inside a coroutine and driven to completion.
                async def consume_anthropic_stream():
                    history = [{"role": "user", "content": pipeline_prompt}]
                    local_chunks = []
                    async for chunk in generate_anthropic(messages=history, context_documents=docs_for_generator):
                        if isinstance(chunk, str) and chunk.strip().startswith("--- שגיאה"):
                            raise RuntimeError(f"Error yielded from Anthropic generator: {chunk.strip()}")
                        local_chunks.append(str(chunk))
                        temp_response = "".join(local_chunks)
                        message_placeholder.markdown(
                            f"<div dir='rtl'>{temp_response}▌</div>", unsafe_allow_html=True
                        )
                    return "".join(local_chunks)

                try:
                    final_response_text = run_async(consume_anthropic_stream())
                except Exception as consume_err:
                    generation_error_details = f"{type(consume_err).__name__}: {consume_err}"
            else:
                raise RuntimeError(f"Generator name '{generator_name}' not recognized.")
        except Exception as gen_err:
            generation_error_details = f"{type(gen_err).__name__}: {gen_err}"

        generation_time = time.time() - start_generation
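        # Added: surface generation timing in the status log, mirroring the
        # earlier steps (generation_time was otherwise unused).
        result["status_updates"].append(f"5. שלב היצירה הסתיים ב-{generation_time:.2f} שניות.")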
        if generation_error_details:
            result["error"] = f"שגיאה במהלך יצירת התשובה ({generator_name}): {generation_error_details}"
            result["full_response"] = (
                f"<div dir='rtl'>שגיאה ביצירת התשובה.<br>פרטים: {generation_error_details}</div>"
            )
            message_placeholder.markdown(result["full_response"], unsafe_allow_html=True)
        else:
            # Trim formulaic sign-off lines the model tends to append.
            lines_to_remove = [
                "יהי רצון שנזכה לגאולה השלמה במהרה בימינו אמן.",
                "יהי רצון שנזכה...",
                "הכותב וחותם לכבוד התורה ולומדיה",
                "הכותב וחותם לכבוד התורה...",
                "בכבוד רב,",
                "בברכה,",
            ]
            cleaned_lines = final_response_text.strip().split('\n')
            while cleaned_lines:
                last_line = cleaned_lines[-1].strip()
                # Drop the last line if it exactly matches, or starts with, one
                # of the known sign-offs (the '...' entries act as prefixes).
                if any(
                    last_line.lower() == ltr.lower()
                    or last_line.lower().startswith(ltr.lower().replace('...', ''))
                    for ltr in lines_to_remove
                ):
                    cleaned_lines.pop()
                else:
                    break
            final_response_text = "\n".join(cleaned_lines).strip()
            result["full_response"] = final_response_text
            message_placeholder.markdown(
                f"<div dir='rtl'>{final_response_text}</div>", unsafe_allow_html=True
            )

    except Exception as e:
        pipeline_error_type = type(e).__name__
        pipeline_error_msg = str(e)
        result["error"] = f"שגיאה בזמן הריצה: {pipeline_error_type}: {pipeline_error_msg}"
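        # User-facing failure card: headline, retry hint, and the traceback
        # behind a collapsible "פרטים טכניים" section (<details> markup assumed).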
        result["full_response"] = (
            f"<div dir='rtl'>שגיאה במהלך העיבוד ({pipeline_error_type})<br>"
            f"אנא נסה שוב מאוחר יותר.<br>"
            f"<details><summary>פרטים טכניים</summary><pre>{traceback.format_exc()}</pre></details></div>"
        )
        message_placeholder.markdown(result["full_response"], unsafe_allow_html=True)
        if status_container:
            status_container.update(label="שגיאה בעיבוד!", state="error")
    return result


# --- Chat history (re-rendered on every Streamlit rerun) ---
if "messages" not in st.session_state:
    st.session_state.messages = []

for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        content_display = message["content"]
        # Wrap plain text in an RTL container; content that is already HTML
        # (e.g. stored error cards) passes through as-is (check assumed).
        if not content_display.strip().startswith('<'):
            content_display = f"<div dir='rtl'>{content_display}</div>"
        st.markdown(content_display, unsafe_allow_html=True)
        if message["role"] == "assistant" and message.get("final_docs"):
            final_docs_data = message["final_docs"]
            pipeline_flow_used = message.get("analysis_flow", "לא ידוע")
            st.expander("מסמכים שנמצאו", expanded=False).write(final_docs_data)
            expander_title_text = f"הצג {len(final_docs_data)} פסקאות מקור שנשלחו למחולל"
            if pipeline_flow_used == "Anthropic (ניתוח וסינון פרטני)":
                expander_title_text += " (לאחר סינון Anthropic פרטני)"
            elif pipeline_flow_used == "Gemini (אחזור ויצירה ישירה)":
                expander_title_text += " (ללא סינון נוסף)"
            elif pipeline_flow_used == "GPT-4o Paragraph Validator + Claude Synthesizer":
                expander_title_text += " (לאחר אימות GPT-4o פרטני)"
            else:
                expander_title_text += " (לאחר עיבוד)"
            with st.expander(expander_title_text, expanded=False):
                for i, doc in enumerate(final_docs_data):
                    score_info = ""  # Placeholder: per-source score display is not implemented.
                    source_name = doc.get('source_name', 'לא ידוע')
                    original_id = doc.get('original_id', 'N/A')
                    hebrew_text = doc.get('hebrew_text', 'טקסט המקור חסר')
                    st.markdown(
                        f"<div dir='rtl'><b>מקור {i + 1}:</b> ספר: {source_name}, ID: {original_id}{score_info}</div>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(f"<div dir='rtl'>{hebrew_text}</div>", unsafe_allow_html=True)
                    # Divider between sources (assumed <hr>).
                    st.markdown("<hr>", unsafe_allow_html=True)


# --- Chat input ---
if prompt := st.chat_input("שאל שאלה בענייני חסידות...", disabled=not services_ready, key="chat_input"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(f"<div dir='rtl'>{prompt}</div>", unsafe_allow_html=True)

    with st.chat_message("assistant"):
        status_control_asst = None
        rag_result_asst = None
        try:
            status_label = f"מעבד בקשה באמצעות '{pipeline_method}'..."
            with st.status(status_label, expanded=True) as status:
                status_control_asst = status
                rag_result_asst = run_rag_pipeline(
                    pipeline_prompt=prompt,
                    selected_pipeline_method=pipeline_method,
                    status_container=status_control_asst,
                )
            # Post-processing runs after the status block closes (st.status is
            # itself an expander, so nesting another expander inside would fail).
            if rag_result_asst and isinstance(rag_result_asst, dict):
                pipeline_error_value = rag_result_asst.get("error")
                final_docs_value = rag_result_asst.get("final_docs_data", [])
                # Persist the assistant turn so the history loop above can
                # re-render it (with its sources) on the next rerun; sources
                # are kept only for successful runs.
                final_docs_to_store = final_docs_value if pipeline_error_value is None else []
                flow_to_store = rag_result_asst.get("analysis_flow", "Error")
                if pipeline_error_value is not None:
                    flow_to_store = "Error"
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": rag_result_asst.get("full_response", "..."),
                    "final_docs": final_docs_to_store,
                    "analysis_flow": flow_to_store,
                })
                if rag_result_asst.get("status_updates"):
                    with st.expander("הצג שלבי עיבוד", expanded=False):
                        for update in rag_result_asst["status_updates"]:
                            st.markdown(f"<div dir='rtl'>- {update}</div>", unsafe_allow_html=True)
            else:
                fallback_err_msg_html = "<div dir='rtl'>שגיאה בלתי צפויה בתקשורת עם מנגנון העיבוד (fallback).</div>"
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": fallback_err_msg_html,
                    "final_docs": [],
                    "analysis_flow": "Error",
                })
        except Exception as e:
            error_display_html = f"<div dir='rtl'>שגיאה קריטית!<br><pre>{traceback.format_exc()}</pre></div>"
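            # st.error renders Markdown and escapes raw HTML, so only the
            # headline goes to the alert; the full HTML card is stored in the
            # history, which renders it with unsafe_allow_html.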
            st.error("שגיאה קריטית!", icon="🔥")
            st.session_state.messages.append({
                "role": "assistant",
                "content": error_display_html,
                "final_docs": [],
                "analysis_flow": "Critical Error",
            })