"""Launcher script for the course search application.

Runs the data pipeline, initializes the RAG system with the resulting
data (using an on-disk cache directory), and then starts ``gradio_app.py``
(located next to this file) in a child Python process.
"""
import sys
import os
from pathlib import Path
import subprocess
import logging

# Make the project root importable before pulling in project packages.
# ``resolve()`` yields an absolute path, so the entry stays valid even if
# the working directory changes later.
project_root = Path(__file__).resolve().parent.parent.parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from course_search.search_system.data_pipeline import DataPipeline
from course_search.search_system.rag_system import RAGSystem

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def setup_paths():
    """Ensure the project root is importable and the data directory exists.

    Returns:
        tuple[Path, Path]: ``(project_root, data_dir)`` where ``data_dir`` is
        ``project_root / 'data'`` (created if missing).
    """
    project_root = Path(__file__).resolve().parent.parent.parent
    if str(project_root) not in sys.path:
        sys.path.append(str(project_root))

    data_dir = project_root / 'data'
    data_dir.mkdir(parents=True, exist_ok=True)

    return project_root, data_dir


def main():
    """Run the data pipeline, warm the RAG system, and launch the Gradio app.

    Raises:
        FileNotFoundError: If ``gradio_app.py`` is not found next to this file.
        subprocess.CalledProcessError: If the Gradio process exits non-zero.
        Exception: Any other error is logged and re-raised unchanged.
    """
    try:
        # Setup paths
        project_root, data_dir = setup_paths()

        # Create cache directory
        cache_dir = data_dir / 'cache'
        cache_dir.mkdir(parents=True, exist_ok=True)

        # Run data pipeline
        logger.info("Running data pipeline...")
        pipeline = DataPipeline()
        df = pipeline.run_pipeline(
            save_path=str(data_dir / 'courses.pkl'),
            force_scrape=False,  # Set to True to force new scraping
        )

        # Initialize RAG system with caching
        rag_system = RAGSystem()
        rag_system.load_and_process_data(df, cache_dir=cache_dir)

        # Run Gradio app
        logger.info("Starting Gradio app...")
        gradio_path = Path(__file__).resolve().parent / 'gradio_app.py'
        if not gradio_path.exists():
            raise FileNotFoundError(f"Gradio app not found at: {gradio_path}")

        # Put the project root first on the child's PYTHONPATH while keeping
        # whatever the caller already had configured.
        env = os.environ.copy()
        existing_pythonpath = env.get('PYTHONPATH')
        if existing_pythonpath:
            env['PYTHONPATH'] = os.pathsep.join(
                [str(project_root), existing_pythonpath]
            )
        else:
            env['PYTHONPATH'] = str(project_root)

        # Use the interpreter running this script (not whatever ``python``
        # resolves to on PATH) and run the child from the project root via
        # ``cwd`` instead of changing this process's working directory.
        subprocess.run(
            [sys.executable, str(gradio_path)],
            env=env,
            cwd=str(project_root),
            check=True,
        )

    except Exception as e:
        logger.error("Error running application: %s", e)
        raise


if __name__ == "__main__":
    main()