# Rohil Bansal
# search improved
# 821284f
import sys
import os
from pathlib import Path
import subprocess
import logging
# Make the repository root importable before the project-local imports run.
# Resolve first: a relative __file__ would otherwise collapse
# `.parent.parent.parent` to "." and register the wrong directory.
project_root = Path(__file__).resolve().parent.parent.parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))
from course_search.search_system.data_pipeline import DataPipeline
from course_search.search_system.rag_system import RAGSystem
# Setup logging: configure the root logger at INFO level and create this
# module's logger, which main() uses for progress and error messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def setup_paths(project_root=None):
    """Setup necessary paths and directories.

    Ensures the project root is on ``sys.path`` and that a ``data``
    directory exists directly under it.

    Args:
        project_root: Optional root directory (``str`` or path-like). When
            omitted, the root is the grandparent of the directory that
            contains this file.

    Returns:
        A ``(project_root, data_dir)`` tuple of absolute ``Path`` objects,
        where ``data_dir`` is ``project_root / 'data'``.
    """
    if project_root is None:
        # Resolve before walking upwards: with a relative __file__ the
        # parent chain would otherwise bottom out at ".".
        project_root = Path(__file__).resolve().parent.parent.parent
    else:
        project_root = Path(project_root).resolve()

    root_entry = str(project_root)
    if root_entry not in sys.path:
        sys.path.append(root_entry)

    data_dir = project_root / 'data'
    # parents=True keeps this working for a caller-supplied root that does
    # not exist yet; exist_ok=True makes repeated calls harmless.
    data_dir.mkdir(parents=True, exist_ok=True)
    return project_root, data_dir
def main():
    """Run the data pipeline, load the RAG system, then launch the Gradio app.

    Steps, in order:
      1. Call ``setup_paths()`` and create ``data/cache`` under the data dir.
      2. Run ``DataPipeline.run_pipeline`` with ``data/courses.pkl`` as the
         save path and ``force_scrape=False``.
      3. Pass the pipeline result to ``RAGSystem.load_and_process_data``
         together with the cache directory.
      4. Run ``gradio_app.py`` (located next to this file) in a child
         process, with the project root as working directory and on
         ``PYTHONPATH``. This call blocks until the child exits.

    Raises:
        FileNotFoundError: If ``gradio_app.py`` is not found next to this file.
        subprocess.CalledProcessError: If the child exits with a non-zero
            status (``check=True``).
        Exception: Any other error is logged and re-raised unchanged.
    """
    try:
        # Setup paths
        project_root, data_dir = setup_paths()

        # Create cache directory
        cache_dir = data_dir / 'cache'
        cache_dir.mkdir(exist_ok=True)

        # Run data pipeline
        logger.info("Running data pipeline...")
        pipeline = DataPipeline()
        df = pipeline.run_pipeline(
            save_path=str(data_dir / 'courses.pkl'),
            force_scrape=False  # Set to True to force new scraping
        )

        # Initialize RAG system with caching.
        # NOTE(review): this instance is not passed to the child process;
        # presumably the call only warms the on-disk cache -- confirm.
        rag_system = RAGSystem()
        rag_system.load_and_process_data(df, cache_dir=cache_dir)

        # Run Gradio app
        logger.info("Starting Gradio app...")
        # Resolve to an absolute path *before* the chdir below, otherwise a
        # relative __file__ would point somewhere else once cwd changes.
        gradio_path = Path(__file__).resolve().parent / 'gradio_app.py'
        if not gradio_path.exists():
            raise FileNotFoundError(f"Gradio app not found at: {gradio_path}")

        # Change to project root directory before running
        os.chdir(str(project_root))

        # Prepend the project root to PYTHONPATH instead of overwriting it,
        # so entries already set in the environment are kept.
        env = os.environ.copy()
        inherited_pythonpath = env.get('PYTHONPATH')
        if inherited_pythonpath:
            env['PYTHONPATH'] = os.pathsep.join(
                [str(project_root), inherited_pythonpath]
            )
        else:
            env['PYTHONPATH'] = str(project_root)

        # Use the interpreter running this script (same virtualenv and
        # installed packages) rather than whatever 'python' is on PATH.
        # sys.executable may be empty in unusual embeddings, so fall back
        # to the original bare name.
        subprocess.run(
            [sys.executable or 'python', str(gradio_path)],
            env=env,
            check=True
        )
    except Exception as e:
        # Top-level boundary: log for the operator, then let it propagate.
        logger.error("Error running application: %s", e)
        raise
# Script entry point: run the launcher only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()