# NOTE: the words below are web-page header residue captured with this file, not code.
# Spaces: Sleeping
# Sleeping
import sys
import os
from pathlib import Path
import subprocess
import logging

# Make the project root importable before the project-local imports below
# run. The root is the directory two levels above the one containing this
# file. It is resolved to an absolute path so the sys.path entry stays valid
# even if the working directory changes later (main() calls os.chdir).
project_root = Path(__file__).resolve().parent.parent.parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# These imports must come after the sys.path bootstrap above.
from course_search.search_system.data_pipeline import DataPipeline  # noqa: E402
from course_search.search_system.rag_system import RAGSystem  # noqa: E402

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def setup_paths():
    """Resolve the project root, make it importable, and ensure ``data/`` exists.

    The project root is the directory two levels above the one containing
    this file. It is appended to ``sys.path`` if not already present, and a
    ``data`` directory is created directly beneath it when missing.

    Returns:
        tuple[Path, Path]: ``(project_root, data_dir)`` as absolute paths.
    """
    # Resolve to an absolute path: callers may change the working directory
    # afterwards, which would invalidate paths derived from a relative
    # ``__file__``.
    project_root = Path(__file__).resolve().parent.parent.parent
    if str(project_root) not in sys.path:
        sys.path.append(str(project_root))

    data_dir = project_root / 'data'
    data_dir.mkdir(exist_ok=True)
    return project_root, data_dir
def main():
    """Run the data pipeline, load the RAG system, then launch the Gradio app.

    Steps, in order:
      1. Call ``setup_paths()`` and ensure ``data/cache`` exists.
      2. Run ``DataPipeline.run_pipeline`` with ``save_path`` set to
         ``data/courses.pkl`` and ``force_scrape=False``.
      3. Pass the pipeline result to ``RAGSystem.load_and_process_data``
         together with the cache directory.
      4. Change the working directory to the project root and run
         ``gradio_app.py`` (located next to this file) as a child process,
         waiting for it to exit.

    Raises:
        FileNotFoundError: If ``gradio_app.py`` is not found next to this file.
        subprocess.CalledProcessError: If the Gradio process exits non-zero.
        Exception: Any other failure is logged with its traceback and re-raised.
    """
    try:
        # Setup paths. Resolve them to absolute form so they remain valid
        # after the os.chdir() below, even when __file__ is relative (which
        # can happen for scripts run on Python versions before 3.9).
        project_root, data_dir = setup_paths()
        project_root = project_root.resolve()
        data_dir = data_dir.resolve()

        # Create cache directory
        cache_dir = data_dir / 'cache'
        cache_dir.mkdir(exist_ok=True)

        # Run data pipeline
        logger.info("Running data pipeline...")
        pipeline = DataPipeline()
        df = pipeline.run_pipeline(
            save_path=str(data_dir / 'courses.pkl'),
            force_scrape=False,  # Set to True to force new scraping
        )

        # Initialize RAG system with caching
        rag_system = RAGSystem()
        rag_system.load_and_process_data(df, cache_dir=cache_dir)

        # Run Gradio app
        logger.info("Starting Gradio app...")
        gradio_path = (Path(__file__).parent / 'gradio_app.py').resolve()
        if not gradio_path.exists():
            raise FileNotFoundError(f"Gradio app not found at: {gradio_path}")

        # Change to project root directory before running
        os.chdir(str(project_root))

        # Put the project root first on the child's PYTHONPATH while keeping
        # any entries the caller already had, instead of overwriting them.
        env = os.environ.copy()
        inherited = env.get('PYTHONPATH')
        if inherited:
            env['PYTHONPATH'] = os.pathsep.join([str(project_root), inherited])
        else:
            env['PYTHONPATH'] = str(project_root)

        # Use the interpreter running this script so the child sees the same
        # environment and installed packages; a bare 'python' resolves via
        # PATH and may be a different interpreter (or missing entirely).
        interpreter = sys.executable or 'python'
        subprocess.run(
            [interpreter, str(gradio_path)],
            env=env,
            check=True,
        )
    except Exception as e:
        # Top-level boundary: record the full traceback, then propagate.
        logger.exception("Error running application: %s", e)
        raise
# Run the launcher only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()