# Rohil Bansal, commit 2ed2129
import sys
import os
# Make the project's packages importable when this file is run directly as a
# script. PROJECT_ROOT is the directory two levels above the directory that
# contains this file (three dirname() steps from the file's absolute path).
# It is appended to sys.path, so it is searched after the existing entries.
_THIS_FILE = os.path.abspath(__file__)
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(_THIS_FILE)))
sys.path.append(PROJECT_ROOT)
from course_search.search_system.data_pipeline import DataPipeline
import logging
import argparse
def main():
    """Command-line entry point for the course scraping and embedding pipeline.

    Reads the ``--output`` option from the command line (default
    ``data/courses_with_embeddings.pkl``), calls
    ``DataPipeline().run_pipeline`` with that value as ``save_path``, then
    prints the length of the returned object and the output path.
    """
    cli = argparse.ArgumentParser(
        description='Run the course scraping and embedding pipeline'
    )
    cli.add_argument(
        '--output',
        type=str,
        default='data/courses_with_embeddings.pkl',
        help='Path to save the processed data',
    )
    output_path = cli.parse_args().output

    # Build the pipeline and run it, handing it the requested save location.
    courses = DataPipeline().run_pipeline(save_path=output_path)

    # Summary for the user; the path is echoed as given on the command line.
    print(f"\nProcessed {len(courses)} courses")
    print(f"Data saved to {output_path}")
if __name__ == "__main__":
    # Only when executed as a script: set the root logger to INFO before the
    # run starts so INFO-level records are not filtered out, then run the CLI.
    logging.basicConfig(level=logging.INFO)
    main()