File size: 877 Bytes
2ed2129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import sys
import os
# Make project-level packages importable when this file is run directly.
# PROJECT_ROOT is the directory two levels above the folder that holds this
# script (script path -> its folder -> parent -> grandparent). The path is
# appended (lowest priority) and must be in place before the course_search
# import that follows.
_script_dir = os.path.dirname(os.path.abspath(__file__))
PROJECT_ROOT = os.path.dirname(os.path.dirname(_script_dir))
sys.path.append(PROJECT_ROOT)

from course_search.search_system.data_pipeline import DataPipeline
import logging
import argparse

def main():
    """Parse the command line, run the data pipeline, and print a summary.

    Recognises one optional flag, ``--output``, whose value is forwarded to
    ``DataPipeline.run_pipeline`` as ``save_path``. After the pipeline
    returns, prints the length of the returned object and the output path.
    """
    cli = argparse.ArgumentParser(
        description='Run the course scraping and embedding pipeline',
    )
    cli.add_argument(
        '--output',
        type=str,
        default='data/courses_with_embeddings.pkl',
        help='Path to save the processed data',
    )
    output_path = cli.parse_args().output

    # Build the pipeline with its defaults and hand it the destination path.
    # NOTE(review): the result is presumably a pandas DataFrame of courses;
    # only len() is relied on here. Confirm against DataPipeline.
    courses = DataPipeline().run_pipeline(save_path=output_path)

    print(f"\nProcessed {len(courses)} courses")
    print(f"Data saved to {output_path}")

if __name__ == "__main__":
    # Script entry point: only runs when the file is executed directly, not
    # when it is imported. Configure root logging at INFO level before
    # starting the pipeline so log records emitted during the run are shown.
    logging.basicConfig(level=logging.INFO)
    main()