Spaces:
Sleeping
Sleeping
import sys | |
import os | |
# Make the project root importable. The root is the directory three levels
# above this file; climb there one directory at a time.
PROJECT_ROOT = os.path.abspath(__file__)
for _ in range(3):
    PROJECT_ROOT = os.path.dirname(PROJECT_ROOT)
sys.path.append(PROJECT_ROOT)
from course_search.search_system.data_pipeline import DataPipeline | |
import logging | |
import argparse | |
def main():
    """Run the course data pipeline from the command line.

    Reads the ``--output`` option (default
    ``data/courses_with_embeddings.pkl``), calls
    ``DataPipeline().run_pipeline`` with that value as ``save_path``, and
    then prints the length of the returned object together with the output
    path.

    NOTE(review): this script does not write the file itself; saving is
    presumably done inside ``run_pipeline`` -- confirm against DataPipeline.
    """
    cli = argparse.ArgumentParser(
        description='Run the course scraping and embedding pipeline'
    )
    cli.add_argument(
        '--output',
        type=str,
        default='data/courses_with_embeddings.pkl',
        help='Path to save the processed data',
    )
    output_path = cli.parse_args().output

    # Build the pipeline and run it, handing over the destination path.
    courses = DataPipeline().run_pipeline(save_path=output_path)

    print(f"\nProcessed {len(courses)} courses")
    print(f"Data saved to {output_path}")
if __name__ == "__main__":
    # Configure logging first so INFO-level messages emitted while main()
    # runs are not dropped by the default WARNING threshold.
    logging.basicConfig(level=logging.INFO)
    main()