"""Gradio web front end for searching courses through the RAG system."""

import logging
from pathlib import Path

import gradio as gr
import pandas as pd

from course_search.search_system.rag_system import RAGSystem

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CourseSearchApp:
    """Load the course data once and answer search queries as Markdown."""

    def __init__(self) -> None:
        """Initialize the search application.

        Raises:
            Exception: Whatever ``load_components`` raises, after logging it.
        """
        try:
            self.load_components()
        except Exception as e:
            logger.error(f"Initialization error: {str(e)}")
            raise

    def load_components(self) -> None:
        """Initialize the RAG system and load the pickled course data.

        The data file is expected at ``<three levels above this file>/data/courses.pkl``.
        A ``cache`` directory is created next to it and handed to the RAG system.

        Raises:
            FileNotFoundError: If the data file does not exist.
            ValueError: If the DataFrame has no rows or lacks a required column.
        """
        try:
            # Construct path to data file
            data_path = Path(__file__).parent.parent.parent / 'data' / 'courses.pkl'

            if not data_path.exists():
                raise FileNotFoundError(f"Data file not found at: {data_path}")

            # Load saved course data.
            # NOTE(review): unpickling executes arbitrary code; this is only
            # safe while courses.pkl is produced by a trusted pipeline.
            df = pd.read_pickle(str(data_path))
            logger.info(f"Loaded {len(df)} courses from {data_path}")

            # Validate DataFrame
            if len(df) == 0:
                raise ValueError("Empty DataFrame loaded")

            required_columns = ['title', 'description', 'curriculum', 'url']
            missing_columns = [col for col in required_columns if col not in df.columns]
            if missing_columns:
                raise ValueError(f"Missing required columns: {missing_columns}")

            # Initialize RAG system
            self.rag_system = RAGSystem()

            # Create cache directory
            cache_dir = data_path.parent / 'cache'
            cache_dir.mkdir(exist_ok=True)

            self.rag_system.load_and_process_data(df, cache_dir=cache_dir)
            logger.info("Components loaded successfully")

        except Exception as e:
            logger.error(f"Error loading components: {str(e)}")
            raise

    def search_courses(self, query: str, num_results: int, show_curriculum: bool) -> str:
        """Search for courses and format the results as Markdown for Gradio.

        Args:
            query: Free-text search query typed by the user.
            num_results: Maximum number of results to request; coerced to
                ``int`` because a UI slider may deliver a float.
            show_curriculum: When true, include each course's curriculum.

        Returns:
            A Markdown document. A blank query yields a prompt to enter one,
            an empty result set yields a "no matches" note, and any failure
            during the search yields an ``# Error`` document instead of raising.
        """
        # A blank query carries nothing to search on; skip the search entirely.
        if not query or not query.strip():
            return "# Search Results\n\nPlease enter a search query.\n"

        try:
            results = self.rag_system.search_courses(query, top_k=int(num_results))

            # Collect the pieces and join once instead of repeated string +=.
            parts = ["# Search Results\n\n"]

            for i, result in enumerate(results['results'], 1):
                parts.append(f"### {i}. {result['title']}\n\n")

                if result.get('description'):
                    parts.append(f"**Description:**\n{result['description']}\n\n")

                # Only show curriculum if checkbox is checked
                if show_curriculum and result.get('curriculum'):
                    parts.append(f"**Course Curriculum:**\n{result['curriculum']}\n\n")

                if result.get('url'):
                    parts.append(f"**Course Link:** [View Course]({result['url']})\n\n")

                parts.append("---\n\n")

            # Only the heading is present: tell the user nothing matched.
            if len(parts) == 1:
                parts.append("No matching courses found.\n")

            return "".join(parts)

        except Exception as e:
            error_msg = f"Error during search: {str(e)}"
            # The exception is swallowed here, so keep the traceback in the log.
            logger.exception(error_msg)
            return f"# Error\n\n{error_msg}"


def create_gradio_interface() -> gr.Interface:
    """Create and configure the Gradio interface.

    Builds a ``CourseSearchApp`` and wires its ``search_courses`` method to a
    text box, a result-count slider (1-10, default 5) and a curriculum checkbox.

    Returns:
        The configured, not yet launched, ``gr.Interface``.

    Raises:
        Exception: Any error from building the app or the interface, after
            logging it.
    """
    try:
        app = CourseSearchApp()

        # Define the interface
        iface = gr.Interface(
            fn=app.search_courses,
            inputs=[
                gr.Textbox(
                    label="What would you like to learn?",
                    placeholder="e.g., machine learning for beginners, data visualization, python basics",
                    lines=2,
                ),
                gr.Slider(
                    minimum=1,
                    maximum=10,
                    value=5,
                    step=1,
                    label="Number of Results",
                ),
                gr.Checkbox(
                    label="Show Course Curriculum",
                    value=False,
                    info="Check this to view detailed curriculum for each course",
                ),
            ],
            outputs=gr.Markdown(),
            title="Analytics Vidhya Course Search",
            description="""
            Search through Analytics Vidhya's free courses using natural language!
            Get personalized course recommendations based on your interests.
            """,
            theme=gr.themes.Soft(),
        )

        return iface

    except Exception as e:
        logger.error(f"Error creating Gradio interface: {str(e)}")
        raise


def main() -> None:
    """Build the Gradio interface and launch it on port 7860.

    Raises:
        Exception: Any error from building or launching the app, after
            logging it.
    """
    try:
        iface = create_gradio_interface()
        # NOTE(review): this binds to all network interfaces and also requests
        # a public Gradio share link; confirm that exposure is intended.
        iface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=True,
        )
    except Exception as e:
        logger.error(f"Error launching Gradio app: {str(e)}")
        raise


if __name__ == "__main__":
    main()