File size: 5,071 Bytes
2ed2129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee7ea09
2ed2129
 
 
 
 
 
 
 
ee7ea09
 
 
 
 
 
 
 
 
2ed2129
 
ee7ea09
 
 
 
 
 
2ed2129
 
 
 
 
 
9bfbaa6
2ed2129
 
 
 
 
 
 
 
 
9bfbaa6
 
 
 
 
 
 
 
 
 
 
 
2ed2129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9bfbaa6
 
 
 
 
2ed2129
 
 
 
 
 
9bfbaa6
2ed2129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import gradio as gr
import pandas as pd
from pathlib import Path
import logging
from course_search.search_system.rag_system import RAGSystem

# Setup logging: configure the root logger at INFO level and create the
# module-level logger that the class and functions below use for reporting.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class CourseSearchApp:
    """Course search backend used by the Gradio interface.

    Construction loads the pickled course table, validates it, and feeds it
    to a ``RAGSystem`` stored on ``self.rag_system``. ``search_courses`` then
    turns a query into a Markdown string suitable for a ``gr.Markdown`` output.
    """

    # Columns the course DataFrame must contain before it is processed.
    REQUIRED_COLUMNS = ('title', 'description', 'curriculum', 'url')

    def __init__(self):
        """Initialize the search application.

        Raises:
            Exception: Any error raised by ``load_components`` is logged and
                re-raised unchanged.
        """
        try:
            self.load_components()
        except Exception as e:
            logger.error(f"Initialization error: {str(e)}")
            raise

    def load_components(self):
        """Initialize the RAG system and load the course data.

        The data file is resolved relative to this source file:
        ``<this file's directory>/../../data/courses.pkl``. A ``cache``
        directory is created next to the data file and passed to the RAG
        system.

        Raises:
            FileNotFoundError: If the data file does not exist.
            ValueError: If the loaded DataFrame has no rows or lacks any of
                ``REQUIRED_COLUMNS``.
        """
        try:
            # Construct path to data file
            data_path = Path(__file__).parent.parent.parent / 'data' / 'courses.pkl'

            if not data_path.exists():
                raise FileNotFoundError(f"Data file not found at: {data_path}")

            # Load saved course data.
            # NOTE(security): unpickling can execute arbitrary code, so
            # courses.pkl must only ever come from a trusted source.
            df = pd.read_pickle(str(data_path))
            logger.info(f"Loaded {len(df)} courses from {data_path}")

            # Validate DataFrame (row count first, then schema)
            if len(df) == 0:
                raise ValueError("Empty DataFrame loaded")

            missing_columns = [col for col in self.REQUIRED_COLUMNS if col not in df.columns]
            if missing_columns:
                raise ValueError(f"Missing required columns: {missing_columns}")

            # Initialize RAG system
            self.rag_system = RAGSystem()

            # Create cache directory alongside the data file
            cache_dir = data_path.parent / 'cache'
            cache_dir.mkdir(exist_ok=True)

            self.rag_system.load_and_process_data(df, cache_dir=cache_dir)
            logger.info("Components loaded successfully")

        except Exception as e:
            logger.error(f"Error loading components: {str(e)}")
            raise

    def search_courses(self, query: str, num_results: int, show_curriculum: bool) -> str:
        """Search for courses and format the results as Markdown for Gradio.

        Args:
            query: Free-text search query. Surrounding whitespace is ignored;
                a blank (or ``None``) query returns a prompt message without
                calling the RAG system.
            num_results: Maximum number of results to request. Coerced to
                ``int`` because UI sliders may deliver the value as a float.
            show_curriculum: When true, include each course's curriculum.

        Returns:
            A Markdown document: the numbered results, a "no results" notice,
            a prompt to enter a query, or an ``# Error`` section if the search
            raised an exception (the exception is logged, not propagated).
        """
        cleaned_query = (query or "").strip()
        if not cleaned_query:
            # Nothing to search for; avoid a pointless backend round-trip.
            return "# Search Results\n\nPlease enter a search query to find courses.\n"

        try:
            results = self.rag_system.search_courses(cleaned_query, top_k=int(num_results))
            matches = results['results']

            if not matches:
                return "# Search Results\n\nNo matching courses found. Try a different query.\n"

            # Collect fragments and join once instead of repeated string +=.
            parts = ["# Search Results\n\n"]
            for i, result in enumerate(matches, 1):
                parts.append(f"### {i}. {result['title']}\n\n")

                if result.get('description'):
                    parts.append(f"**Description:**\n{result['description']}\n\n")

                # Only show curriculum if checkbox is checked
                if show_curriculum and result.get('curriculum'):
                    parts.append(f"**Course Curriculum:**\n{result['curriculum']}\n\n")

                if result.get('url'):
                    parts.append(f"**Course Link:** [View Course]({result['url']})\n\n")

                parts.append("---\n\n")

            return "".join(parts)

        except Exception as e:
            # UI boundary: report the failure in the output pane instead of
            # letting the exception escape into Gradio.
            error_msg = f"Error during search: {str(e)}"
            logger.error(error_msg)
            return f"# Error\n\n{error_msg}"

def create_gradio_interface():
    """Build the Gradio interface backed by a new ``CourseSearchApp``.

    Returns:
        The configured ``gr.Interface`` whose callback is the app's
        ``search_courses`` method.

    Raises:
        Exception: Any failure while constructing the app or the interface is
            logged and re-raised.
    """
    try:
        search_app = CourseSearchApp()

        # Input widgets, listed in the positional order of search_courses'
        # parameters: query, num_results, show_curriculum.
        query_box = gr.Textbox(
            label="What would you like to learn?",
            placeholder="e.g., machine learning for beginners, data visualization, python basics",
            lines=2,
        )
        result_count = gr.Slider(
            minimum=1,
            maximum=10,
            value=5,
            step=1,
            label="Number of Results",
        )
        curriculum_toggle = gr.Checkbox(
            label="Show Course Curriculum",
            value=False,
            info="Check this to view detailed curriculum for each course",
        )
        results_pane = gr.Markdown()

        # Kept with its original embedded newlines and indentation.
        page_description = (
            "\n"
            "            Search through Analytics Vidhya's free courses using natural language!\n"
            "            Get personalized course recommendations based on your interests.\n"
            "            "
        )

        return gr.Interface(
            fn=search_app.search_courses,
            inputs=[query_box, result_count, curriculum_toggle],
            outputs=results_pane,
            title="Analytics Vidhya Course Search",
            description=page_description,
            theme=gr.themes.Soft(),
        )

    except Exception as e:
        logger.error(f"Error creating Gradio interface: {str(e)}")
        raise

def main():
    """Build the Gradio interface and launch the web server.

    Any exception raised while building or launching is logged and re-raised.
    """
    # Listen on all interfaces at port 7860 and request a share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    try:
        interface = create_gradio_interface()
        interface.launch(**launch_options)
    except Exception as e:
        logger.error(f"Error launching Gradio app: {str(e)}")
        raise

# Start the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()