# Rohil Bansal
# final
# 9bfbaa6
import gradio as gr
import pandas as pd
from pathlib import Path
import logging
from course_search.search_system.rag_system import RAGSystem
# Configure the root logger at INFO level and create this module's logger,
# which every function and method below uses for status and error messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class CourseSearchApp:
    """Course search backend used by the Gradio interface.

    Construction loads the pickled course table, validates it and feeds it to
    a ``RAGSystem``. ``search_courses`` then formats that system's results as
    a Markdown string.
    """

    # Columns the course DataFrame must contain before it is processed.
    REQUIRED_COLUMNS = ('title', 'description', 'curriculum', 'url')

    def __init__(self):
        """Initialize the search application.

        Raises:
            Exception: Whatever ``load_components`` raised; it is logged here
                and then re-raised unchanged.
        """
        try:
            self.load_components()
        except Exception as e:
            logger.error("Initialization error: %s", e)
            raise

    def load_components(self):
        """Load the course data and initialize the RAG system.

        Reads ``data/courses.pkl`` from the directory two levels above the
        one containing this module, validates the resulting DataFrame, and
        passes it to ``RAGSystem.load_and_process_data`` together with a
        ``cache`` directory created next to the data file.

        Raises:
            FileNotFoundError: The pickle file does not exist.
            ValueError: The DataFrame is empty or lacks a required column.
        """
        try:
            # Construct path to data file
            data_path = Path(__file__).parent.parent.parent / 'data' / 'courses.pkl'
            if not data_path.exists():
                raise FileNotFoundError(f"Data file not found at: {data_path}")

            # NOTE(security): unpickling can execute arbitrary code, so
            # courses.pkl must only ever come from a trusted source.
            df = pd.read_pickle(str(data_path))
            logger.info("Loaded %d courses from %s", len(df), data_path)

            # Validate DataFrame
            if len(df) == 0:
                raise ValueError("Empty DataFrame loaded")
            missing_columns = [col for col in self.REQUIRED_COLUMNS if col not in df.columns]
            if missing_columns:
                raise ValueError(f"Missing required columns: {missing_columns}")

            # Initialize RAG system
            self.rag_system = RAGSystem()

            # Cache directory lives beside the data file.
            cache_dir = data_path.parent / 'cache'
            cache_dir.mkdir(exist_ok=True)
            self.rag_system.load_and_process_data(df, cache_dir=cache_dir)
            logger.info("Components loaded successfully")
        except Exception as e:
            logger.error("Error loading components: %s", e)
            raise

    def search_courses(self, query: str, num_results: int, show_curriculum: bool) -> str:
        """Search for courses and format the results as Markdown for Gradio.

        Args:
            query: Free-text search query. A blank query is answered with a
                prompt instead of being sent to the RAG system.
            num_results: Maximum number of results; converted to ``int``
                before being passed on as ``top_k``.
            show_curriculum: When true, each result's curriculum is included.

        Returns:
            A Markdown document. Failures are reported as a Markdown error
            section rather than raised, so the UI always has text to render.
        """
        # Guard clause: nothing sensible can be searched for a blank query.
        if not query or not query.strip():
            return "# Search Results\n\nPlease enter a search query."

        try:
            # The UI slider may deliver a float; top_k is a count.
            results = self.rag_system.search_courses(query, top_k=int(num_results))
            matches = results['results']

            if not matches:
                return (
                    "# Search Results\n\n"
                    "No courses found matching your query. Try different keywords."
                )

            # Collect fragments and join once instead of repeated `+=`.
            parts = ["# Search Results\n\n"]
            for i, result in enumerate(matches, 1):
                parts.append(f"### {i}. {result['title']}\n\n")
                if result.get('description'):
                    parts.append(f"**Description:**\n{result['description']}\n\n")
                # Only show curriculum if checkbox is checked
                if show_curriculum and result.get('curriculum'):
                    parts.append(f"**Course Curriculum:**\n{result['curriculum']}\n\n")
                if result.get('url'):
                    parts.append(f"**Course Link:** [View Course]({result['url']})\n\n")
                parts.append("---\n\n")
            return "".join(parts)
        except Exception as e:
            error_msg = f"Error during search: {str(e)}"
            # The exception is swallowed here, so keep its traceback in the log.
            logger.exception(error_msg)
            return f"# Error\n\n{error_msg}"
def create_gradio_interface():
    """Build the Gradio interface around a new ``CourseSearchApp``.

    Returns:
        The configured ``gr.Interface``.

    Raises:
        Exception: Any error raised while creating the app or the interface
            is logged and re-raised.
    """
    try:
        search_app = CourseSearchApp()

        # Input widgets, listed in the positional order of
        # CourseSearchApp.search_courses(query, num_results, show_curriculum).
        query_box = gr.Textbox(
            label="What would you like to learn?",
            placeholder="e.g., machine learning for beginners, data visualization, python basics",
            lines=2,
        )
        result_count = gr.Slider(
            minimum=1,
            maximum=10,
            value=5,
            step=1,
            label="Number of Results",
        )
        curriculum_toggle = gr.Checkbox(
            label="Show Course Curriculum",
            value=False,
            info="Check this to view detailed curriculum for each course",
        )

        blurb = (
            "\n"
            "Search through Analytics Vidhya's free courses using natural language!\n"
            "Get personalized course recommendations based on your interests.\n"
        )

        return gr.Interface(
            fn=search_app.search_courses,
            inputs=[query_box, result_count, curriculum_toggle],
            outputs=gr.Markdown(),
            title="Analytics Vidhya Course Search",
            description=blurb,
            theme=gr.themes.Soft(),
        )
    except Exception as err:
        logger.error(f"Error creating Gradio interface: {str(err)}")
        raise
def main(server_name: str = "0.0.0.0", server_port: int = 7860, share: bool = True) -> None:
    """Build the Gradio interface and launch the web server.

    The defaults reproduce the previously hard-coded launch settings, so
    calling ``main()`` with no arguments behaves as before.

    Args:
        server_name: Host/interface handed to ``launch``; ``"0.0.0.0"``
            listens on all network interfaces.
        server_port: TCP port handed to ``launch``.
        share: Forwarded to ``launch``; per the Gradio documentation a true
            value requests a public share link. Pass ``False`` for a purely
            local or private deployment.

    Raises:
        Exception: Any error from building or launching the interface is
            logged and re-raised.
    """
    try:
        iface = create_gradio_interface()
        iface.launch(
            server_name=server_name,
            server_port=server_port,
            share=share,
        )
    except Exception as e:
        logger.error(f"Error launching Gradio app: {str(e)}")
        raise


# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()