Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
from pathlib import Path | |
import logging | |
from course_search.search_system.rag_system import RAGSystem | |
# Configure the root logger at INFO once, at import time, then create the
# named logger that the rest of this module writes to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class CourseSearchApp:
    """Course search front end backed by a ``RAGSystem``.

    Construction loads the pickled course table, validates it and hands it to
    the RAG system. ``search_courses`` then turns a query into a Markdown
    string suitable for a Gradio ``Markdown`` output.
    """

    # Columns the course DataFrame must contain before it is indexed.
    REQUIRED_COLUMNS = ('title', 'description', 'curriculum', 'url')

    def __init__(self):
        """Initialize the search application.

        Raises:
            Exception: whatever ``load_components`` raises; that method
                already logs the failure, so it is not logged again here.
        """
        self.load_components()

    def load_components(self):
        """Initialize the RAG system and load the course data.

        Reads ``data/courses.pkl`` (three directories above this file),
        checks that it is non-empty and has every column in
        ``REQUIRED_COLUMNS``, then passes it to
        ``RAGSystem.load_and_process_data`` together with a ``cache``
        directory created next to the data file.

        Raises:
            FileNotFoundError: the data file does not exist.
            ValueError: the DataFrame is empty or lacks required columns.
            Exception: anything raised while reading or processing the data;
                every failure is logged with its traceback and re-raised.
        """
        try:
            # Construct path to data file
            data_path = Path(__file__).parent.parent.parent / 'data' / 'courses.pkl'
            if not data_path.exists():
                raise FileNotFoundError(f"Data file not found at: {data_path}")

            # Load saved course data.
            # NOTE(security): unpickling can execute arbitrary code. This is
            # only acceptable if courses.pkl is a trusted, app-shipped file --
            # confirm it is never user-supplied.
            df = pd.read_pickle(str(data_path))
            logger.info(f"Loaded {len(df)} courses from {data_path}")

            # Validate DataFrame
            if len(df) == 0:
                raise ValueError("Empty DataFrame loaded")
            missing_columns = [col for col in self.REQUIRED_COLUMNS if col not in df.columns]
            if missing_columns:
                raise ValueError(f"Missing required columns: {missing_columns}")

            # Create cache directory
            cache_dir = data_path.parent / 'cache'
            cache_dir.mkdir(exist_ok=True)

            # Build into a local first so a failed load never leaves a
            # half-initialized system on the instance.
            rag_system = RAGSystem()
            rag_system.load_and_process_data(df, cache_dir=cache_dir)
            self.rag_system = rag_system
            logger.info("Components loaded successfully")
        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception("Error loading components: %s", e)
            raise

    def search_courses(self, query: str, num_results: int, show_curriculum: bool) -> str:
        """Search for courses and format the results as Markdown for Gradio.

        Args:
            query: Free-text search query. Leading/trailing whitespace is
                stripped; a blank query returns a prompt without searching.
            num_results: Maximum number of results. Coerced to ``int`` because
                a UI slider may deliver a float such as ``5.0``.
            show_curriculum: When true, include each result's ``curriculum``
                field (if present and non-empty) in the output.

        Returns:
            A Markdown document: a heading followed by one section per
            result, a "no matches" line when the result list is empty, or an
            ``# Error`` document if anything raised during the search.
        """
        try:
            cleaned_query = (query or "").strip()
            if not cleaned_query:
                return "# Search Results\n\nPlease enter a search query.\n"

            top_k = int(num_results)
            results = self.rag_system.search_courses(cleaned_query, top_k=top_k)

            # Collect fragments and join once instead of repeated `+=`.
            parts = ["# Search Results\n\n"]
            matches = results['results']
            for i, result in enumerate(matches, 1):
                parts.append(f"### {i}. {result['title']}\n\n")
                if result.get('description'):
                    parts.append(f"**Description:**\n{result['description']}\n\n")
                # Only show curriculum if checkbox is checked
                if show_curriculum and result.get('curriculum'):
                    parts.append(f"**Course Curriculum:**\n{result['curriculum']}\n\n")
                if result.get('url'):
                    parts.append(f"**Course Link:** [View Course]({result['url']})\n\n")
                parts.append("---\n\n")

            if not matches:
                parts.append("No matching courses found.\n")
            return "".join(parts)
        except Exception as e:
            # UI boundary: report the failure in the output instead of
            # propagating, but keep the traceback in the log.
            error_msg = f"Error during search: {str(e)}"
            logger.exception(error_msg)
            return f"# Error\n\n{error_msg}"
def create_gradio_interface():
    """Build the Gradio interface around a new ``CourseSearchApp``.

    Returns:
        A ``gr.Interface`` whose inputs are a query textbox, a result-count
        slider (1-10, default 5) and a "show curriculum" checkbox, and whose
        output is a Markdown panel fed by ``CourseSearchApp.search_courses``.

    Raises:
        Exception: any error from constructing the app or the interface is
            logged and re-raised.
    """
    try:
        search_app = CourseSearchApp()

        # Input widgets, in the positional order search_courses expects.
        query_box = gr.Textbox(
            label="What would you like to learn?",
            placeholder="e.g., machine learning for beginners, data visualization, python basics",
            lines=2,
        )
        result_count = gr.Slider(
            minimum=1,
            maximum=10,
            value=5,
            step=1,
            label="Number of Results",
        )
        curriculum_toggle = gr.Checkbox(
            label="Show Course Curriculum",
            value=False,
            info="Check this to view detailed curriculum for each course",
        )

        blurb = (
            "\n"
            "Search through Analytics Vidhya's free courses using natural language!\n"
            "Get personalized course recommendations based on your interests.\n"
        )

        return gr.Interface(
            fn=search_app.search_courses,
            inputs=[query_box, result_count, curriculum_toggle],
            outputs=gr.Markdown(),
            title="Analytics Vidhya Course Search",
            description=blurb,
            theme=gr.themes.Soft(),
        )
    except Exception as e:
        failure = f"Error creating Gradio interface: {str(e)}"
        logger.error(failure)
        raise
def main():
    """Build the Gradio interface and launch it.

    The server is launched with ``server_name="0.0.0.0"``, port 7860 and
    ``share=True``. Any failure while building or launching is logged and
    re-raised.
    """
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    try:
        create_gradio_interface().launch(**launch_options)
    except Exception as e:
        failure = f"Error launching Gradio app: {str(e)}"
        logger.error(failure)
        raise
# Start the app only when this file is executed as a script, so importing the
# module does not launch a server.
if __name__ == "__main__":
    main()