import gradio as gr
import pandas as pd
import tempfile
import os
from io import BytesIO
import openai
import hashlib
import json
import asyncio
from pathlib import Path

import gradio_client.utils
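# Note (assumption, based on a known gradio_client issue): JSON Schema allows a bare boolean
# (e.g. "additionalProperties": false) as a schema, which makes _json_schema_to_python_type
# raise "argument of type 'bool' is not iterable" on some gradio/gradio_client versions.
# The wrapper below falls back to "any" in that case and delegates everything else unchanged.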
_original_json_schema_to_python_type = gradio_client.utils._json_schema_to_python_type


def _fixed_json_schema_to_python_type(schema, defs=None):
    # If the schema is a bool, return a fallback type (e.g. "any")
    if isinstance(schema, bool):
        return "any"
    return _original_json_schema_to_python_type(schema, defs)


gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type
# Create the cache directory if it doesn't exist
CACHE_DIR = Path("ai_response_cache")
CACHE_DIR.mkdir(exist_ok=True)


def get_cache_path(prompt):
    """Generate a unique cache file path based on the prompt content."""
    prompt_hash = hashlib.md5(prompt.encode('utf-8')).hexdigest()
    return CACHE_DIR / f"{prompt_hash}.json"


def get_cached_response(prompt):
    """Try to get a cached response for the given prompt."""
    cache_path = get_cache_path(prompt)
    if cache_path.exists():
        try:
            with open(cache_path, 'r', encoding='utf-8') as f:
                return json.load(f)['response']
        except Exception as e:
            print(f"Error reading cache: {e}")
    return None


def cache_response(prompt, response):
    """Cache the response for a given prompt."""
    cache_path = get_cache_path(prompt)
    try:
        with open(cache_path, 'w', encoding='utf-8') as f:
            json.dump({'prompt': prompt, 'response': response}, f)
    except Exception as e:
        print(f"Error writing to cache: {e}")
async def process_text_batch_async(client, batch_prompts):
    """Process a batch of prompts asynchronously."""
    results = []

    # First check the cache for each prompt
    for prompt in batch_prompts:
        cached = get_cached_response(prompt)
        if cached:
            results.append((prompt, cached))

    # Filter out prompts that were found in the cache
    cached_prompts = {prompt for prompt, _ in results}
    uncached_prompts = [p for p in batch_prompts if p not in cached_prompts]

    if uncached_prompts:
        # Process uncached prompts in parallel
        async def process_single_prompt(prompt):
            try:
                response = await client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0
                )
                result = response.choices[0].message.content
                # Cache the result
                cache_response(prompt, result)
                return prompt, result
            except Exception as e:
                print(f"Error processing prompt: {e}")
                return prompt, f"Error: {str(e)}"

        # Create tasks for all uncached prompts and run them concurrently
        tasks = [process_single_prompt(prompt) for prompt in uncached_prompts]
        uncached_results = await asyncio.gather(*tasks)

        # Combine cached and newly processed results
        results.extend(uncached_results)

    # Return results in the original order of batch_prompts
    prompt_to_result = {prompt: result for prompt, result in results}
    return [prompt_to_result[prompt] for prompt in batch_prompts]
async def process_text_with_ai_async(texts, instruction):
    """Process text with GPT-4o-mini asynchronously, in batches."""
    if not texts:
        return []

    results = []
    batch_size = 500

    # Create the async OpenAI client
    client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Process in batches
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        batch_prompts = [f"{instruction}\n\nText: {text}" for text in batch]
        batch_results = await process_text_batch_async(client, batch_prompts)
        results.extend(batch_results)

    return results
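# Standalone usage sketch (assumes OPENAI_API_KEY is set; all names are from this module):
#   summaries = asyncio.run(
#       process_text_with_ai_async(
#           ["An introductory course on cloud networking."],
#           "Create a concise 250-character summary of this course description:",
#       )
#   )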
def process_woocommerce_data_in_memory(netcom_file):
    """
    Reads the uploaded NetCom CSV file in memory, processes it into the WooCommerce format,
    and returns the resulting CSV as an in-memory buffer, suitable for download.
    """
    # Define the brand-to-logo mapping with updated URLs
    brand_logo_map = {
        "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
        "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
        "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
        "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
        "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
        "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
        "PMI": "/wp-content/uploads/2025/04/PMI.png",
        "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
        "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
        "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
        "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png"
    }

    # Default prerequisite text for courses without prerequisites
    default_prerequisite = (
        "No specific prerequisites are required for this course. Basic computer literacy and "
        "familiarity with fundamental concepts in the subject area are recommended for the best "
        "learning experience."
    )

    # 1. Read the uploaded CSV into a DataFrame
    netcom_df = pd.read_csv(netcom_file.name, encoding='latin1')
    netcom_df.columns = netcom_df.columns.str.strip()  # standardize column names

    # Prepare text columns for AI processing ('Decription' matches the source CSV's header spelling)
    descriptions = netcom_df['Decription'].fillna("").tolist()
    objectives = netcom_df['Objectives'].fillna("").tolist()
    prerequisites = netcom_df['RequiredPrerequisite'].fillna("").tolist()
    agendas = netcom_df['Outline'].fillna("").tolist()
    # Process text with AI asynchronously
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    # Coroutines for the four bulk text-processing passes, gathered concurrently below
    tasks = [
        process_text_with_ai_async(
            descriptions,
            "Create a concise 250-character summary of this course description:"
        ),
        process_text_with_ai_async(
            descriptions,
            "Condense this description to maximum 750 characters in paragraph format, with clean formatting:"
        ),
        process_text_with_ai_async(
            objectives,
            "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':"
        ),
        process_text_with_ai_async(
            agendas,
            "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':"
        )
    ]

    # Process prerequisites separately to handle the default case
    formatted_prerequisites = []
    for prereq in prerequisites:
        if not prereq or pd.isna(prereq) or prereq.strip() == "":
            formatted_prerequisites.append(default_prerequisite)
        else:
            # Non-empty prerequisites are formatted with AI, one at a time
            prereq_result = loop.run_until_complete(process_text_with_ai_async(
                [prereq],
                "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':"
            ))
            formatted_prerequisites.append(prereq_result[0])

    # Run all bulk tasks and collect their results
    results = loop.run_until_complete(asyncio.gather(*tasks))
    loop.close()

    short_descriptions, condensed_descriptions, formatted_objectives, formatted_agendas = results

    # Add processed text to the dataframe
    netcom_df['Short_Description'] = short_descriptions
    netcom_df['Condensed_Description'] = condensed_descriptions
    netcom_df['Formatted_Objectives'] = formatted_objectives
    netcom_df['Formatted_Prerequisites'] = formatted_prerequisites
    netcom_df['Formatted_Agenda'] = formatted_agendas
    # 2. Create aggregated dates and times for each Course ID
    # Sort by Course ID and start date first
    netcom_df = netcom_df.sort_values(['Course ID', 'Course Start Date'])

    date_agg = (
        netcom_df.groupby('Course ID')['Course Start Date']
        .apply(lambda x: ','.join(x.astype(str).unique()))
        .reset_index(name='Aggregated_Dates')
    )

    time_agg = (
        netcom_df.groupby('Course ID')
        .apply(
            lambda df: ','.join(
                f"{st}-{et} {tz}"
                for st, et, tz in zip(df['Course Start Time'],
                                      df['Course End Time'],
                                      df['Time Zone'])
            )
        )
        .reset_index(name='Aggregated_Times')
    )

    # 3. Extract unique parent products (one row per Course ID)
    parent_products = netcom_df.drop_duplicates(subset=['Course ID'])

    # 4. Merge aggregated dates and times onto the parent rows
    parent_products = parent_products.merge(date_agg, on='Course ID', how='left')
    parent_products = parent_products.merge(time_agg, on='Course ID', how='left')
    # 5. Create parent (variable) products
    woo_parent_df = pd.DataFrame({
        'Type': 'variable',
        'SKU': parent_products['Course ID'],
        'Name': parent_products['Course Name'],
        'Published': 1,
        'Visibility in catalog': 'visible',
        'Short description': parent_products['Short_Description'],
        'Description': parent_products['Condensed_Description'],
        'Tax status': 'taxable',
        'In stock?': 1,
        'Regular price': parent_products['SRP Pricing'].replace(r'[\$,]', '', regex=True),
        'Categories': 'courses',
        'Images': parent_products['Vendor'].map(brand_logo_map).fillna(''),
        'Parent': '',
        'Brands': parent_products['Vendor'],
        'Attribute 1 name': 'Date',
        'Attribute 1 value(s)': parent_products['Aggregated_Dates'],
        'Attribute 1 visible': 'visible',
        'Attribute 1 global': 1,
        'Attribute 2 name': 'Location',
        'Attribute 2 value(s)': 'Virtual',
        'Attribute 2 visible': 'visible',
        'Attribute 2 global': 1,
        'Attribute 3 name': 'Time',
        'Attribute 3 value(s)': parent_products['Aggregated_Times'],
        'Attribute 3 visible': 'visible',
        'Attribute 3 global': 1,
        'Meta: outline': parent_products['Formatted_Agenda'],
        'Meta: days': parent_products['Duration'],
        'Meta: location': 'Virtual',
        'Meta: overview': parent_products['Target Audience'],
        'Meta: objectives': parent_products['Formatted_Objectives'],
        'Meta: prerequisites': parent_products['Formatted_Prerequisites'],
        'Meta: agenda': parent_products['Formatted_Agenda']
    })
    # 6. Create child (variation) products
    woo_child_df = pd.DataFrame({
        'Type': 'variation, virtual',
        'SKU': netcom_df['Course SID'],
        'Name': netcom_df['Course Name'],
        'Published': 1,
        'Visibility in catalog': 'visible',
        'Short description': netcom_df['Short_Description'],
        'Description': netcom_df['Condensed_Description'],
        'Tax status': 'taxable',
        'In stock?': 1,
        'Regular price': netcom_df['SRP Pricing'].replace(r'[\$,]', '', regex=True),
        'Categories': 'courses',
        'Images': netcom_df['Vendor'].map(brand_logo_map).fillna(''),
        'Parent': netcom_df['Course ID'],
        'Brands': netcom_df['Vendor'],
        'Attribute 1 name': 'Date',
        'Attribute 1 value(s)': netcom_df['Course Start Date'],
        'Attribute 1 visible': 'visible',
        'Attribute 1 global': 1,
        'Attribute 2 name': 'Location',
        'Attribute 2 value(s)': 'Virtual',
        'Attribute 2 visible': 'visible',
        'Attribute 2 global': 1,
        'Attribute 3 name': 'Time',
        'Attribute 3 value(s)': netcom_df.apply(
            lambda row: f"{row['Course Start Time']}-{row['Course End Time']} {row['Time Zone']}",
            axis=1
        ),
        'Attribute 3 visible': 'visible',
        'Attribute 3 global': 1,
        'Meta: outline': netcom_df['Formatted_Agenda'],
        'Meta: days': netcom_df['Duration'],
        'Meta: location': 'Virtual',
        'Meta: overview': netcom_df['Target Audience'],
        'Meta: objectives': netcom_df['Formatted_Objectives'],
        'Meta: prerequisites': netcom_df['Formatted_Prerequisites'],
        'Meta: agenda': netcom_df['Formatted_Agenda']
    })
    # 7. Combine parent and child products
    woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)

    # 8. Set the desired column order for the WooCommerce import
    column_order = [
        'Type', 'SKU', 'Name', 'Published', 'Visibility in catalog',
        'Short description', 'Description', 'Tax status', 'In stock?',
        'Regular price', 'Categories', 'Images',
        'Parent', 'Brands', 'Attribute 1 name', 'Attribute 1 value(s)', 'Attribute 1 visible',
        'Attribute 1 global', 'Attribute 2 name', 'Attribute 2 value(s)', 'Attribute 2 visible',
        'Attribute 2 global', 'Attribute 3 name', 'Attribute 3 value(s)', 'Attribute 3 visible',
        'Attribute 3 global', 'Meta: outline', 'Meta: days', 'Meta: location', 'Meta: overview',
        'Meta: objectives', 'Meta: prerequisites', 'Meta: agenda'
    ]
    woo_final_df = woo_final_df[column_order]

    # 9. Convert to CSV in memory
    output_buffer = BytesIO()
    woo_final_df.to_csv(output_buffer, index=False, encoding='utf-8-sig')
    output_buffer.seek(0)

    return output_buffer
def process_file(uploaded_file):
    """
    Takes the uploaded file, processes it, and returns the path to a temporary CSV
    that Gradio can offer for download.
    """
    processed_csv_io = process_woocommerce_data_in_memory(uploaded_file)

    # Write the CSV data to a temporary file so Gradio can serve it
    with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
        temp_file.write(processed_csv_io.getvalue())
        temp_path = temp_file.name

    return temp_path


interface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload NetCom CSV", file_types=[".csv"]),
    outputs=gr.File(label="Download WooCommerce CSV"),
    title="NetCom to WooCommerce CSV Processor",
    description="Upload your NetCom Reseller Schedule CSV to generate the WooCommerce import-ready CSV.",
    analytics_enabled=False,
)
if __name__ == "__main__":
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        print("Warning: OPENAI_API_KEY environment variable not set")
    interface.launch()
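# Example local run (assumption: the key is exported in the shell and this file is saved as app.py):
#   export OPENAI_API_KEY=sk-...
#   python app.py
# Gradio serves the interface at http://127.0.0.1:7860 by default.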