# Spaces: Sleeping
# (Page-status text captured together with the source; not part of the program.)
from fastapi import FastAPI, Request, BackgroundTasks | |
import json | |
import io | |
from openai import Client | |
from supabase import create_client | |
from typing import List, Dict, Any | |
import asyncio | |
import logging | |
from datetime import datetime | |
import os | |
# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

# OpenAI client; the API key and organization are read from the environment.
client = Client(
    api_key=os.getenv('OPENAI_API_KEY'),
    organization=os.getenv('ORG_ID'),
)

# Supabase connection settings. os.getenv() returns None for an unset
# variable, which would contradict the `str` annotations, so default to ''.
# NOTE(review): create_client is expected to reject an empty value the same
# way it rejects None -- confirm against the installed supabase version.
url: str = os.getenv('SUPABASE_URL', '')
key: str = os.getenv('SUPABASE_KEY', '')

# Intentionally left unannotated: the only `Client` name imported in this file
# is OpenAI's, so annotating this object with it would declare the wrong type.
# Type checkers infer the correct type from create_client's return value.
supabase = create_client(url, key)
async def testv1(request: Request, background_tasks: BackgroundTasks):
    """Accept a batch-job request and acknowledge it.

    The JSON body is parsed and logged; no job is actually created or
    scheduled yet (the scheduling code below is still commented out), even
    though the response text says so.

    Args:
        request: Incoming request whose body is parsed as JSON.
        background_tasks: FastAPI background-task queue; currently unused.

    Returns:
        ``{'data': 'Batch job is scheduled!'}`` on success, or
        ``{'error': <message>}`` if anything raises (for example an invalid
        JSON body). Errors are returned in the body rather than raised.

    NOTE(review): no route decorator is visible on this function in this
    section of the file -- confirm it is registered with ``app`` elsewhere.
    """
    try:
        body_data = await request.json()
        # Go through the module logger (configured at import time) rather
        # than print(), so the output follows the logging configuration.
        logger.info("testv1 received request body: %s", body_data)

        # TODO: re-enable once `dataset` is derived from `body_data`.
        # # Create initial batch job record
        # save_data = {
        #     'batch_job_id': f"batch_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}",
        #     "batch_job_status": False,
        #     "created_at": datetime.utcnow().isoformat()
        # }
        # response = (
        #     supabase.table("batch_processing_details")
        #     .insert(save_data)
        #     .execute()
        # )
        # # Add processing to background tasks
        # background_tasks.add_task(process_batch_job, dataset, save_data['batch_job_id'])
        # return {'data': 'Batch job is scheduled!', 'batch_job_id': save_data['batch_job_id']}

        return {'data': 'Batch job is scheduled!'}
    except Exception as e:
        # Keep the original contract (error reported in the response body),
        # but record the traceback instead of discarding it.
        logger.exception("testv1 failed to handle the request")
        return {'error': str(e)}
# System message attached to every request in the batch.
_SYSTEM_PROMPT = '''
Your goal is to extract movie categories from movie descriptions, as well as a 1-sentence summary for these movies.
You will be provided with a movie description, and you will output a json object containing the following information:
{
categories: string[] // Array of categories based on the movie description,
summary: string // 1-sentence summary of the movie based on the movie description
}
Categories refer to the genre or type of the movie, like "action", "romance", "comedy", etc. Keep category names simple and use only lower case letters.
Movies can have several categories, but try to keep it under 3-4. Only mention the categories that are the most obvious based on the description.
'''


def _build_batch_tasks(records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Build one chat-completion batch request per record.

    Each record is read with ``.get('imdb_id')`` (used in ``custom_id``) and
    ``.get('Description')`` (sent as the user message).
    """
    return [
        {
            "custom_id": f"task-{record.get('imdb_id')}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "gpt-4o-mini",
                "temperature": 0.1,
                "response_format": {
                    "type": "json_object"
                },
                "messages": [
                    {
                        "role": "system",
                        "content": _SYSTEM_PROMPT
                    },
                    {
                        "role": "user",
                        "content": record.get('Description')
                    }
                ]
            }
        }
        for record in records
    ]


async def process_batch_job(dataset: Dict[str, Any], batch_job_id: str):
    """
    Background task to process the batch job

    Builds one request per entry of ``dataset['data']``, uploads them as a
    JSONL file, creates an OpenAI batch for ``/v1/chat/completions`` and then
    updates the matching row of ``batch_processing_details`` in Supabase.

    Args:
        dataset: Mapping whose ``'data'`` entry is a list of records; each
            record is read for ``'imdb_id'`` and ``'Description'``.
        batch_job_id: Value of ``batch_job_id`` identifying the Supabase row
            to update.

    Returns:
        None. Failures are logged and written to the Supabase row
        (``batch_job_status`` False plus ``error``); they are not re-raised.

    NOTE(review): ``batch_job_status``/``completed_at`` are set as soon as the
    batch has been *submitted*, not when OpenAI finishes it -- confirm that is
    the intended meaning of those columns.
    NOTE(review): the OpenAI and Supabase calls below are not awaited; if the
    clients are synchronous they run inline in this coroutine -- confirm that
    is acceptable for the event loop.
    """
    try:
        logger.info(f"Starting batch processing for job {batch_job_id}")

        # Fail with an explicit message instead of "NoneType is not iterable"
        # (or an empty upload) when there is nothing to process.
        records = dataset.get('data') or []
        if not records:
            raise ValueError("dataset contains no records under 'data'")

        openai_tasks = _build_batch_tasks(records)

        # Create batch file: JSONL, one request per line. Passing
        # (filename, bytes) avoids handing over a stream whose position is at
        # the end after writing, and gives the upload a .jsonl name.
        payload = "".join(
            json.dumps(task) + '\n' for task in openai_tasks
        ).encode('utf-8')
        batch_file = client.files.create(
            file=("batch_input.jsonl", payload),
            purpose="batch"
        )

        # Create batch job
        batch_job = client.batches.create(
            input_file_id=batch_file.id,
            endpoint="/v1/chat/completions",
            completion_window="24h"
        )
        # Record the OpenAI-side id so the batch can be traced later.
        logger.info(f"Submitted OpenAI batch {batch_job.id} for job {batch_job_id}")

        # Update status in Supabase
        supabase.table("batch_processing_details").update({
            "batch_job_status": True,
            "completed_at": datetime.utcnow().isoformat()
        }).match({"batch_job_id": batch_job_id}).execute()
        logger.info(f"Batch job {batch_job_id} processed successfully")
    except Exception as e:
        logger.exception(f"Error processing batch job {batch_job_id}: {str(e)}")
        # Update status with error. Uses .match({...}) like the success path;
        # .eq() takes (column, value), so passing it a single dict raised a
        # TypeError and the failure was never recorded.
        try:
            supabase.table("batch_processing_details").update({
                "batch_job_status": False,
                "error": str(e),
                "completed_at": datetime.utcnow().isoformat()
            }).match({"batch_job_id": batch_job_id}).execute()
        except Exception:
            # Nothing above this task handles the error, so log instead of
            # letting a second failure escape.
            logger.exception(
                f"Could not record failure for batch job {batch_job_id}"
            )