from fastapi import FastAPI, Request, BackgroundTasks
import json
import io
import os
from openai import OpenAI
from supabase import create_client, Client
from typing import Dict, Any
import logging
from datetime import datetime



# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('ORG_ID'))
url: str = os.getenv('SUPABASE_URL')
key: str = os.getenv('SUPABASE_KEY')
supabase: Client = create_client(url, key)


@app.post("/send/batch_processing")
async def schedule_batch_processing(request: Request, background_tasks: BackgroundTasks):
    try:
        body_data = await request.json()
        logger.info(f"Received batch request with {len(body_data.get('data', []))} records")

        # Create initial batch job record
        save_data = {
            'batch_job_id': f"batch_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}",
            "batch_job_status": False,
            "created_at": datetime.utcnow().isoformat()
        }

        supabase.table("batch_processing_details").insert(save_data).execute()

        # Hand the long-running work to a background task so the request returns immediately
        background_tasks.add_task(process_batch_job, body_data, save_data['batch_job_id'])

        return {'data': 'Batch job is scheduled!', 'batch_job_id': save_data['batch_job_id']}

    except Exception as e:
        return {'error': str(e)}
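

# Example request body this endpoint expects (illustrative values; the field names
# "data", "imdb_id", and "Description" mirror the lookups in process_batch_job below):
#
#   POST /send/batch_processing
#   {
#       "data": [
#           {"imdb_id": "tt0133093", "Description": "A hacker learns the world he lives in is a simulation..."},
#           {"imdb_id": "tt0111161", "Description": "Two imprisoned men bond over a number of years..."}
#       ]
#   }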



async def process_batch_job(dataset: Dict[str, Any], batch_job_id: str):
    """
    Background task to process the batch job
    """
    try:
        logger.info(f"Starting batch processing for job {batch_job_id}")
        
        system_prompt = '''
            Your goal is to extract movie categories from movie descriptions, as well as a 1-sentence summary for these movies.
            You will be provided with a movie description, and you will output a json object containing the following information:
            
            {
                categories: string[] // Array of categories based on the movie description,
                summary: string // 1-sentence summary of the movie based on the movie description
            }
            
            Categories refer to the genre or type of the movie, like "action", "romance", "comedy", etc. Keep category names simple and use only lower case letters.
            Movies can have several categories, but try to keep it under 3-4. Only mention the categories that are the most obvious based on the description.
        '''
        
        openai_tasks = []
        for ds in dataset.get('data', []):
            movie_id = ds.get('imdb_id')
            description = ds.get('Description')
            task = {
                "custom_id": f"task-{movie_id}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": "gpt-4o-mini",
                    "temperature": 0.1,
                    "response_format": { 
                        "type": "json_object"
                    },
                    "messages": [
                        {
                            "role": "system",
                            "content": system_prompt
                        },
                        {
                            "role": "user",
                            "content": description
                        }
                    ]
                }
            }
            openai_tasks.append(task)
        
        # Serialize the tasks to an in-memory JSONL buffer for the Batch API
        json_obj = io.BytesIO()
        for obj in openai_tasks:
            json_obj.write((json.dumps(obj) + '\n').encode('utf-8'))
        json_obj.seek(0)  # rewind so the file upload reads from the start of the buffer
        
        batch_file = client.files.create(
            file=json_obj,
            purpose="batch"
        )
        
        # Create batch job
        batch_job = client.batches.create(
            input_file_id=batch_file.id,
            endpoint="/v1/chat/completions",
            completion_window="24h"
        )
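        # batch_job.id is the OpenAI-side identifier for this batch; persisting it
        # alongside the Supabase record would allow polling for results later
        # (see the retrieval sketch at the bottom of this file).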
        
        # Update status in Supabase
        supabase.table("batch_processing_details").update({
            "batch_job_status": True,
            "completed_at": datetime.utcnow().isoformat()
        }).match({"batch_job_id": batch_job_id}).execute()
        
        logger.info(f"Batch job {batch_job_id} processed successfully")
        
    except Exception as e:
        logger.error(f"Error processing batch job {batch_job_id}: {str(e)}")
        # Update status with error
        supabase.table("batch_processing_details").update({
            "batch_job_status": False,
            "error": str(e),
            "completed_at": datetime.utcnow().isoformat()
        }).eq("batch_job_id", batch_job_id).execute()
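

# --- Illustrative sketch: fetching batch results later (not part of the flow above) ---
# The batch is submitted with a 24h completion window, so results must be retrieved
# in a separate step. A polling endpoint might look like the sketch below. The route
# path and the idea of passing the OpenAI batch id directly are assumptions for
# illustration; batches.retrieve and files.content are standard OpenAI v1 SDK calls.

@app.get("/batch_results/{openai_batch_id}")
async def get_batch_results(openai_batch_id: str):
    try:
        batch_job = client.batches.retrieve(openai_batch_id)
        if batch_job.status != "completed":
            # Still validating, in progress, or failed; report the raw status
            return {'status': batch_job.status}

        # The output file is JSONL: one object per request, keyed by custom_id
        raw = client.files.content(batch_job.output_file_id).text
        results = []
        for line in raw.strip().split('\n'):
            item = json.loads(line)
            results.append({
                'custom_id': item['custom_id'],
                'content': item['response']['body']['choices'][0]['message']['content']
            })
        return {'status': 'completed', 'results': results}

    except Exception as e:
        return {'error': str(e)}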