File size: 6,159 Bytes
33b10b6
 
 
 
 
375b323
33b10b6
 
 
559d3ea
33b10b6
 
 
 
 
 
 
 
 
 
 
 
559d3ea
33b10b6
b386189
 
 
33b10b6
 
 
 
 
e66bbde
33b10b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b386189
 
 
 
33b10b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb1a183
33b10b6
 
 
 
 
 
 
 
 
 
 
 
eb1a183
33b10b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb1a183
33b10b6
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# api/index.py
#from motor.motor_asyncio import AsyncIOMotorClient
#from openai import AsyncOpenAI
import os, io, base64
import json
import re
from typing import List, Dict, Any

from fastapi import FastAPI, HTTPException
from openai import OpenAI
from pymongo import MongoClient

# Move configuration and constants to separate files
from .config import MONGODB_URL, OPENAI_API_KEY
from .schemas import label_reader_schema

# Initialize clients
# Everything below runs once at import time; the resulting objects are
# module-level singletons shared by all request handlers in this file.
openai_client = OpenAI(api_key=OPENAI_API_KEY)
# NOTE(review): this prints the full connection string. If MONGODB_URL embeds
# credentials they will end up in stdout/logs — confirm and consider removing.
print(f"MONGODB_URL is {MONGODB_URL}")
mongodb_client = MongoClient(MONGODB_URL)
# Handlers read from and write to the "products" collection of the
# "consumeWise" database.
db = mongodb_client.consumeWise
collection = db.products
print(f"collection is {collection}")
# FastAPI application object that the route decorators below register on.
app = FastAPI()

def encode_image(uploaded_file):
    """Return the base64 encoding, as text, of everything read from *uploaded_file*.

    *uploaded_file* must expose a ``read()`` method returning bytes; it is
    read from its current position to the end.
    """
    raw_bytes = uploaded_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return str(encoded_bytes, 'utf-8')

def extract_information(images_list: List[Any]) -> Dict[str, Any]:
    """Extract structured label data for one product from its packaging images.

    Every image is base64-encoded with ``encode_image`` and sent, together
    with the label-reading prompt, in a single ``gpt-4o`` chat-completion
    request that uses ``label_reader_schema`` as its JSON-schema response
    format. The reply text is parsed as JSON and returned.

    Args:
        images_list: Objects accepted by ``encode_image`` (it calls
            ``.read()`` on each one). All images are assumed to show the
            same product.

    Returns:
        The model's reply decoded from JSON.

    Raises:
        ValueError: If ``images_list`` is empty or ``None``.
        Exception: If encoding, the API call, or JSON decoding fails; the
            original error is chained as ``__cause__``.
    """
    # Fail fast with a clear message instead of an IndexError on the debug
    # print below (or a pointless API call with no images attached).
    if not images_list:
        raise ValueError("No images provided for extraction")

    print(f"DEBUG - openai_client : {openai_client}")
    print(f"type(valid_image_files[0]) : {type(images_list[0])}")

    # TODO: no image-quality (blur) filtering is performed here; every
    # supplied image is forwarded to the model as-is.
    LABEL_READER_PROMPT = """
You will be provided with a set of images corresponding to a single product. These images are found printed on the packaging of the product.
Your goal will be to extract information from these images to populate the schema provided. Here is some information you will routinely encounter. Ensure that you capture complete information, especially for nutritional information and ingredients:
- Ingredients: List of ingredients in the item. They may have some percent listed in brackets. They may also have metadata or classification like Preservative (INS 211) where INS 211 forms the metadata. Structure accordingly. If ingredients have subingredients like sugar: added sugar, trans sugar, treat them as different ingredients.
- Claims: Like a mango fruit juice says contains fruit.
- Nutritional Information: This will have nutrients, serving size, and nutrients listed per serving. Extract the base value for reference.
- FSSAI License number: Extract the license number. There might be many, so store relevant ones.
- Name: Extract the name of the product.
- Brand/Manufactured By: Extract the parent company of this product.
- Serving size: This might be explicitly stated or inferred from the nutrients per serving.
"""
    try:
        # Images are inlined as data URLs; the MIME type is always declared
        # as JPEG regardless of the actual file format.
        image_message = [
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{encode_image(uploaded_file)}"
                },
            }
            for uploaded_file in images_list
        ]
        response = openai_client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": LABEL_READER_PROMPT},
                        *image_message,
                    ],
                },
            ],
            response_format={"type": "json_schema", "json_schema": label_reader_schema}
        )
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"Error extracting information: {str(e)}") from e

@app.post("/extract-data")
def extract_data(images_list_json: Dict[str, List[Any]]):
    """Extract product data from the supplied images and store it in MongoDB.

    Args:
        images_list_json: Request body; must contain an ``"images_list"`` key
            whose value is passed to ``extract_information``.

    Returns:
        The extracted document, with ``"_id"`` set to the string form of the
        id MongoDB assigned on insert.

    Raises:
        HTTPException: 400 when the body is empty or lacks ``"images_list"``.
        Exception: Any extraction or database failure, wrapped with the
            original error chained as the cause.
    """
    # NOTE(review): the list items arrive as JSON values, while
    # extract_information's encode_image calls .read() on each item —
    # confirm what clients are expected to send here.
    if not images_list_json or "images_list" not in images_list_json:
        # A malformed request is a client error, not an internal server error.
        raise HTTPException(status_code=400, detail="Image links not found")

    try:
        extracted_data = extract_information(images_list_json["images_list"])
        result = collection.insert_one(extracted_data)
        # Replace the ObjectId with its string form so the response is
        # JSON-serializable.
        extracted_data["_id"] = str(result.inserted_id)
        return extracted_data
    except Exception as e:
        raise Exception(f"An error occurred {e}") from e

@app.get("/find-product")
def find_product(product_name: str):
    """Search stored products whose name contains any part of *product_name*.

    The query is split on whitespace; every leading multi-word phrase and
    every individual word is matched case-insensitively as a substring of
    the ``productName`` field.

    Args:
        product_name: Free-text search string.

    Returns:
        A dict with ``"products"`` (unique ``"<productName> by <brandName>"``
        strings, in no particular order) and a human-readable ``"message"``.

    Raises:
        HTTPException: 400 when *product_name* is empty.
        Exception: Any database or document-shape failure, wrapped with the
            original error chained as the cause.
    """
    if not product_name:
        # A missing search string is a client error, not an internal one.
        raise HTTPException(status_code=400, detail="Please provide a valid product name")

    try:
        words = product_name.split()
        phrases = [' '.join(words[:i]) for i in range(2, len(words) + 1)]
        # dict.fromkeys drops repeated terms (e.g. a word typed twice) while
        # keeping order, so the same query is never sent more than once.
        search_terms = list(dict.fromkeys(phrases + words))
        product_list = set()

        for term in search_terms:
            query = {"productName": {"$regex": f".*{re.escape(term)}.*", "$options": "i"}}
            # Iterate the cursor directly: this works on every PyMongo
            # version, whereas Cursor.to_list() is a Motor-style call that
            # synchronous cursors only gained in recent releases.
            for product in collection.find(query):
                brand_product_name = f"{product['productName']} by {product['brandName']}"
                product_list.add(brand_product_name)

        return {
            "products": list(product_list),
            "message": "Products found" if product_list else "No products found"
        }
    except Exception as e:
        raise Exception(f"An error occurred {e}") from e

@app.get("/get-product")
def get_product(product_name: str):
    """Return the stored document whose ``productName`` equals *product_name*.

    Args:
        product_name: Exact product name to look up.

    Returns:
        The matching document with ``"_id"`` converted to a string.

    Raises:
        HTTPException: 400 when *product_name* is empty; 404 when no
            document matches.
        Exception: Any database failure, wrapped with the original error
            chained as the cause.
    """
    if not product_name:
        raise HTTPException(status_code=400, detail="Please provide a valid product name")

    # Only the database call sits inside the try, so the 404 below is not
    # swallowed by the broad handler and turned into a 500.
    try:
        product = collection.find_one({"productName": product_name})
    except Exception as e:
        raise Exception(f"An error occurred {e}") from e

    if not product:
        raise HTTPException(status_code=404, detail="Product not found")

    # Replace the ObjectId with its string form so the response is
    # JSON-serializable.
    product["_id"] = str(product["_id"])
    print(f"product info : {product}")
    return product