Spaces:
Sleeping
Sleeping
Delete config.py and ocr.py files, update base_prompt in base.py
Browse files
- src/config.py +0 -7
- src/module/prompts/base.py +3 -3
- src/ocr.py +0 -31
src/config.py
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
from decouple import config
|
2 |
-
import os
|
3 |
-
|
4 |
-
OPENAI_API_KEY = config('OPENAI_API_KEY', default="")
|
5 |
-
emmbedding_model = "text-embedding-3-large"
|
6 |
-
|
7 |
-
file_Directory= os.path.join(os.getcwd(), "data")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/module/prompts/base.py
CHANGED
@@ -12,10 +12,11 @@ base_prompt = dedent("""
|
|
12 |
|
13 |
Analyse data from the above product description to give me the following details in JSON format:
|
14 |
( return "null" where you don't have a answer)
|
15 |
-
|
16 |
"brand": "sample_brand",
|
17 |
"mrp": "The price might start with MRP or Rs.",
|
18 |
"unit": "per pack",
|
|
|
19 |
"parent_category": "from the above given list",
|
20 |
"ingredients": ["ingredient1", "ingredient2", "ingredient3"],
|
21 |
"calorie_count": "Would be in numbers",
|
@@ -26,8 +27,7 @@ base_prompt = dedent("""
|
|
26 |
"promotion_on_the_pack": "if any",
|
27 |
"type_of_product": "give this your understanding",
|
28 |
"pack_of_or_no_of_units": "No. of Units"
|
29 |
-
|
30 |
-
|
31 |
|
32 |
Analyse data from the above product description to give me the following details in JSON format:
|
33 |
Only return the output in the required json format.
|
|
|
12 |
|
13 |
Analyse data from the above product description to give me the following details in JSON format:
|
14 |
( return "null" where you don't have a answer)
|
15 |
+
|
16 |
"brand": "sample_brand",
|
17 |
"mrp": "The price might start with MRP or Rs.",
|
18 |
"unit": "per pack",
|
19 |
+
"Quantity": 1, ##num of products visible
|
20 |
"parent_category": "from the above given list",
|
21 |
"ingredients": ["ingredient1", "ingredient2", "ingredient3"],
|
22 |
"calorie_count": "Would be in numbers",
|
|
|
27 |
"promotion_on_the_pack": "if any",
|
28 |
"type_of_product": "give this your understanding",
|
29 |
"pack_of_or_no_of_units": "No. of Units"
|
30 |
+
|
|
|
31 |
|
32 |
Analyse data from the above product description to give me the following details in JSON format:
|
33 |
Only return the output in the required json format.
|
src/ocr.py
DELETED
@@ -1,31 +0,0 @@
|
|
1 |
-
from azure.ai.formrecognizer import DocumentAnalysisClient
|
2 |
-
from azure.core.credentials import AzureKeyCredential
|
3 |
-
from config import key, endpoint
|
4 |
-
import easyocr
|
5 |
-
|
6 |
-
def azure_ocr(image_path):
|
7 |
-
try:
|
8 |
-
# Create a DocumentAnalysisClient instance
|
9 |
-
document_analysis_client = DocumentAnalysisClient(
|
10 |
-
endpoint=endpoint, credential=AzureKeyCredential(key)
|
11 |
-
)
|
12 |
-
|
13 |
-
# Open the image file and begin document analysis
|
14 |
-
with open(image_path, "rb") as image_file:
|
15 |
-
poller = document_analysis_client.begin_analyze_document(
|
16 |
-
"prebuilt-read", document=image_file
|
17 |
-
)
|
18 |
-
result = poller.result()
|
19 |
-
return result.content
|
20 |
-
except Exception as e:
|
21 |
-
print('Error occurred:', e)
|
22 |
-
return ""
|
23 |
-
|
24 |
-
def easy_ocr(image_path):
|
25 |
-
try:
|
26 |
-
reader = easyocr.Reader(['en','hi','bn','mr','ta','te'])
|
27 |
-
result = reader.readtext(image_path)
|
28 |
-
return result
|
29 |
-
except Exception as e:
|
30 |
-
print('Error occurred:', e)
|
31 |
-
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|