root-sajjan committed on
Commit
2bcc178
·
verified ·
1 Parent(s): 4e10dac

updated key

Browse files
Files changed (1) hide show
  1. llm/inference.py +111 -109
llm/inference.py CHANGED
@@ -1,110 +1,112 @@
1
- from huggingface_hub import InferenceClient
2
- import nltk
3
- import re
4
- import requests
5
-
6
import os  # local to this setup block; used only to read the access token

# Fetch the NLTK resources requested by this module at import time
# (tokenizer models and the POS tagger).
for _nltk_resource in ('punkt', 'punkt_tab', 'averaged_perceptron_tagger'):
    nltk.download(_nltk_resource)

# Read the Hugging Face token from the HF_KEY environment variable instead of
# relying only on a literal in the source.  The previous literal is kept as
# the fallback so behavior is unchanged when HF_KEY is not set.
client = InferenceClient(api_key=os.getenv("HF_KEY", "xyz"))
12
-
13
-
14
# Dollar amount: comma-grouped thousands ("$1,299.99") or a plain digit run
# ("$1299"), optionally followed by a decimal part.
_PRICE_PATTERN = re.compile(r'\$\s?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?')
# Tokens starting with a letter, digit or hyphen count as brand/model parts.
_NAME_TOKEN_PATTERN = re.compile(r'[A-Za-z0-9-]+')


def extract_product_info(text):
    """Split free-form product text into brand, model, description and price.

    The first dollar amount found is taken as the price and every occurrence
    of that exact substring is removed from the text.  The remaining text is
    tokenized and POS-tagged with NLTK: the first token accepted as a name
    part becomes the brand, later name parts form the model, and all other
    tokens are collected as the description.

    Args:
        text: Product text to parse.  ``None`` yields a result whose fields
            are all ``None``.

    Returns:
        A dict with the keys ``"brand"``, ``"model"``, ``"description"`` and
        ``"price"``.  ``price`` is a string with ``$`` and thousands
        separators removed; ``brand``, ``model`` and ``price`` stay ``None``
        when nothing was found for them.
    """
    result = {"brand": None, "model": None, "description": None, "price": None}
    if text is None:
        return result

    # The price is extracted first so its digits are not picked up as part of
    # the brand or model below.
    price_match = _PRICE_PATTERN.search(text)
    if price_match:
        matched = price_match.group()
        result["price"] = matched.replace("$", "").replace(",", "").strip()
        text = text.replace(matched, "").strip()

    brand_parts = []
    model_parts = []
    description_parts = []

    for word, tag in nltk.pos_tag(nltk.word_tokenize(text)):
        # NOTE(review): the pattern accepts any token that merely starts with
        # a letter, digit or hyphen, so ordinary words also land in the
        # brand/model lists -- confirm this breadth is intended.
        if tag == 'NNP' or _NAME_TOKEN_PATTERN.match(word):
            if not brand_parts:
                # The first accepted token is assumed to be the brand.
                brand_parts.append(word)
            else:
                # Everything accepted after the brand is treated as model.
                model_parts.append(word)
        else:
            description_parts.append(word)

    if brand_parts:
        result["brand"] = " ".join(brand_parts)
    if model_parts:
        result["model"] = " ".join(model_parts)

    # Description is always a string (possibly empty) once text was parsed.
    result["description"] = " ".join(description_parts)

    return result
54
-
55
-
56
-
57
def extract_info(text):
    """Ask the hosted ``google/flan-t5-large`` model to extract product fields.

    Posts a prompt containing ``text`` to the Hugging Face inference endpoint,
    prints the HTTP response object and the decoded JSON body, and returns
    that body.

    Args:
        text: Free-form product text that is embedded in the prompt.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    import os  # local import so this block does not depend on file-level imports

    API_URL = "https://api-inference.huggingface.co/models/google/flan-t5-large"
    # Prefer a token from the HF_KEY environment variable; the previously
    # hard-coded literal remains the fallback so existing setups still work.
    token = os.getenv("HF_KEY", "hf_xyz")
    headers = {"Authorization": f"Bearer {token}"}
    payload = {"inputs": f"From the given text, extract brand name, model number, description about it, and its average price in today's market. Give me back a python dictionary with keys as brand_name, model_number, desc, price. The text is {text}.",}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    print('GOOGLEE LLM OUTPUTTTTTTT\n\n',response )
    output = response.json()
    print(output)
    # Return the parsed body so callers can use it, not only read the log.
    return output
65
-
66
-
67
-
68
def get_name(url, object):
    """Identify the item shown in an image and return its parsed details.

    Sends the image URL together with a text prompt to the
    ``meta-llama/Llama-3.2-11B-Vision-Instruct`` chat model through the
    module-level ``client``, prints the raw answer, and parses that answer
    with ``extract_product_info``.

    Args:
        url: URL of the image to send to the model.
        object: Expected kind of item; interpolated into the prompt.

    Returns:
        The dict produced by ``extract_product_info`` for the model's answer.
    """
    prompt = f"Is this a {object}?. Can you guess what it is and give me the closest brand it resembles to? or a model number? And give me its average price in today's market in USD. In output, give me its normal name, model name, model number and price. separated by commas. No description is needed."
    text_part = {"type": "text", "text": prompt}
    image_part = {"type": "image_url", "image_url": {"url": url}}
    messages = [{"role": "user", "content": [text_part, image_part]}]

    response = client.chat.completions.create(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
        messages=messages,
        max_tokens=500,
    )

    print('\n\nNow output of LLM:\n')
    answer = response.choices[0].message['content']
    print(answer)
    print('\n\nThat is the output')

    # The alternative extraction through extract_info is currently disabled.
    product_info = extract_product_info(answer)
    print(f'\n\nResult brand and price:{product_info}')

    return product_info
106
-
107
- # url = "https://i.ibb.co/mNYvqDL/crop_39.jpg"
108
- # object="fridge"
109
-
 
 
110
  # get_name(url, object)
 
1
+ from huggingface_hub import InferenceClient
2
+ import nltk
3
+ import re
4
+ import requests
5
+ import os
6
+
7
# Hugging Face access token, taken from the HF_KEY environment variable
# (None when the variable is not set).
api_key = os.getenv("HF_KEY")

# Fetch the NLTK resources requested by this module at import time
# (tokenizer models and the POS tagger), in the original order.
for _nltk_resource in ('punkt', 'punkt_tab', 'averaged_perceptron_tagger'):
    nltk.download(_nltk_resource)


# Shared inference client used by get_name.
client = InferenceClient(api_key=api_key)
14
+
15
+
16
# Dollar amount: comma-grouped thousands ("$1,299.99") or a plain digit run
# ("$1299"), optionally followed by a decimal part.
_PRICE_PATTERN = re.compile(r'\$\s?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?')
# Tokens starting with a letter, digit or hyphen count as brand/model parts.
_NAME_TOKEN_PATTERN = re.compile(r'[A-Za-z0-9-]+')


def extract_product_info(text):
    """Split free-form product text into brand, model, description and price.

    The first dollar amount found is taken as the price and every occurrence
    of that exact substring is removed from the text.  The remaining text is
    tokenized and POS-tagged with NLTK: the first token accepted as a name
    part becomes the brand, later name parts form the model, and all other
    tokens are collected as the description.

    Args:
        text: Product text to parse.  ``None`` yields a result whose fields
            are all ``None``.

    Returns:
        A dict with the keys ``"brand"``, ``"model"``, ``"description"`` and
        ``"price"``.  ``price`` is a string with ``$`` and thousands
        separators removed; ``brand``, ``model`` and ``price`` stay ``None``
        when nothing was found for them.
    """
    result = {"brand": None, "model": None, "description": None, "price": None}
    if text is None:
        return result

    # The price is extracted first so its digits are not picked up as part of
    # the brand or model below.
    price_match = _PRICE_PATTERN.search(text)
    if price_match:
        matched = price_match.group()
        result["price"] = matched.replace("$", "").replace(",", "").strip()
        text = text.replace(matched, "").strip()

    brand_parts = []
    model_parts = []
    description_parts = []

    for word, tag in nltk.pos_tag(nltk.word_tokenize(text)):
        # NOTE(review): the pattern accepts any token that merely starts with
        # a letter, digit or hyphen, so ordinary words also land in the
        # brand/model lists -- confirm this breadth is intended.
        if tag == 'NNP' or _NAME_TOKEN_PATTERN.match(word):
            if not brand_parts:
                # The first accepted token is assumed to be the brand.
                brand_parts.append(word)
            else:
                # Everything accepted after the brand is treated as model.
                model_parts.append(word)
        else:
            description_parts.append(word)

    if brand_parts:
        result["brand"] = " ".join(brand_parts)
    if model_parts:
        result["model"] = " ".join(model_parts)

    # Description is always a string (possibly empty) once text was parsed.
    result["description"] = " ".join(description_parts)

    return result
56
+
57
+
58
+
59
def extract_info(text):
    """Ask the hosted ``google/flan-t5-large`` model to extract product fields.

    Posts a prompt containing ``text`` to the Hugging Face inference endpoint,
    authenticating with the module-level ``api_key``.  The HTTP response
    object and the decoded JSON body are printed, and the body is returned.

    Args:
        text: Free-form product text that is embedded in the prompt.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    API_URL = "https://api-inference.huggingface.co/models/google/flan-t5-large"
    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {"inputs": f"From the given text, extract brand name, model number, description about it, and its average price in today's market. Give me back a python dictionary with keys as brand_name, model_number, desc, price. The text is {text}.",}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    print('GOOGLEE LLM OUTPUTTTTTTT\n\n',response )
    output = response.json()
    print(output)
    # Return the parsed body so callers can use it, not only read the log.
    return output
67
+
68
+
69
+
70
def get_name(url, object):
    """Identify the item shown in an image and return its parsed details.

    Sends the image URL together with a text prompt to the
    ``meta-llama/Llama-3.2-11B-Vision-Instruct`` chat model through the
    module-level ``client``, prints the raw answer, and parses that answer
    with ``extract_product_info``.

    Args:
        url: URL of the image to send to the model.
        object: Expected kind of item; interpolated into the prompt.

    Returns:
        The dict produced by ``extract_product_info`` for the model's answer.
    """
    prompt = f"Is this a {object}?. Can you guess what it is and give me the closest brand it resembles to? or a model number? And give me its average price in today's market in USD. In output, give me its normal name, model name, model number and price. separated by commas. No description is needed."
    text_part = {"type": "text", "text": prompt}
    image_part = {"type": "image_url", "image_url": {"url": url}}
    messages = [{"role": "user", "content": [text_part, image_part]}]

    response = client.chat.completions.create(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
        messages=messages,
        max_tokens=500,
    )

    print('\n\nNow output of LLM:\n')
    answer = response.choices[0].message['content']
    print(answer)
    print('\n\nThat is the output')

    # The alternative extraction through extract_info is currently disabled.
    product_info = extract_product_info(answer)
    print(f'\n\nResult brand and price:{product_info}')

    return product_info
108
+
109
+ # url = "https://i.ibb.co/mNYvqDL/crop_39.jpg"
110
+ # object="fridge"
111
+
112
  # get_name(url, object)