|
from huggingface_hub import InferenceClient |
|
import nltk |
|
import re |
|
import requests |
|
import os |
|
|
|
# Hugging Face access token taken from the environment; None when HF_KEY is unset.
api_key = os.environ.get("HF_KEY")

# Download the NLTK resources at import time, one after another.
# NOTE(review): presumably these back nltk.word_tokenize / nltk.pos_tag — confirm.
for _resource in ('punkt', 'punkt_tab', 'averaged_perceptron_tagger'):
    nltk.download(_resource)

# Shared inference client, authenticated with the token read above.
client = InferenceClient(api_key=api_key)
|
|
|
|
|
def extract_product_info(text):
    """Split a product string into brand, model, description and price.

    The first dollar amount found in ``text`` (for example ``$1,299.99`` or
    ``$1299``) is taken as the price and every occurrence of that exact
    matched substring is removed from the text. The remaining text is
    tokenized and POS-tagged with NLTK, and each token is bucketed:

    * the first token that is tagged ``NNP`` or starts with a letter, digit
      or hyphen becomes the brand;
    * every later token meeting that same condition is appended to the model;
    * all other tokens are joined into the description.

    Intermediate values are printed to stdout.

    Args:
        text: The string to parse. ``None`` is treated as an empty string.

    Returns:
        A dict with the keys ``"brand"``, ``"model"``, ``"description"`` and
        ``"price"``. ``brand``, ``model`` and ``price`` are ``None`` when
        nothing was found. ``price`` is otherwise the matched amount as a
        string with ``$`` and thousands separators removed. ``description``
        is always a string (possibly empty).
    """
    print('Extract function called!')

    result = {"brand": None, "model": None, "description": None, "price": None}

    # Guard against a missing value so re.search does not raise TypeError.
    if text is None:
        text = ""

    # Accept either comma-grouped thousands ($1,299) or a plain run of digits
    # ($1299). A grouped-only pattern would stop after three digits and
    # truncate "$1299.99" to "$129".
    price_match = re.search(
        r'\$\s?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d{2})?', text
    )
    print(f'price_match:{price_match}')
    if price_match:
        matched_price = price_match.group()
        result["price"] = matched_price.replace("$", "").replace(",", "").strip()
        # Drop the price from the text so it is not tokenized as brand/model.
        text = text.replace(matched_price, "").strip()
        print(f'text:{text}')

    tokens = nltk.word_tokenize(text)
    print(f'tokens are:{tokens}')
    pos_tags = nltk.pos_tag(tokens)
    print(tokens, pos_tags)

    brand_parts = []
    model_parts = []
    description_parts = []

    for word, tag in pos_tags:
        # NOTE(review): the regex accepts any token that starts with a letter,
        # digit or hyphen, so ordinary words also land in brand/model and the
        # description mostly receives punctuation — confirm this is intended.
        if tag == 'NNP' or re.match(r'[A-Za-z0-9-]+', word):
            if not brand_parts:
                brand_parts.append(word)
            else:
                model_parts.append(word)
        else:
            description_parts.append(word)

    if brand_parts:
        result["brand"] = " ".join(brand_parts)
    if model_parts:
        result["model"] = " ".join(model_parts)

    result["description"] = " ".join(description_parts)
    print(f'extract function returned:\n{result}')
    return result
|
|
|
|
|
|
|
def extract_info(text, timeout=60):
    """Ask the hosted flan-t5-large model to extract product fields from text.

    Posts a prompt containing ``text`` to the Hugging Face Inference API,
    prints the response object and its decoded JSON body, and returns the
    decoded body.

    Args:
        text: The product text to embed in the prompt.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error. Without a timeout the call could block
            indefinitely.

    Returns:
        The decoded JSON body of the response, exactly as the API sent it.

    Raises:
        requests.exceptions.Timeout: If the request exceeds ``timeout``.
    """
    API_URL = "https://api-inference.huggingface.co/models/google/flan-t5-large"
    headers = {"Authorization": f"Bearer {api_key}"}
    prompt = (
        "From the given text, extract brand name, model number, description "
        "about it, and its average price in today's market. Give me back a "
        "python dictionary with keys as brand_name, model_number, desc, price. "
        f"The text is {text}."
    )
    payload = {"inputs": prompt}

    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    print('GOOGLEE LLM OUTPUTTTTTTT\n\n', response)

    output = response.json()
    print(output)
    # Hand the decoded result back so callers can use it, not just read the log.
    return output
|
|
|
|
|
def get_name(url, object):
    """Ask the vision model about the image at ``url`` and parse its reply.

    Sends a single user message, made of a text question mentioning
    ``object`` and the image URL, to the
    ``meta-llama/Llama-3.2-11B-Vision-Instruct`` model through the
    module-level ``client``. The reply text is printed and then passed to
    ``extract_product_info``.

    Args:
        url: URL of the image to show the model.
        object: Name of the thing the image is expected to show; it is
            interpolated into the question.

    Returns:
        Whatever ``extract_product_info`` returns for the model's reply text.
    """
    question = f"Is this a {object}?. Can you guess what it is and give me the closest brand it resembles to? or a model number? And give me its average price in today's market in USD. In output, give me its normal name, model name, model number and price. separated by commas. No description is needed."

    # One user turn carrying both the question and the image reference.
    text_part = {"type": "text", "text": question}
    image_part = {"type": "image_url", "image_url": {"url": url}}
    conversation = [{"role": "user", "content": [text_part, image_part]}]

    response = client.chat.completions.create(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
        messages=conversation,
        max_tokens=500,
    )

    print(f'\n\nNow output of LLM:\n')
    answer = response.choices[0].message['content']
    print(answer)

    print(f"Extracting from the output now, function calling")
    parsed = extract_product_info(answer)
    print(f'\n\nResult brand and price:{parsed}')
    print(f'\n\nThat is the output')

    return parsed
|
|
|
|
|
|
|
|
|
|
|
|