root-sajjan committed on
Commit
39acce6
·
verified ·
1 Parent(s): 94c0e24
Files changed (1) hide show
  1. llm/inference.py +4 -1
llm/inference.py CHANGED
@@ -20,14 +20,17 @@ def extract_product_info(text):
20
 
21
  # Extract price separately using regex (to avoid confusion with brand name)
22
  price_match = re.search(r'\$\s?\d{1,3}(?:,\d{3})*(?:\.\d{2})?', text)
 
23
  if price_match:
24
  result["price"] = price_match.group().replace("$", "").replace(",", "").strip()
25
  # Remove the price part from the text to prevent it from being included in the brand/model extraction
26
  text = text.replace(price_match.group(), "").strip()
27
-
28
  # Tokenize the remaining text and tag parts of speech
29
  tokens = nltk.word_tokenize(text)
 
30
  pos_tags = nltk.pos_tag(tokens)
 
31
 
32
  # Extract brand and model (Proper Nouns + Alphanumeric patterns)
33
  brand_parts = []
 
20
 
21
  # Extract price separately using regex (to avoid confusion with brand name)
22
  price_match = re.search(r'\$\s?\d{1,3}(?:,\d{3})*(?:\.\d{2})?', text)
23
+ print(f'price_match:{price_match}')
24
  if price_match:
25
  result["price"] = price_match.group().replace("$", "").replace(",", "").strip()
26
  # Remove the price part from the text to prevent it from being included in the brand/model extraction
27
  text = text.replace(price_match.group(), "").strip()
28
+ print(f'text:{text}')
29
  # Tokenize the remaining text and tag parts of speech
30
  tokens = nltk.word_tokenize(text)
31
+ print(f'tokens are:{tokens}')
32
  pos_tags = nltk.pos_tag(tokens)
33
+ print(tokens, pos_tags)
34
 
35
  # Extract brand and model (Proper Nouns + Alphanumeric patterns)
36
  brand_parts = []