updated error handling
Browse files- llm/inference.py +69 -2
llm/inference.py
CHANGED
@@ -12,7 +12,7 @@ nltk.download('averaged_perceptron_tagger')
|
|
12 |
|
13 |
client = InferenceClient(api_key=api_key)
|
14 |
|
15 |
-
|
16 |
def extract_product_info(text):
|
17 |
print(f'Extract function called!')
|
18 |
# Initialize result dictionary
|
@@ -57,7 +57,74 @@ def extract_product_info(text):
|
|
57 |
result["description"] = " ".join(description_parts)
|
58 |
print(f'extract function returned:\n{result}')
|
59 |
return result
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
|
63 |
def extract_info(text):
|
|
|
12 |
|
13 |
client = InferenceClient(api_key=api_key)
|
14 |
|
15 |
+
'''
|
16 |
def extract_product_info(text):
|
17 |
print(f'Extract function called!')
|
18 |
# Initialize result dictionary
|
|
|
57 |
result["description"] = " ".join(description_parts)
|
58 |
print(f'extract function returned:\n{result}')
|
59 |
return result
|
60 |
+
'''
|
61 |
+
def extract_product_info(text):
    """Extract brand, model, description and price from a free-text product string.

    Args:
        text: Raw product text, e.g. "Sony WH-1000XM4 wireless headphones $348.00".

    Returns:
        dict with keys "brand", "model", "description", "price"; each value is a
        string or None. "price" is the matched amount with "$" and "," stripped
        (still a string). The function never raises: tokenization and POS
        tagging each have their own fallback, and any other error falls back to
        stuffing the remaining text into "description".
    """
    print(f"Extract function called with input: {text}")

    # Initialize result dictionary
    result = {"brand": None, "model": None, "description": None, "price": None}

    try:
        # Extract price using regex (e.g. "$1,299.00", "$ 5.00")
        price_match = re.search(r'\$\s?\d{1,3}(?:,\d{3})*(?:\.\d{2})?', text)
        print(f"Price match: {price_match}")
        if price_match:
            result["price"] = price_match.group().replace("$", "").replace(",", "").strip()
            # Remove the price part from the text to prevent interference
            text = text.replace(price_match.group(), "").strip()
            print(f"Text after removing price: {text}")

        # Tokenize the remaining text
        try:
            tokens = nltk.word_tokenize(text)
            print(f"Tokens: {tokens}")
        except Exception as e:
            print(f"Error during tokenization: {e}")
            # Fall back to a simple split if tokenization fails
            tokens = text.split()
            print(f"Fallback tokens: {tokens}")

        # POS tagging
        try:
            pos_tags = nltk.pos_tag(tokens)
            print(f"POS Tags: {pos_tags}")
        except Exception as e:
            print(f"Error during POS tagging: {e}")
            # If POS tagging fails, create dummy tags
            pos_tags = [(word, "NN") for word in tokens]
            print(f"Fallback POS Tags: {pos_tags}")

        # Extract brand, model, and description
        brand_parts = []
        model_parts = []
        description_parts = []

        for word, tag in pos_tags:
            # BUG FIX: the original tested `re.match(r'[A-Za-z0-9-]+', word)`,
            # which is truthy for ANY token that merely starts with a letter,
            # digit or hyphen — so virtually every word went to brand/model and
            # "description" was almost always left None. Restrict the
            # brand/model branch to proper nouns (NNP) and model-code-like
            # tokens: the whole token is alphanumeric/hyphen AND has a digit.
            if tag == 'NNP' or re.fullmatch(r'(?=.*\d)[A-Za-z0-9-]+', word):
                if len(brand_parts) == 0:  # Assume the first proper noun is the brand
                    brand_parts.append(word)
                else:  # Model number tends to follow the brand
                    model_parts.append(word)
            else:
                description_parts.append(word)

        # Assign values to the result dictionary
        if brand_parts:
            result["brand"] = " ".join(brand_parts)
        if model_parts:
            result["model"] = " ".join(model_parts)
        if description_parts:
            result["description"] = " ".join(description_parts)

        print(f"Extract function returned: {result}")

    except Exception as e:
        print(f"Unexpected error: {e}")
        # Return a fallback result in case of a critical error
        result["description"] = text
        print(f"Fallback result: {result}")

    return result
|
128 |
|
129 |
|
130 |
def extract_info(text):
|