Spaces:

asdfaman
/

sadhya

Sleeping

App Files Files Community

asdfaman committed on Nov 25, 2024

Commit

097bb8f

verified ·

1 Parent(s): e54b061

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -200

app.py CHANGED Viewed

@@ -1,67 +1,92 @@
 import streamlit as st
-from ultralytics import YOLO
-import tensorflow as tf  # Change this to import TensorFlow
 import numpy as np
-from PIL import Image, ImageOps, ImageDraw, ImageFont
 import pandas as pd
 import time
 from paddleocr import PaddleOCR, draw_ocr
-import re
-import dateparser
 import os
-import matplotlib.pyplot as plt
-#######Llama3bi integration########
-import torch
-from transformers import pipeline
-model_id = "meta-llama/Llama-3.2-3B-Instruct"
-pipe = pipeline(
-    "text-generation",
-    model=model_id,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-)
 messages = [
-    {"role": "system", "content": """Your task is to get the product details out of the text given. The text given will be raw text from OCR of social media images of products,
-                                     and the goal is to get product details and description so that it can be used for amazon product listing. """},
 ]
 # Function to get Instagram post details
 import instaloader
 def get_instagram_post_details(post_url):
     try:
         shortcode = post_url.split('/')[-2]
         post = instaloader.Post.from_shortcode(L.context, shortcode)
-        # Retrieve caption and image URL
         caption = post.caption
-        image_url = post.url
-        return caption, image_url
     except Exception as e:
         return str(e), None
 # Initialize PaddleOCR model
 ocr = PaddleOCR(use_angle_cls=True, lang='en')
 # Team details
 team_members = [
     {"name": "Aman Deep", "image": "aman.jpg"},  # Replace with actual paths to images
-    {"name": "Nandini", "image": "myimage.jpg"},
-    {"name": "Abhay Sharma", "image": "gaurav.jpg"},
-    {"name": "Ratan Prakash Mishra", "image": "anandimg.jpg"}
 ]
 # Function to preprocess the images for the model
 def preprocess_image(image):
     """
     Preprocess the input image for model prediction.
     Args:
         image (PIL.Image): Input image in PIL format.
     Returns:
         np.ndarray: Preprocessed image array ready for prediction.
     """
@@ -90,58 +115,6 @@ def preprocess_image(image):
         print(f"Error processing image: {e}")
         return None  # Return None if there's an error
-# Function to create a high-quality circular mask for an image
-def make_image_circular1(img, size=(256, 256)):
-    img = img.resize(size, Image.LANCZOS)
-    mask = Image.new("L", size, 0)
-    draw = ImageDraw.Draw(mask)
-    draw.ellipse((0, 0) + size, fill=255)
-    output = ImageOps.fit(img, mask.size, centering=(0.5, 0.5))
-    output.putalpha(mask)  # Apply the mask as transparency
-    return output
-# Function to check if a file exists
-def file_exists(file_path):
-    return os.path.isfile(file_path)
-def make_image_circular(image):
-    # Create a circular mask
-    mask = Image.new("L", image.size, 0)
-    draw = ImageDraw.Draw(mask)
-    draw.ellipse((0, 0, image.size[0], image.size[1]), fill=255)
-    # Apply the mask to the image
-    circular_image = Image.new("RGB", image.size)
-    circular_image.paste(image.convert("RGBA"), (0, 0), mask)
-    return circular_image
-# Function to extract dates from recognized text using regex
-def extract_dates_with_dateparser(texts, result):
-    date_texts = []
-    date_boxes = []
-    date_scores = []
-    def is_potential_date(text):
-        valid_date_pattern = r'^(0[1-9]|[12][0-9]|3[01])[-/.]?(0[1-9]|1[0-2])[-/.]?(\d{2}|\d{4})$|' \
-                             r'^(0[1-9]|[12][0-9]|3[01])[-/.]?[A-Za-z]{3}[-/.]?(\d{2}|\d{4})$|' \
-                             r'^(0[1-9]|1[0-2])[-/.]?(\d{2}|\d{4})$|' \
-                             r'^[A-Za-z]{3}[-/.]?(\d{2}|\d{4})$'
-        return bool(re.match(valid_date_pattern, text))
-    dates_found = []
-    for i, text in enumerate(texts):
-        if is_potential_date(text):  # Only process texts that are potential dates
-            parsed_date = dateparser.parse(text, settings={'DATE_ORDER': 'DMY'})
-            if parsed_date:
-                dates_found.append(parsed_date.strftime('%Y-%m-%d'))  # Store as 'YYYY-MM-DD'
-                date_texts.append(text)  # Store the original text
-                date_boxes.append(result[0][i][0])  # Store the bounding box
-                date_scores.append(result[0][i][1][1])  # Store confidence score
-    return dates_found, date_texts, date_boxes, date_scores
 # Function to display circular images in a matrix format
 def display_images_in_grid(images, max_images_per_row=4):
     num_images = len(images)
@@ -163,10 +136,18 @@ def display_team_members(members, max_members_per_row=4):
         for j, member in enumerate(members[i * max_members_per_row:(i + 1) * max_members_per_row]):
             with cols[j]:
                 img = Image.open(member["image"])  # Load the image
-                circular_img = make_image_circular(img)  # Convert to circular format
                 st.image(circular_img, use_column_width=True)  # Display the circular image
                 st.write(member["name"])  # Display the name below the image
 # Title and description
 st.title("Amazon Smbhav")
 # Team Details with links
@@ -262,7 +243,6 @@ elif app_mode=="Project Details":
     - **Multi-Market Compatibility:** Expand support to other e-commerce platforms.
     This approach automates listing creation directly from social media content, helping sellers quickly launch optimized Amazon product pages.
     """)
 elif app_mode == "Team Details":
@@ -275,7 +255,6 @@ elif app_mode == "Task 1":
     st.write("## Task 1: 🖼️ OCR to Extract Details 📄")
     st.write("Using OCR to extract details from product packaging material, including brand name and pack size.")
     # Instantiate Instaloader
     L = instaloader.Instaloader()
@@ -286,39 +265,39 @@ elif app_mode == "Task 1":
     post_url = st.text_input("Enter Instagram Post URL:")
     if post_url:
-        caption, image_path = get_instagram_post_details(post_url)
-    if image_path and os.path.exists(image_path):
-        st.subheader("Caption:")
-        st.write(caption)
-        st.subheader("Image:")
-        # Load and display the image
-        image = Image.open(image_path)
-        st.image(image, use_column_width=True)
-        # Convert image to numpy array for OCR processing
-        img_array = np.array(image)
-        # Perform OCR on the image
-        st.write(f"Extracting details from {uploaded_image.name}...")
-        result = ocr.ocr(img_array, cls=True)
-        #############################
-        #OCR result text to be parsed here through LLM and get product listing content.
-        messages.append({"role": "user", "content": ""})
-        outputs = pipe(
-            messages,
-            max_new_tokens=256,
-        )
-        productListingContent = outputs[0]["generated_text"][-1]
-        st.markdown(productListingContent)
-        #############################
-        # Clean up (optional)
-        os.remove(image_path)
-    else:
-        st.error("Failed to retrieve the post details. Please check the URL.")
     # File uploader for images (supports multiple files)
     uploaded_files = st.file_uploader("Upload images of products", type=["jpeg", "png", "jpg"], accept_multiple_files=True)
@@ -329,112 +308,46 @@ elif app_mode == "Task 1":
         for uploaded_file in uploaded_files:
             img = Image.open(uploaded_file)
-            circular_img = make_image_circular(img)  # Create circular images
-            circular_images.append(circular_img)
         # Display the circular images in a matrix/grid format
         display_images_in_grid(circular_images, max_images_per_row=4)
-        # Function to simulate loading process with a progress bar
-        def simulate_progress():
-            progress_bar = st.progress(0)
-            for percent_complete in range(100):
-                time.sleep(0.02)
-                progress_bar.progress(percent_complete + 1)
-        # Function to remove gibberish using regex (removes non-alphanumeric chars, filters out very short text)
-        def clean_text(text):
-            # Keep text with letters, digits, and spaces, and remove short/irrelevant text
-            return re.sub(r'[^a-zA-Z0-9\s]', '', text).strip()
-        # Function to extract the most prominent text (product name) and other details
-        def extract_product_info(results):
-            product_name = ""
-            product_details = ""
-            largest_text_size = 0
-            for line in results:
-                for box in line:
-                    text, confidence = box[1][0], box[1][1]
-                    text_size = box[0][2][1] - box[0][0][1]  # Calculate height of the text box
-                    # Clean the text to avoid gibberish
-                    clean_text_line = clean_text(text)
-                    if confidence > 0.7 and len(clean_text_line) > 2:  # Only consider confident, meaningful text
-                        if text_size > largest_text_size:  # Assume the largest text is the product name
-                            largest_text_size = text_size
-                            product_name = clean_text_line
-                        else:
-                            product_details += clean_text_line + " "
-            return product_name, product_details.strip()
         if st.button("Start Analysis"):
             simulate_progress()
             # Loop through each uploaded image and process them
             for uploaded_image in uploaded_files:
                 # Load the uploaded image
                 image = Image.open(uploaded_image)
-                # st.image(image, caption=f'Uploaded Image: {uploaded_image.name}', use_column_width=True)
                 # Convert image to numpy array for OCR processing
                 img_array = np.array(image)
                 # Perform OCR on the image
                 st.write(f"Extracting details from {uploaded_image.name}...")
                 result = ocr.ocr(img_array, cls=True)
-                #############################
                 #OCR result text to be parsed here through LLM and get product listing content.
-                messages.append({"role": "user", "content": ""})
-                outputs = pipe(
-                    messages,
-                    max_new_tokens=256,
                 )
-                productListingContent = outputs[0]["generated_text"][-1]
-                st.markdown(productListingContent)
-                #############################
-                # Process the OCR result to extract product name and properties
-                product_name, product_details = extract_product_info(result)
-                # UI display for single image product details
-                st.markdown("---")
-                st.markdown(f"### **Product Name:** `{product_name}`")
-                st.write(f"**Product Properties:** {product_details}")
                 st.markdown("---")
     else:
         st.write("Please upload images to extract product details.")
-def make_image_circular1(image):
-    # Create a circular mask
-    mask = Image.new("L", image.size, 0)
-    draw = ImageDraw.Draw(mask)
-    draw.ellipse((0, 0, image.size[0], image.size[1]), fill=255)
-    # Apply the mask to the image
-    circular_image = Image.new("RGB", image.size)
-    circular_image.paste(image.convert("RGBA"), (0, 0), mask)
-    return circular_image
-def display_images_in_grid1(images, max_images_per_row=4):
-    rows = (len(images) + max_images_per_row - 1) // max_images_per_row  # Calculate number of rows needed
-    for i in range(0, len(images), max_images_per_row):
-        cols_to_show = images[i:i + max_images_per_row]
-        # Prepare to display in a grid format
-        cols = st.columns(max_images_per_row)  # Create columns dynamically
-        for idx, img in enumerate(cols_to_show):
-            img = img.convert("RGB")  # Ensure the image is in RGB mode
-            if idx < len(cols):
-                cols[idx].image(img, use_column_width=True)
 # Footer with animation
 st.markdown("""
     <style>
@@ -452,4 +365,4 @@ st.markdown("""
     <div class="footer">
         <p>© 2024 Amazon Smbhav Challenge. All rights reserved.</p>
     </div>
-""", unsafe_allow_html=True)

+from itertools import product
 import streamlit as st
 import numpy as np
 import pandas as pd
+from PIL import Image, ImageOps, ImageDraw, ImageFont
 import time
 from paddleocr import PaddleOCR, draw_ocr
 import os
+from dotenv import load_dotenv
+import os
+from huggingface_hub import login
+load_dotenv()  # Load .env file
+huggingface_token = os.getenv("HF_TOKEN")
+login(huggingface_token)
+##########################LLAMA3BI################################
+from huggingface_hub import InferenceClient
+client = InferenceClient(api_key=huggingface_token)
 messages = [
+    {"role": "system", "content": """Your task is to get the product details out of the text given.
+                                    The text given will be raw text from OCR of social media images of products,
+                                    and the goal is to get product details and description so that it can be used for amazon product listing.
+                                    TRY TO KEEP THE LISTING IN FOLLOWING FORMAT.
+                                    📦 [Product Name]
+                                    💰 Price: $XX.XX
+                                    ✨ Key Features:
+                                    •⁠  ⁠[Main Feature 1]
+                                    •⁠  ⁠[Main Feature 2]
+                                    •⁠  ⁠[Main Feature 3]
+                                    📸 [Product Image]
+                                    🏷 Available Now on Amazon
+                                    ✈️ Prime Shipping Available
+                                    🛍 Shop Now: [Link]
+                                    🔍 Search: [Main Keywords]
+                                    [#RelevantHashtags] """},
 ]
 # Function to get Instagram post details
 import instaloader
+from io import BytesIO
+import requests
 def get_instagram_post_details(post_url):
     """Fetch the caption and main image of an Instagram post.

     Args:
         post_url (str): URL of the post, e.g.
             ``https://www.instagram.com/p/<shortcode>/``. A missing trailing
             slash or a query string after the shortcode is tolerated.

     Returns:
         tuple: ``(caption, img_array)`` on success, where ``img_array`` is the
         downloaded image as an RGB NumPy array. This function never raises:
         on any failure it returns ``(error_message, None)``, so callers must
         inspect the second element to tell success from failure.
     """
     # Local import: the URL parsing helper is only needed here.
     from urllib.parse import urlparse

     try:
         # Work on the URL path only, ignoring empty segments. The previous
         # post_url.split('/')[-2] returned "p" instead of the shortcode for
         # URLs without a trailing slash or with "?query" right after the code.
         segments = [part for part in urlparse(post_url).path.split('/') if part]
         if not segments:
             raise ValueError("Could not find a post shortcode in the URL.")

         # Prefer the segment that follows a known post-type marker; otherwise
         # fall back to the last path segment.
         shortcode = segments[-1]
         for marker in ("p", "reel", "reels", "tv"):
             if marker in segments[:-1]:
                 shortcode = segments[segments.index(marker) + 1]
                 break

         # Load the post through an anonymous Instaloader session
         loader = instaloader.Instaloader()
         post = instaloader.Post.from_shortcode(loader.context, shortcode)
         caption = post.caption
         image_url = post.url

         # A timeout keeps the app from hanging forever on a stalled download
         response = requests.get(image_url, timeout=15)
         response.raise_for_status()  # Raise an exception for failed requests

         # Normalise to RGB so downstream display/OCR always gets (H, W, 3)
         img = Image.open(BytesIO(response.content)).convert("RGB")
         return caption, np.array(img)
     except Exception as e:
         # Deliberate best-effort contract: report the reason, never raise
         return str(e), None
 # Initialize PaddleOCR model
 ocr = PaddleOCR(use_angle_cls=True, lang='en')
 # Team details
 team_members = [
     {"name": "Aman Deep", "image": "aman.jpg"},  # Replace with actual paths to images
+    {"name": "Nandini", "image": "nandini.jpg"},
+    {"name": "Abhay Sharma", "image": "abhay.jpg"},
+    {"name": "Ratan Prakash Mishra", "image": "ratan.jpg"}
 ]
 # Function to preprocess the images for the model
 def preprocess_image(image):
     """
     Preprocess the input image for model prediction.
     Args:
         image (PIL.Image): Input image in PIL format.
     Returns:
         np.ndarray: Preprocessed image array ready for prediction.
     """
         print(f"Error processing image: {e}")
         return None  # Return None if there's an error
 # Function to display circular images in a matrix format
 def display_images_in_grid(images, max_images_per_row=4):
     num_images = len(images)
         for j, member in enumerate(members[i * max_members_per_row:(i + 1) * max_members_per_row]):
             with cols[j]:
                 img = Image.open(member["image"])  # Load the image
+                # circular_img = make_image_circular(img)  # Convert to circular format
+                circular_img = img
                 st.image(circular_img, use_column_width=True)  # Display the circular image
                 st.write(member["name"])  # Display the name below the image
+# Function to simulate loading process with a progress bar
+def simulate_progress():
+    progress_bar = st.progress(0)
+    for percent_complete in range(100):
+        time.sleep(0.02)
+        progress_bar.progress(percent_complete + 1)
 # Title and description
 st.title("Amazon Smbhav")
 # Team Details with links
     - **Multi-Market Compatibility:** Expand support to other e-commerce platforms.
     This approach automates listing creation directly from social media content, helping sellers quickly launch optimized Amazon product pages.
     """)
 elif app_mode == "Team Details":
     st.write("## Task 1: 🖼️ OCR to Extract Details 📄")
     st.write("Using OCR to extract details from product packaging material, including brand name and pack size.")
     # Instantiate Instaloader
     L = instaloader.Instaloader()
     post_url = st.text_input("Enter Instagram Post URL:")
     if post_url:
         caption, imageArray = get_instagram_post_details(post_url)
         # get_instagram_post_details reports failure as (error_message, None),
         # so a non-empty caption does NOT mean success. Only the image tells
         # the two cases apart; checking it first also avoids calling .shape
         # on None.
         if imageArray is not None and imageArray.size > 0:
             # Posts without a caption yield None; show/send an empty string
             caption = caption or ""
             st.subheader("Caption:")
             st.write(caption)
             st.subheader("Image:")
             st.image(imageArray, use_column_width=True)
             #############################
             # Perform OCR on the image
             result = ocr.ocr(imageArray, cls=True)
             # A page with no detected text can come back as None, so skip
             # empty pages instead of iterating over them.
             text = "".join(
                 box[1][0] + " "
                 for line in (result or [])
                 if line
                 for box in line
             )
             message = f"image ocr: {text} Caption: {caption}"
             st.write(message)
             #OCR result text to be parsed here through LLM and get product listing content.
             simulate_progress()
             # Build a per-request conversation rather than appending to the
             # shared module-level list, so this post's text does not leak
             # into later requests.
             request_messages = [*messages, {"role": "user", "content": message}]
             completion = client.chat.completions.create(
                 model="meta-llama/Llama-3.2-3B-Instruct",
                 messages=request_messages,
                 max_tokens=500
             )
             productListingContent = completion.choices[0].message
             st.markdown(productListingContent.content)
             #############################
         else:
             st.error("Failed to retrieve the post details. Please check the URL. ////////")
     # File uploader for images (supports multiple files)
     uploaded_files = st.file_uploader("Upload images of products", type=["jpeg", "png", "jpg"], accept_multiple_files=True)
         for uploaded_file in uploaded_files:
             img = Image.open(uploaded_file)
+            circular_images.append(img)
         # Display the circular images in a matrix/grid format
         display_images_in_grid(circular_images, max_images_per_row=4)
         if st.button("Start Analysis"):
             simulate_progress()
             # Process every uploaded image independently: OCR it, then ask the
             # LLM to turn the recognised text into a product listing.
             for uploaded_image in uploaded_files:
                 # Load the uploaded image
                 image = Image.open(uploaded_image)
                 # Convert image to numpy array for OCR processing
                 img_array = np.array(image)
                 # #############################
                 # Perform OCR on the image
                 st.write(f"Extracting details from {uploaded_image.name}...")
                 result = ocr.ocr(img_array, cls=True)
                 # A page with no detected text can come back as None, so skip
                 # empty pages instead of iterating over them.
                 text = "".join(
                     box[1][0] + " "
                     for line in (result or [])
                     if line
                     for box in line
                 )
                 st.write(f"OCR Result: {text}")
                 #OCR result text to be parsed here through LLM and get product listing content.
                 simulate_progress()
                 # Build a fresh conversation per image. Appending to the shared
                 # `messages` list made each later request also carry the OCR
                 # text of every earlier product, mixing their listings.
                 request_messages = [*messages, {"role": "user", "content": text}]
                 completion = client.chat.completions.create(
                     model="meta-llama/Llama-3.2-3B-Instruct",
                     messages=request_messages,
                     max_tokens=500
                 )
                 productListingContent = completion.choices[0].message
                 st.markdown(productListingContent.content)
                 st.markdown("---")
                 #########################################
     else:
         st.write("Please upload images to extract product details.")
 # Footer with animation
 st.markdown("""
     <style>
     <div class="footer">
         <p>© 2024 Amazon Smbhav Challenge. All rights reserved.</p>
     </div>
+""", unsafe_allow_html=True)