engajify committed on
Commit 73a0a63 · verified · 1 Parent(s): 2d286d6

Upload 5 files

Files changed (5)
  1. F50xXeBbcAA0IIx.jpeg +0 -0
  2. README.md +18 -3
  3. app.py +107 -0
  4. gitattributes +35 -0
  5. requirements.txt +5 -0
F50xXeBbcAA0IIx.jpeg ADDED
README.md CHANGED
@@ -1,12 +1,27 @@
  ---
- title: Test 1
- emoji: 🦀
- colorFrom: indigo
+ title: Action Detection In Images
+ emoji: 😻
+ colorFrom: blue
  colorTo: pink
  sdk: gradio
  sdk_version: 4.31.5
  app_file: app.py
  pinned: false
+ license: mit
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ # General Action Classifier
+
+ This is a Gradio interface that allows users to upload an image and specify candidate labels to check if a certain action is present in the image. The app uses a CLIP-ViT model to classify the image based on the provided labels.
+
+ ## How to Use
+
+ 1. Upload an image.
+ 2. Enter candidate labels separated by commas.
+ 3. The app will classify the image and display the results.
+
+ ## Example
+
+ For instance, to check if a person is holding a beverage, you can enter labels like: "human with beverage, human, beverage".
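The README describes zero-shot image classification against a CLIP-ViT checkpoint. As a minimal sketch of the same flow outside the Space, the transformers `zero-shot-image-classification` pipeline can be pointed at the checkpoint that app.py loads; the file name `example.jpg` below is a placeholder for illustration, not a file in this commit.

```python
# Minimal zero-shot sketch using the same checkpoint that app.py loads.
# "example.jpg" is a placeholder image path, not part of this repository.
from PIL import Image
from transformers import pipeline

classifier = pipeline(
    "zero-shot-image-classification",
    model="openai/clip-vit-large-patch14-336",
)

image = Image.open("example.jpg")
labels = ["human with beverage", "human", "beverage", "other"]

# The pipeline returns a list of {"label", "score"} dicts, highest score first.
for prediction in classifier(image, candidate_labels=labels):
    print(f"{prediction['label']}: {prediction['score']:.2f}")
```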
app.py ADDED
@@ -0,0 +1,107 @@
+ import gradio as gr
+ from PIL import ImageFilter, Image
+ from transformers import AutoModelForZeroShotImageClassification, AutoProcessor
+ import torch
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # Initialize the CLIP-ViT model
+ checkpoint = "openai/clip-vit-large-patch14-336"
+ model = AutoModelForZeroShotImageClassification.from_pretrained(checkpoint)
+ model = model.to(device)
+
+ processor = AutoProcessor.from_pretrained(checkpoint)
+
+ def classify_image(image, candidate_labels):
+     messages = []
+     candidate_labels = [label.strip() for label in candidate_labels.split(",") if label.strip()] + ["other"]
+
+     if len(candidate_labels) == 1:
+         candidate_labels.append("other")
+
+     # Blur the image
+     image = image.filter(ImageFilter.GaussianBlur(radius=5))
+
+     # Process the image and candidate labels
+     inputs = processor(images=image, text=candidate_labels, return_tensors="pt", padding=True)
+     inputs = {key: val.to(device) for key, val in inputs.items()}
+
+     # Get model's output
+     with torch.no_grad():
+         outputs = model(**inputs)
+
+     logits = outputs.logits_per_image[0]
+     probs = logits.softmax(dim=-1).cpu().numpy()
+
+     # Organize results
+     results = [
+         {"score": float(score), "label": candidate_label}
+         for score, candidate_label in sorted(zip(probs, candidate_labels), key=lambda x: -x[0])
+     ]
+
+     # Convert results to list of lists for Dataframe
+     results_for_df = [[res['label'], res['score']] for res in results]
+
+     # Decision-making logic
+     top_label = results[0]["label"]
+     second_label = results[1]["label"] if len(results) > 1 else "None"
+
+     # Add messages to understand the scores
+     messages.append(f"Top label: {top_label} with score: {results[0]['score']:.2f}")
+     messages.append(f"Second label: {second_label} with score: {results[1]['score']:.2f}" if len(results) > 1 else "")
+
+     # Example decision logic for specific scenarios (can be customized further)
+     if top_label == candidate_labels[0] and results[0]["score"] >= 0.58 and second_label != "other":
+         messages.append("Triggered the new 0.58 check!")
+         result = True
+     elif top_label == candidate_labels[0] and second_label in candidate_labels[:-1] and (results[0]['score'] + results[1]['score']) >= 0.90:
+         messages.append("Triggered the 90% combined check!")
+         result = True
+     elif top_label == candidate_labels[1] and second_label == candidate_labels[0] and (results[0]['score'] + results[1]['score']) >= 0.95:
+         messages.append("Triggered the 95% reverse order check!")
+         result = True
+     else:
+         result = False
+
+     return result, top_label, results_for_df, messages
+
+ # Default values
+ default_labels = "human with beverage,human,beverage"
+ default_image_path = "F50xXeBbcAA0IIx.jpeg"
+
+ # Load default image
+ default_image = Image.open(default_image_path)
+
+ iface = gr.Interface(
+     fn=classify_image,
+     inputs=[
+         gr.Image(type="pil", label="Upload an Image", value=default_image),
+         gr.Textbox(label="Candidate Labels (comma separated)", value=default_labels)
+     ],
+     outputs=[
+         gr.Label(label="Result"),
+         gr.Textbox(label="Top Label"),
+         gr.Dataframe(headers=["Label", "Score"], label="Details"),
+         gr.Textbox(label="Messages")
+     ],
+     title="General Action Classifier",
+     description="""
+ **Instructions:**
+
+ 1. **Upload an Image**: Drag and drop an image or click to upload an image file. A default image is provided.
+
+ 2. **Enter Candidate Labels**:
+    - Provide candidate labels separated by commas.
+    - For example: `human with beverage,human,beverage`
+    - The label "other" will automatically be added to the list of candidate labels.
+    - You can enter just one label, and "other" will still be added automatically. Default labels are provided.
+
+ 3. **View Results**:
+    - The result will indicate whether the specified action (top label) is present in the image.
+    - Detailed scores for each label will be displayed in a table.
+    - Additional messages explaining the decision process will also be shown.
+ """
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
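For a quick local smoke test of the committed app.py outside the Gradio UI, `classify_image` can be imported and called directly. This is a hypothetical usage sketch that assumes it runs from the repository root next to app.py and the bundled default image; importing app.py downloads and loads the CLIP checkpoint before the call.

```python
# Hypothetical smoke test for classify_image from the committed app.py.
# Run from the repo root; importing app loads the CLIP checkpoint but does
# not launch the interface (the launch() call is guarded by __main__).
from PIL import Image
from app import classify_image

image = Image.open("F50xXeBbcAA0IIx.jpeg")
result, top_label, scores, messages = classify_image(
    image, "human with beverage,human,beverage"
)

print("Action detected:", result)   # True/False from the threshold logic
print("Top label:", top_label)
for label, score in scores:         # the [label, score] rows shown in the Dataframe
    print(f"  {label}: {score:.2f}")
for message in [m for m in messages if m]:
    print(message)
```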
gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio
+ transformers
+ torch
+ pillow
+ requests