Exched committed on
Commit 5c78d10 · verified · 1 Parent(s): 426ae67

Update app.py

Files changed (1): app.py (+38, -23)
app.py CHANGED
@@ -1,40 +1,55 @@
 import gradio as gr
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, CLIPProcessor, CLIPModel
 from PIL import Image
 import requests
 from io import BytesIO

-# Load YOLOv5 pre-trained model from Hugging Face
-model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # You can choose other versions like yolov5m or yolov5l
+# Load CLIP model for image classification
+clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

-# Function for object detection
-def detect_objects(input_image):
-    # If the input is a URL, download the image
+# Load Mistral-7B-Instruct-v0.3 model for chat
+mistral_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
+mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
+
+# Function for image classification with CLIP (anime recognition)
+def classify_image(input_image):
     if isinstance(input_image, str):
         response = requests.get(input_image)
         img = Image.open(BytesIO(response.content))
     else:
         img = Image.fromarray(input_image)

-    # Run YOLOv5 object detection
-    results = model(img)
-
-    # Render results on image
-    results.render()  # Render boxes on the image
+    # Prepare the image and text (anime-related labels)
+    inputs = clip_processor(text=["anime", "cartoon", "realistic", "painting"], images=img, return_tensors="pt", padding=True)
+    outputs = clip_model(**inputs)
+    logits_per_image = outputs.logits_per_image  # this is the image-text similarity score
+    probs = logits_per_image.softmax(dim=1)  # we can apply softmax to get the label probabilities

-    # Return image with detections
-    output_image = results.imgs[0]
-    return Image.fromarray(output_image)
+    # Return the predicted class label
+    labels = ["anime", "cartoon", "realistic", "painting"]
+    predicted_label = labels[probs.argmax()]
+    return predicted_label
+
+# Function for chat with Mistral 7B Instruct
+def chat_with_mistral(input_text):
+    inputs = mistral_tokenizer(input_text, return_tensors="pt")
+    outputs = mistral_model.generate(inputs["input_ids"], max_length=150)
+    response = mistral_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response

-# Create Gradio interface
-interface = gr.Interface(
-    fn=detect_objects,
-    inputs=gr.inputs.Image(type="numpy", label="Upload an image"),
-    outputs=gr.outputs.Image(type="pil", label="Detected Image"),
-    title="YOLOv5 Object Detection",
-    description="Upload an image and detect objects using YOLOv5 model. The model can identify objects like people, cars, animals, and more.",
-    theme="huggingface"
-)
+# Create Gradio interface for both Image Classification and Chat
+with gr.Blocks() as demo:
+    with gr.Tab("Chat with Mistral"):
+        chat_input = gr.Textbox(label="Ask Mistral 7B", placeholder="Type your question here...")
+        chat_output = gr.Textbox(label="Mistral's Reply", interactive=False)
+        chat_input.submit(chat_with_mistral, inputs=chat_input, outputs=chat_output)
+
+    with gr.Tab("Classify Anime Image"):
+        img_input = gr.Image(type="numpy", label="Upload Image for Anime Classification")
+        img_output = gr.Textbox(label="Predicted Label", interactive=False)
+        img_input.change(classify_image, inputs=img_input, outputs=img_output)

 # Launch the interface
-interface.launch()
+demo.launch()
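
One practical caveat with the new version: `AutoModelForCausalLM.from_pretrained` loads the 7B checkpoint in full precision, which needs roughly 28 GB of memory. A reduced-precision load is the usual workaround on Space hardware; a minimal sketch, assuming torch and the accelerate package are installed (the keyword arguments below are standard transformers options, not part of this commit):

import torch
from transformers import AutoModelForCausalLM

# Half-precision load with automatic device placement (needs `accelerate`).
mistral_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.3",
    torch_dtype=torch.float16,
    device_map="auto",
)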
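
Also worth noting: `chat_with_mistral` passes raw text straight to `generate`, and `max_length=150` caps the prompt and the completion together. Instruct-tuned Mistral checkpoints are normally prompted through the tokenizer's chat template; a sketch of that variant, assuming transformers' standard `apply_chat_template` API (the function name is illustrative, not from the commit):

# Hypothetical variant: wrap the user text in the model's chat template.
def chat_with_mistral_templated(input_text):
    messages = [{"role": "user", "content": input_text}]
    input_ids = mistral_tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    outputs = mistral_model.generate(input_ids, max_new_tokens=150)
    # Decode only the newly generated tokens, not the echoed prompt.
    return mistral_tokenizer.decode(
        outputs[0][input_ids.shape[-1]:], skip_special_tokens=True
    )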
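
Since `classify_image` accepts either a NumPy array (what the Gradio component passes) or a URL string, it can be smoke-tested outside the UI; the URL below is a placeholder, not from the commit:

# Quick check outside Gradio (placeholder URL).
label = classify_image("https://example.com/sample.jpg")
print(label)  # one of "anime", "cartoon", "realistic", "painting"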