akshit-g commited on
Commit
8ac4152
·
1 Parent(s): 431c5f2

add : files

Browse files
Files changed (3) hide show
  1. README.md +8 -6
  2. app.py +94 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,14 @@
1
  ---
2
- title: SeeForMe Image
3
- emoji: 🐠
4
- colorFrom: blue
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.8.0
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: SeeForMe
3
+ emoji: πŸ‘οΈπŸ‘οΈ
4
+ colorFrom: indigo
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 4.39.0
8
  app_file: app.py
9
  pinned: false
10
+ short_description: See For Me - Image Version
11
+ license: apache-2.0
12
  ---
13
 
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import spaces
import torch
import re
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
from PIL import ImageDraw
from torchvision.transforms.v2 import Resize

import os
import subprocess

# Install flash-attn at startup: it cannot go in requirements.txt because it
# needs the CUDA toolchain at build time (common HF Spaces pattern).
# FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE skips the CUDA compile step.
# NOTE: the env mapping must EXTEND os.environ — passing a bare one-key dict
# (as the original did) wipes PATH and every other variable, which can make
# the pip invocation itself fail.
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)

# moondream2 vision-language model, pinned to a fixed revision for
# reproducibility (trust_remote_code is required by this repo's custom code).
model_id = "vikhyatk/moondream2"
revision = "2024-08-26"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision,
    torch_dtype=torch.bfloat16, device_map={"": "cuda"},
    attn_implementation="flash_attention_2"
)
moondream.eval()  # inference only: disable dropout / training-mode layers
22
+
23
+
24
@spaces.GPU(duration=10)
def answer_question(img, prompt):
    """Stream the model's answer to *prompt* about image *img*.

    Runs generation on a worker thread and yields the accumulated,
    stripped answer text each time a new token arrives, so the Gradio
    output component updates incrementally.
    """
    embeds = moondream.encode_image(img)
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    # Generation happens on a background thread; this generator consumes
    # tokens from the streamer as they are produced.
    worker = Thread(
        target=moondream.answer_question,
        kwargs=dict(
            image_embeds=embeds,
            question=prompt,
            tokenizer=tokenizer,
            streamer=streamer,
        ),
    )
    worker.start()

    pieces = []
    for chunk in streamer:
        pieces.append(chunk)
        yield "".join(pieces).strip()
43
+
44
def extract_floats(text):
    """Find a bracketed array of four numbers in *text*.

    Returns the four values as a list of floats, or None when no such
    array is present.  Accepts both integer and decimal components
    (e.g. ``[0, 0.5, 1, 0.75]``) — the original pattern required a
    ``d.d`` form and silently rejected whole-number coordinates the
    model may emit.
    """
    # One signed number, with an optional fractional part.
    number = r"(-?\d+(?:\.\d+)?)"
    pattern = r"\[\s*" + r"\s*,\s*".join([number] * 4) + r"\s*\]"
    match = re.search(pattern, text)
    if match:
        return [float(num) for num in match.groups()]
    return None  # no four-number array found
52
+
53
+
54
def extract_bbox(text):
    """Return a ``(x1, y1, x2, y2)`` tuple parsed from *text*, or None.

    Thin adapter over :func:`extract_floats` that converts its list
    result into a tuple.
    """
    # Parse once — the original called extract_floats() twice (once to
    # test, once to unpack), running the regex search redundantly.
    floats = extract_floats(text)
    if floats is None:
        return None
    x1, y1, x2, y2 = floats
    return (x1, y1, x2, y2)
60
+
61
def process_answer(img, answer):
    """Show *img* annotated with a bbox if *answer* contains one.

    Returns a ``gr.update`` that reveals the annotation image component
    with the drawn rectangle, or hides it when no bounding box is found
    in the answer text.
    """
    # Parse once — the original called extract_bbox() twice (test + unpack),
    # each of which ran the regex search again.
    bbox = extract_bbox(answer)
    if bbox is None:
        return gr.update(visible=False, value=None)

    x1, y1, x2, y2 = bbox
    # Model coordinates are normalized to [0, 1]; scale them to pixel
    # coordinates of a resized copy (torchvision Resize(768) scales the
    # shorter edge to 768 px) so the original upload is left untouched.
    draw_image = Resize(768)(img)
    width, height = draw_image.size
    pixel_box = (int(x1 * width), int(y1 * height), int(x2 * width), int(y2 * height))
    ImageDraw.Draw(draw_image).rectangle(pixel_box, outline="red", width=3)
    return gr.update(visible=True, value=draw_image)
73
+
74
# ---------------------------------------------------------------------------
# Gradio UI: prompt textbox + submit button on top, then the image uploader
# next to a streaming markdown response and an (initially hidden)
# annotated-image panel.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # See For Me : Real-time Video Assistance for the Visually Impaired using DL
        The "See For Me" web application is designed to support visually challenged individuals by enhancing their ability to navigate and interact with their environment. Leveraging advancements in machine learning (ML) and deep learning (DL), the project aims to provide real-time visual assistance, enabling users to access and understand textual information in their surroundings.
        """
    )
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        with gr.Column():
            output = gr.Markdown(label="Response")
            ann = gr.Image(visible=False, label="Annotated Image")

    # Both clicking the button and pressing Enter in the textbox stream the
    # model's (generator) answer into `output`.
    submit.click(answer_question, [img, prompt], output)
    prompt.submit(answer_question, [img, prompt], output)
    # Each streamed update to `output` is re-scanned for a bounding box;
    # the annotated image is shown/hidden accordingly.
    output.change(process_answer, [img, output], ann, show_progress=False)

# queue() is required for generator (streaming) handlers; launch() starts
# the web server.
demo.queue().launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ timm==0.9.12
2
+ transformers==4.44.0
3
+ einops==0.8.0
4
+ accelerate==0.32.1