whyumesh committed on
Commit
108b366
·
verified ·
1 Parent(s): c28fb09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -8
app.py CHANGED
@@ -18,6 +18,21 @@ def load_model():
18
 
19
  model, processor = load_model()
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  @spaces.GPU
22
  def process_image(image, prompt):
23
  messages = [
@@ -115,25 +130,43 @@ def process_video(video, prompt, max_frames=16, frame_interval=30, max_resolutio
115
  @spaces.GPU
116
  def process_content(content, prompt):
117
  if content is None:
118
- return "Please upload an image or video file."
 
 
119
 
120
  if content.name.lower().endswith(('.png', '.jpg', '.jpeg')):
121
- return process_image(Image.open(content.name), prompt)
122
  elif content.name.lower().endswith(('.mp4', '.avi', '.mov')):
123
- return process_video(content, prompt)
124
  else:
125
- return "Unsupported file type. Please provide an image or video file."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  # Gradio interface
128
  iface = gr.Interface(
129
  fn=process_content,
130
  inputs=[
131
- gr.File(label="Upload Image or Video"),
132
- gr.Textbox(label="Enter your prompt")
 
133
  ],
134
  outputs="text",
135
- title="Image and Video Description",
136
- description="Upload an image or video and enter a prompt to get a description or analysis.",
 
137
  )
138
 
139
  if __name__ == "__main__":
 
18
 
19
  model, processor = load_model()
20
 
21
+ SYSTEM_PROMPT = """You are an AI assistant specialized in analyzing images and videos of code editors like VSCode. Your primary task is to identify and extract code snippets visible in these visual inputs. Follow these guidelines:
22
+
23
+ 1. Focus on recognizing and extracting code, ignoring non-code elements like toolbars, file explorers, or terminals.
24
+ 2. If multiple code snippets or files are visible, extract and separate them clearly.
25
+ 3. Preserve the syntax, indentation, and structure of the code as seen in the image/video.
26
+ 4. If you detect syntax errors or warning highlights in the code, mention them but do not attempt to correct the code.
27
+ 5. If the code is partially visible or cut off, extract what you can see and indicate where the code might be incomplete.
28
+ 6. Identify the programming language if possible, based on syntax or file extensions visible.
29
+ 7. If asked about specific elements (e.g., "What's on line 20?"), focus on that particular part of the code.
30
+ 8. Do not invent or assume code that isn't visible in the image/video.
31
+ 9. If the image/video doesn't contain code or an IDE, politely inform the user.
32
+
33
+ Always strive for accuracy in code extraction, as the user may need to directly use or analyze the extracted code."""
34
+
35
+
36
  @spaces.GPU
37
  def process_image(image, prompt):
38
  messages = [
 
@spaces.GPU
def process_content(content, prompt, custom_prompt=None):
    """Dispatch an uploaded file to the image or video pipeline.

    The Gradio interface in this file wires three inputs to this function
    (file upload, predefined-prompt dropdown, custom-prompt textbox), so the
    function must accept three positional arguments. ``custom_prompt`` is
    optional so existing two-argument callers keep working.

    Args:
        content: Uploaded file object exposing a ``.name`` path, or ``None``
            when nothing was uploaded.
        prompt: Prompt selected from the predefined dropdown; may be ``None``
            or empty when the user made no selection.
        custom_prompt: Free-text prompt from the textbox. When it contains
            non-whitespace text it takes precedence over ``prompt``.

    Returns:
        The text produced by ``process_image`` / ``process_video``, or a
        user-facing message when the upload is missing or has an
        unsupported extension.
    """
    if content is None:
        return "Please upload an image or video file of a code editor."

    # Prefer the custom prompt; fall back to the dropdown selection. An empty
    # string is used when neither is provided so the literal text "None" is
    # never interpolated into the model prompt.
    custom_text = (custom_prompt or "").strip()
    user_prompt = custom_text or prompt or ""

    full_prompt = f"{SYSTEM_PROMPT}\n\nUser request: {user_prompt}"

    # Lower-case once so the extension checks are case-insensitive.
    filename = content.name.lower()
    if filename.endswith(('.png', '.jpg', '.jpeg')):
        return process_image(Image.open(content.name), full_prompt)
    if filename.endswith(('.mp4', '.avi', '.mov')):
        return process_video(content, full_prompt)
    return "Unsupported file type. Please provide an image or video file of a code editor."
143
+
144
+ # Predefined prompts
145
+ PREDEFINED_PROMPTS = [
146
+ "Extract all visible code from this IDE screenshot.",
147
+ "What programming language is used in this code editor image?",
148
+ "Are there any syntax errors highlighted in this VSCode recording?",
149
+ "Extract the function definition starting at line 15 in this image.",
150
+ "What are the variable names defined in the visible code?",
151
+ "Extract and separate all different code snippets or files visible in this IDE recording.",
152
+ "Is there any commented code in this screenshot? If so, extract it.",
153
+ "What coding style or convention is being used in this code (e.g., camelCase, snake_case)?",
154
+ "Are there any import statements or library inclusions visible in this IDE image?",
155
+ "Extract only the CSS code visible in this multi-file editor screenshot."
156
+ ]
157
 
158
  # Gradio interface
159
  iface = gr.Interface(
160
  fn=process_content,
161
  inputs=[
162
+ gr.File(label="Upload Image or Video of Code Editor"),
163
+ gr.Dropdown(choices=PREDEFINED_PROMPTS, label="Select a predefined prompt", type="value"),
164
+ gr.Textbox(label="Or enter a custom prompt")
165
  ],
166
  outputs="text",
167
+ title="Code Extraction from IDE Screenshots/Recordings",
168
+ description="Upload an image or video of a code editor and select a predefined prompt or enter a custom one to extract and analyze the code.",
169
+ allow_flagging="never"
170
  )
171
 
172
  if __name__ == "__main__":