VishalD1234 committed on
Commit
667a64e
·
verified ·
1 Parent(s): 3f60308

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -42
app.py CHANGED
@@ -10,13 +10,6 @@ MODEL_PATH = "THUDM/cogvlm2-llama3-caption"
10
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
11
  TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
12
 
13
- # Configurable constants
14
- NUM_FRAMES = 24 # Default number of frames to extract
15
- MAX_NEW_TOKENS = 2048
16
- TOP_K = 1
17
- TOP_P = 0.1
18
- DEFAULT_TEMPERATURE = 1.0
19
-
20
  # Delay Reasons for Each Manufacturing Step
21
  DELAY_REASONS = {
22
  "Step 1": ["Delay in Bead Insertion", "Lack of raw material"],
@@ -98,34 +91,38 @@ def get_step_info(step_number):
98
  "Standard Time": "7 seconds",
99
  "Video_substeps_expected": {
100
  "0-3 seconds": "Technician unloads(removes) carcass(tire) from the machine."
101
- }
 
 
 
 
102
  }
103
  }
104
 
105
  return step_details.get(step_number, {"Error": "Invalid step number. Please provide a valid step number."})
106
 
 
 
107
  def load_video(video_data, strategy='chat'):
108
  """Loads and processes video data into a format suitable for model input."""
109
  bridge.set_bridge('torch')
110
-
 
111
  if isinstance(video_data, str):
112
  decord_vr = VideoReader(video_data, ctx=cpu(0))
113
  else:
114
  decord_vr = VideoReader(io.BytesIO(video_data), ctx=cpu(0))
115
 
 
116
  total_frames = len(decord_vr)
117
- if total_frames < NUM_FRAMES:
118
- raise ValueError("Uploaded video is too short for meaningful analysis.")
119
-
120
  timestamps = [i[0] for i in decord_vr.get_frame_timestamp(np.arange(total_frames))]
121
  max_second = round(max(timestamps)) + 1
122
 
123
- frame_id_list = []
124
  for second in range(max_second):
125
  closest_num = min(timestamps, key=lambda x: abs(x - second))
126
  index = timestamps.index(closest_num)
127
  frame_id_list.append(index)
128
- if len(frame_id_list) >= NUM_FRAMES:
129
  break
130
 
131
  video_data = decord_vr.get_batch(frame_id_list)
@@ -154,10 +151,7 @@ def load_model():
154
 
155
  def predict(prompt, video_data, temperature, model, tokenizer):
156
  """Generates predictions based on the video and textual prompt."""
157
- try:
158
- video = load_video(video_data, strategy='chat')
159
- except ValueError as e:
160
- return f"Error loading video: {str(e)}"
161
 
162
  inputs = model.build_conversation_input_ids(
163
  tokenizer=tokenizer,
@@ -175,20 +169,20 @@ def predict(prompt, video_data, temperature, model, tokenizer):
175
  }
176
 
177
  gen_kwargs = {
178
- "max_new_tokens": MAX_NEW_TOKENS,
179
- "pad_token_id": tokenizer.pad_token_id,
180
- "top_k": TOP_K,
181
  "do_sample": False,
182
- "top_p": TOP_P,
183
- "temperature": temperature or DEFAULT_TEMPERATURE,
184
  }
185
 
186
  with torch.no_grad():
187
  outputs = model.generate(**inputs, **gen_kwargs)
188
  outputs = outputs[:, inputs['input_ids'].shape[1]:]
189
- response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
190
 
191
- return f"Analysis Result:\n{response}"
192
 
193
  def get_analysis_prompt(step_number):
194
  """Constructs the prompt for analyzing delay reasons based on the selected step."""
@@ -199,23 +193,100 @@ def get_analysis_prompt(step_number):
199
 
200
  step_name = step_info["Name"]
201
  standard_time = step_info["Standard Time"]
202
- substeps = step_info["Video_substeps_expected"]
203
- delay_reasons = DELAY_REASONS.get(f"Step {step_number}", ["No specific reasons provided."])
204
 
205
- substeps_text = "\n".join([f"- {time}: {action}" for time, action in substeps.items()])
206
- reasons_text = "\n".join([f"- {reason}" for reason in delay_reasons])
207
-
208
  return f"""
209
- You are an AI expert system analyzing manufacturing delays in tire production. Below are the details:
210
- Step: {step_number} - {step_name}
211
- Standard Time: {standard_time}
212
- Substeps Expected in Video:
213
- {substeps_text}
214
-
215
- Potential Delay Reasons:
216
- {reasons_text}
217
-
218
- Task: Analyze the provided video to identify the delay reason. Use the following format:
219
- 1. **Selected Reason:** [Choose the most likely reason from the list above]
220
- 2. **Visual Evidence:** [Describe specific visual cues from the video that support your analysis.]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
11
  TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
12
 
 
 
 
 
 
 
 
13
  # Delay Reasons for Each Manufacturing Step
14
  DELAY_REASONS = {
15
  "Step 1": ["Delay in Bead Insertion", "Lack of raw material"],
 
91
  "Standard Time": "7 seconds",
92
  "Video_substeps_expected": {
93
  "0-3 seconds": "Technician unloads(removes) carcass(tire) from the machine."
94
+ },
95
+ "Potential_Delay_reasons": [
96
+ "Person not available in time(in 3 sec) to remove carcass.",
97
+ "Person is doing bead(ring) insertion before carcass unload causing unload to be delayed by more than 3 sec"
98
+ ]
99
  }
100
  }
101
 
102
  return step_details.get(step_number, {"Error": "Invalid step number. Please provide a valid step number."})
103
 
104
+
105
+
106
  def load_video(video_data, strategy='chat'):
107
  """Loads and processes video data into a format suitable for model input."""
108
  bridge.set_bridge('torch')
109
+ num_frames = 24
110
+
111
  if isinstance(video_data, str):
112
  decord_vr = VideoReader(video_data, ctx=cpu(0))
113
  else:
114
  decord_vr = VideoReader(io.BytesIO(video_data), ctx=cpu(0))
115
 
116
+ frame_id_list = []
117
  total_frames = len(decord_vr)
 
 
 
118
  timestamps = [i[0] for i in decord_vr.get_frame_timestamp(np.arange(total_frames))]
119
  max_second = round(max(timestamps)) + 1
120
 
 
121
  for second in range(max_second):
122
  closest_num = min(timestamps, key=lambda x: abs(x - second))
123
  index = timestamps.index(closest_num)
124
  frame_id_list.append(index)
125
+ if len(frame_id_list) >= num_frames:
126
  break
127
 
128
  video_data = decord_vr.get_batch(frame_id_list)
 
151
 
152
  def predict(prompt, video_data, temperature, model, tokenizer):
153
  """Generates predictions based on the video and textual prompt."""
154
+ video = load_video(video_data, strategy='chat')
 
 
 
155
 
156
  inputs = model.build_conversation_input_ids(
157
  tokenizer=tokenizer,
 
169
  }
170
 
171
  gen_kwargs = {
172
+ "max_new_tokens": 2048,
173
+ "pad_token_id": 128002,
174
+ "top_k": 1,
175
  "do_sample": False,
176
+ "top_p": 0.1,
177
+ "temperature": temperature,
178
  }
179
 
180
  with torch.no_grad():
181
  outputs = model.generate(**inputs, **gen_kwargs)
182
  outputs = outputs[:, inputs['input_ids'].shape[1]:]
183
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
184
 
185
+ return response
186
 
187
  def get_analysis_prompt(step_number):
188
  """Constructs the prompt for analyzing delay reasons based on the selected step."""
 
193
 
194
  step_name = step_info["Name"]
195
  standard_time = step_info["Standard Time"]
196
+ analysis = step_info["Analysis"]
 
197
 
 
 
 
198
  return f"""
199
+ You are an AI expert system specialized in analyzing manufacturing processes and identifying production delays in tire manufacturing. Your role is to accurately classify delay reasons based on visual evidence from production line footage.
200
+ Task Context:
201
+ You are analyzing video footage from Step {step_number} of a tire manufacturing process where a delay has been detected. The step is called {step_name}, and its standard time is {standard_time}.
202
+ Required Analysis:
203
+ Carefully observe the video for visual cues indicating production interruption.
204
+ - If no person is visible in any of the frames, the reason probably might be due to their absence.
205
+ - If a person is visible in the video and is observed touching and modifying the layers of the tire, it indicates an issue with tire patching, and the person might be repairing it.
206
+ - Compare observed evidence against the following possible delay reasons:
207
+ - {analysis}
208
+ Following are the subactivities needs to happen in this step.
209
+
210
+ {get_step_info(step_number)}
211
+
212
+
213
+ Please provide your output in the following format:
214
+ Output_Examples = {
215
+ ["Delay in Bead Insertion", "Lack of raw material"],
216
+ ["Inner Liner Adjustment by Technician", "Person rebuilding defective Tire Sections"],
217
+ ["Manual Adjustment in Ply1 Apply", "Technician repairing defective Tire Sections"],
218
+ ["Delay in Bead Set", "Lack of raw material"],
219
+ ["Delay in Turnup", "Lack of raw material"],
220
+ ["Person Repairing Sidewall", "Person rebuilding defective Tire Sections"],
221
+ ["Delay in Sidewall Stitching", "Lack of raw material"],
222
+ ["No person available to load Carcass", "No person available to collect tire"]
223
+ }
224
+ 1. **Selected Reason:** [State the most likely reason from the given options]
225
+ 2. **Visual Evidence:** [Describe specific visual cues that support your selection]
226
+ 3. **Reasoning:** [Explain why this reason best matches the observed evidence]
227
+ 4. **Alternative Analysis:** [Brief explanation of why other possible reasons are less likely]
228
+ Important: Base your analysis solely on visual evidence from the video. Focus on concrete, observable details rather than assumptions. Clearly state if no person or specific activity is observed.
229
  """
230
+
231
+
232
+
233
# Loaded once at module import; inference() passes these module-level
# objects to predict() on every request.
model, tokenizer = load_model()
234
+
235
def inference(video, step_number):
    """Analyze an uploaded video for the selected manufacturing step.

    Builds the step-specific prompt with get_analysis_prompt() and runs
    predict() with the module-level model and tokenizer.

    Args:
        video: Value received from the video input. Any falsy value
            (None, empty string) is treated as "nothing uploaded".
        step_number: Value received from the step dropdown, e.g. "Step 2".
            May be None/empty when no step has been chosen.

    Returns:
        The text returned by predict(), or a human-readable message when an
        input is missing or the analysis raises.
    """
    # Validate inputs up front so the user gets a specific message instead of
    # a generic failure from deeper in the pipeline.
    if not video:
        return "Please upload a video first."
    if not step_number:
        return "Please select a manufacturing step."

    try:
        prompt = get_analysis_prompt(step_number)
        temperature = 0.3
        return predict(prompt, video, temperature, model, tokenizer)
    except Exception as e:
        # UI boundary: surface the failure in the output box rather than
        # raising into the event handler.
        return f"An error occurred during analysis: {str(e)}"
248
+
249
def create_interface():
    """Build and return the Gradio Blocks app for the analysis system.

    The layout is a single row with two columns: the first holds the video
    upload, the step dropdown and the "Analyze" button; the second holds the
    result textbox. Clicking the button calls inference() with the video and
    the selected step and writes its return value to the textbox.

    Returns:
        The constructed gr.Blocks instance (not yet launched).
    """
    step_choices = [f"Step {i}" for i in range(1, 9)]
    example_rows = [
        ["7838_step2_2_eval.mp4", "Step 2"],
        ["7838_step6_2_eval.mp4", "Step 6"],
        ["7838_step8_1_eval.mp4", "Step 8"],
        ["7993_step6_3_eval.mp4", "Step 6"],
        ["7993_step8_3_eval.mp4", "Step 8"]
    ]

    with gr.Blocks() as app:
        gr.Markdown("""
        # Manufacturing Analysis System
        Upload a video of the manufacturing step and select the step number.
        The system will analyze the video and provide observations.
        """)

        with gr.Row():
            # Input column: video, step selector, trigger button.
            with gr.Column():
                video_input = gr.Video(
                    label="Upload Manufacturing Video",
                    sources=["upload"],
                )
                step_selector = gr.Dropdown(
                    choices=step_choices,
                    label="Manufacturing Step",
                )
                run_button = gr.Button("Analyze", variant="primary")

            # Output column: model response.
            with gr.Column():
                result_box = gr.Textbox(label="Analysis Result", lines=10)

        # Clickable sample inputs; caching disabled so nothing is run up front.
        gr.Examples(
            examples=example_rows,
            inputs=[video_input, step_selector],
            cache_examples=False,
        )

        run_button.click(
            fn=inference,
            inputs=[video_input, step_selector],
            outputs=[result_box],
        )

    return app
289
+
290
if __name__ == "__main__":
    # Script entry point: build the UI, enable the request queue, then
    # launch with share=True.
    demo = create_interface()
    demo.queue()
    demo.launch(share=True)