khurrameycon committed on
Commit
564bd99
·
verified ·
1 Parent(s): 3ed8bd4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -164
app.py CHANGED
@@ -1,45 +1,46 @@
1
- # import gradio as gr
2
  import os
3
- import torch
4
- from transformers import AutoProcessor, MllamaForConditionalGeneration, TextIteratorStreamer
5
  from PIL import Image
6
- import spaces
7
  import tempfile
8
- import requests
9
  from PyPDF2 import PdfReader
10
  from threading import Thread
11
- from flask import Flask, request, jsonify
12
  import io
13
  import fitz
 
 
14
 
15
- # Check if we're running in a Hugging Face Space and if SPACES_ZERO_GPU is enabled
16
- # IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
17
- # IS_SPACE = os.environ.get("SPACE_ID", None) is not None
18
-
19
- # Determine the device (GPU if available, else CPU)
20
- device = "cuda" if torch.cuda.is_available() else "cpu"
21
- LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
22
-
23
- print(f"Using device: {device}")
24
- print(f"Low memory mode: {LOW_MEMORY}")
25
-
26
  app = Flask(__name__)
27
 
28
- # Get Hugging Face token from environment variables
29
  HF_TOKEN = os.environ.get('HF_TOKEN')
 
30
 
31
- # Load the model and processor
32
- model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
33
- model = MllamaForConditionalGeneration.from_pretrained(
34
- model_name,
35
- use_auth_token=HF_TOKEN,
36
- torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
37
- device_map="auto" if device == "cuda" else None, # Use device mapping if CUDA is available
38
- )
39
 
40
- # Move the model to the appropriate device (GPU if available)
41
- # model.to(device)
42
- processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  def extract_image_from_pdf(pdf_url, dpi=75):
45
  """
@@ -76,62 +77,62 @@ def extract_image_from_pdf(pdf_url, dpi=75):
76
  print(f"Error extracting first page: {e}")
77
  return None
78
 
79
-
80
-
81
  def predict_image(image_url, text, file_pref):
82
  try:
83
- # Download the image from the URL
84
- # response = requests.get(image_url)
85
- # response.raise_for_status() # Raise an error for invalid responses
86
- # image = Image.open(io.BytesIO(response.content)).convert("RGB")
87
  if file_pref == 'img':
88
  response = requests.get(image_url)
89
- response.raise_for_status() # Raise an error for invalid responses
90
  image = Image.open(io.BytesIO(response.content)).convert("RGB")
91
  else:
92
  image = extract_image_from_pdf(image_url)
93
 
94
- messages = [
95
- {"role": "user", "content": [
96
- {"type": "image"}, # Specify that an image is provided
97
- {"type": "text", "text": text} # Add the user-provided text input
98
- ]}
99
- ]
100
 
101
- # Create the input text using the processor's chat template
102
- input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
103
-
104
- # Process the inputs and move to the appropriate device
105
- inputs = processor(image, input_text, return_tensors="pt").to(device)
106
-
107
- # outputs = model.generate(**inputs, max_new_tokens=100)
108
-
109
- # # Decode the output to return the final response
110
- # response = processor.decode(outputs[0], skip_special_tokens=True)
111
-
112
- # return response
113
-
114
- streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
115
-
116
- generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=4096)
117
- generated_text = ""
118
-
119
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  thread.start()
121
- buffer = ""
122
 
 
123
  for new_text in streamer:
124
  buffer += new_text
125
- # generated_text_without_prompt = buffer
126
- # # time.sleep(0.01)
127
- # yield buffer
128
-
129
  return buffer
130
 
131
  except Exception as e:
132
  raise ValueError(f"Error during prediction: {str(e)}")
133
 
134
-
135
  def extract_text_from_pdf(pdf_url):
136
  try:
137
  response = requests.get(pdf_url)
@@ -149,48 +150,44 @@ def extract_text_from_pdf(pdf_url):
149
  return text
150
  except Exception as e:
151
  raise ValueError(f"Error extracting text from PDF: {str(e)}")
152
- # raise HTTPException(status_code=400, detail=f"Error extracting text from PDF: {str(e)}")
153
 
154
- @spaces.GPU
155
  def predict_text(text):
156
- # pdf_text = extract_text_from_pdf('https://arinsight.co/2024_FA_AEC_1200_GR1_GR2.pdf')
157
-
158
- text_combined = text # + "\n\nExtracted Text from PDF:\n" + pdf_text
159
-
160
- # Prepare the input messages
161
- messages = [{"role": "user", "content": [{"type": "text", "text": text_combined}]}]
162
 
163
- # Create the input text using the processor's chat template
164
- input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
165
-
166
- # Process the inputs and move to the appropriate device
167
- # inputs = processor(image, input_text, return_tensors="pt").to(device)
168
- inputs = processor(text=input_text, return_tensors="pt").to("cuda")
169
- # Generate a response from the model
170
- # outputs = model.generate(**inputs, max_new_tokens=1024)
171
-
172
- # # Decode the output to return the final response
173
- # response = processor.decode(outputs[0], skip_special_tokens=True, skip_prompt=True)
174
-
175
-
176
- streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
177
-
178
- generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
179
- generated_text = ""
180
-
181
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
 
 
 
 
 
 
182
  thread.start()
183
- buffer = ""
184
 
 
185
  for new_text in streamer:
186
  buffer += new_text
187
- # generated_text_without_prompt = buffer
188
- # # time.sleep(0.01)
189
- # yield buffer
190
-
191
  return buffer
192
 
193
-
194
  PROMPT = (
195
  "Extract the following information as per this format:\n"
196
  "'Course Code:'\n"
@@ -213,47 +210,6 @@ PROMPT_SKILLS = (
213
  "'Tertiary Skills'."
214
  )
215
 
216
-
217
- # PROMPT_IMAGE = (
218
- # "You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
219
- # "Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
220
- # "If any specific information is missing or unavailable in the image, replace the corresponding field with null. "
221
- # "Ensure the format is consistent, strictly adhering to the structure shown below.\n\n"
222
- # "Required JSON Format:\n\n"
223
- # "{\n"
224
- # ' "student": {\n'
225
- # ' "name": "string",\n'
226
- # ' "id": "string",\n'
227
- # ' "dob": "string",\n'
228
- # ' "original_start_date": "string",\n'
229
- # ' "cumulative_gpa": "string",\n'
230
- # ' "program": "string",\n'
231
- # ' "status": "string"\n'
232
- # ' },\n'
233
- # ' "courses": [\n'
234
- # ' {\n'
235
- # ' "transfer_institution": "string",\n'
236
- # ' "course_code": "string",\n'
237
- # ' "course_name": "string",\n'
238
- # ' "credits_attempted": number,\n'
239
- # ' "credits_earned": number,\n'
240
- # ' "grade": "string",\n'
241
- # ' "quality_points": number,\n'
242
- # ' "semester_code": "string",\n'
243
- # ' "semester_dates": "string"\n'
244
- # ' }\n'
245
- # " // Additional courses can be added here\n"
246
- # " ]\n"
247
- # "}\n\n"
248
- # "Instructions:\n\n"
249
- # "1. Extract the student information and course details as displayed in the image.\n"
250
- # "2. Use null for any missing or unavailable information.\n"
251
- # "3. Format the extracted data exactly as shown above. Do not deviate from this structure.\n"
252
- # "4. Use accurate field names and ensure proper nesting of data (e.g., 'student' and 'courses' sections).\n"
253
- # "5. The values for numeric fields like credits_attempted, credits_earned, and quality_points should be numbers (not strings).\n"
254
- # )
255
-
256
-
257
  PROMPT_IMAGE_STUDENT = (
258
  "You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
259
  "Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
@@ -309,8 +265,6 @@ PROMPT_IMAGE_COURSES = (
309
  "5. Return only the 'courses' section as JSON.\n"
310
  )
311
 
312
-
313
-
314
  @app.route("/", methods=["GET"])
315
  def home():
316
  return jsonify({"message": "Welcome to the PDF Extraction API. Use the /extract endpoint to extract information."})
@@ -351,9 +305,6 @@ def extract_info():
351
  response_student = predict_image(img_url, prompt_student, file_pref)
352
  response_courses = predict_image(img_url, prompt_courses, file_pref)
353
  response_image = response_student + response_courses
354
-
355
- # response_image = {"student": response_student.get("student", {}), "courses": response_courses.get("courses", [])}
356
-
357
  else:
358
  response_image = ''
359
 
@@ -363,21 +314,4 @@ def extract_info():
363
  return jsonify({"error": str(e)}), 500
364
 
365
  if __name__ == "__main__":
366
- app.run(host="0.0.0.0", port=7860)
367
-
368
-
369
- # # Define the Gradio interface
370
- # interface = gr.Interface(
371
- # fn=predict_text,
372
- # inputs=[
373
- # # gr.Image(type="pil", label="Image Input"), # Image input with label
374
- # gr.Textbox(label="Text Input") # Textbox input with label
375
- # ],
376
- # outputs=gr.Textbox(label="Generated Response"), # Output with a more descriptive label
377
- # title="Llama 3.2 11B Vision Instruct Demo", # Title of the interface
378
- # description="This demo uses Meta's Llama 3.2 11B Vision model to generate responses based on an image and text input.", # Short description
379
- # theme="compact" # Using a compact theme for a cleaner look
380
- # )
381
-
382
- # # Launch the interface
383
- # interface.launch(debug=True)
 
1
+ from flask import Flask, request, jsonify
2
  import os
3
+ import requests
 
4
  from PIL import Image
 
5
  import tempfile
 
6
  from PyPDF2 import PdfReader
7
  from threading import Thread
 
8
  import io
9
  import fitz
10
+ from groq import Groq
11
+ from queue import Queue
12
 
13
+ # Initialize Flask app
 
 
 
 
 
 
 
 
 
 
14
  app = Flask(__name__)
15
 
16
+ # Get API tokens from environment variables
17
  HF_TOKEN = os.environ.get('HF_TOKEN')
18
+ GROQ_API_KEY = os.environ.get('GROQ_API_KEY')
19
 
20
+ # Initialize Groq client
21
+ client = Groq(api_key=GROQ_API_KEY)
 
 
 
 
 
 
22
 
23
+ # Configuration for low memory mode (maintaining original functionality)
24
+ LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
25
+ print(f"Low memory mode: {LOW_MEMORY}")
26
+
27
class TextStreamer:
    """Hand streamed text chunks from a producer to a consumer through a queue.

    The producer calls :meth:`put` once per chunk and finally ``put(None)``
    to mark the end of the stream. The consumer iterates over the instance
    and receives the chunks in the order they were enqueued.
    """

    def __init__(self):
        self.queue = Queue()
        # Not used by this class; kept so existing code that reads or writes
        # ``streamer.buffer`` keeps working.
        self.buffer = ""

    def put(self, text):
        """Enqueue one chunk of text, or ``None`` to signal end of stream."""
        self.queue.put(text)

    def __iter__(self):
        """Yield queued chunks until the ``None`` end marker is received."""
        while True:
            # Blocking get: sleep until the producer delivers something
            # instead of spinning on ``queue.empty()`` and burning a CPU core.
            text = self.queue.get()
            if text is None:  # End signal
                break
            yield text
 
45
  def extract_image_from_pdf(pdf_url, dpi=75):
46
  """
 
77
  print(f"Error extracting first page: {e}")
78
  return None
79
 
 
 
80
def predict_image(image_url, text, file_pref):
    """Ask the Groq chat model about an image (or a PDF's first page).

    Args:
        image_url: URL of the image, or of the PDF when ``file_pref`` is not
            ``'img'``.
        text: Prompt sent to the model together with the image.
        file_pref: ``'img'`` when ``image_url`` points at an image; any other
            value makes the function render the PDF via
            ``extract_image_from_pdf``.

    Returns:
        The model's full reply, i.e. all streamed chunks concatenated.

    Raises:
        ValueError: If the download, the PDF rendering, or the model call
            fails. The original exception is chained as the cause.
    """
    try:
        if file_pref == 'img':
            response = requests.get(image_url)
            response.raise_for_status()
            # Decode once so an unreachable or corrupt image fails here,
            # before any API call is made; the URL itself is what gets sent.
            Image.open(io.BytesIO(response.content)).convert("RGB")
            model_image_url = image_url
        else:
            image = extract_image_from_pdf(image_url)
            if image is None:
                raise ValueError("could not render the first page of the PDF")
            # A PDF URL is not an image, so send the rendered page inline.
            # NOTE(review): assumes extract_image_from_pdf returns a PIL
            # image (it was previously passed to the vision processor
            # alongside PIL images) -- confirm.
            import base64
            png_bytes = io.BytesIO()
            image.save(png_bytes, format="PNG")
            encoded = base64.b64encode(png_bytes.getvalue()).decode("ascii")
            model_image_url = f"data:image/png;base64,{encoded}"

        # NOTE(review): the model must accept image input; confirm that
        # "mixtral-8x7b-32768" does, or switch to a vision-capable model.
        completion = client.chat.completions.create(
            model="mixtral-8x7b-32768",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": model_image_url},
                        },
                        {
                            "type": "text",
                            "text": text,
                        },
                    ],
                }
            ],
            temperature=0.7,
            max_tokens=4096,
            top_p=1,
            stream=True,
        )

        # Consume the stream in this thread: the caller blocks until the
        # reply is complete anyway, and a failure now reaches the ``except``
        # below instead of being printed and turned into an empty reply.
        pieces = []
        for chunk in completion:
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                pieces.append(piece)
        return "".join(pieces)

    except Exception as e:
        raise ValueError(f"Error during prediction: {str(e)}") from e
135
 
 
136
  def extract_text_from_pdf(pdf_url):
137
  try:
138
  response = requests.get(pdf_url)
 
150
  return text
151
  except Exception as e:
152
  raise ValueError(f"Error extracting text from PDF: {str(e)}")
 
153
 
 
154
def predict_text(text):
    """Send a text-only prompt to the Groq chat model and return its reply.

    Args:
        text: The user prompt.

    Returns:
        The model's full reply, i.e. all streamed chunks concatenated.

    Raises:
        ValueError: If the model call fails. The original exception is
            chained as the cause.
    """
    try:
        completion = client.chat.completions.create(
            model="mixtral-8x7b-32768",
            messages=[
                {
                    "role": "user",
                    "content": text,
                }
            ],
            temperature=0.7,
            max_tokens=2048,
            top_p=1,
            stream=True,
        )

        # Consume the stream in this thread: the caller blocks until the
        # reply is complete anyway, and a failure is raised to the caller
        # instead of being printed and turned into an empty reply.
        pieces = []
        for chunk in completion:
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                pieces.append(piece)
        return "".join(pieces)

    except Exception as e:
        raise ValueError(f"Error during prediction: {str(e)}") from e
189
 
190
+ # [Rest of the prompts remain exactly the same as in original]
191
  PROMPT = (
192
  "Extract the following information as per this format:\n"
193
  "'Course Code:'\n"
 
210
  "'Tertiary Skills'."
211
  )
212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  PROMPT_IMAGE_STUDENT = (
214
  "You are a highly intelligent assistant designed to analyze images and extract structured information from them. "
215
  "Your task is to analyze the given image of a student's academic record and generate a response in the exact JSON format provided below. "
 
265
  "5. Return only the 'courses' section as JSON.\n"
266
  )
267
 
 
 
268
  @app.route("/", methods=["GET"])
269
  def home():
270
  return jsonify({"message": "Welcome to the PDF Extraction API. Use the /extract endpoint to extract information."})
 
305
  response_student = predict_image(img_url, prompt_student, file_pref)
306
  response_courses = predict_image(img_url, prompt_courses, file_pref)
307
  response_image = response_student + response_courses
 
 
 
308
  else:
309
  response_image = ''
310
 
 
314
  return jsonify({"error": str(e)}), 500
315
 
316
  if __name__ == "__main__":
317
+ app.run(host="0.0.0.0", port=7860)