Spaces:
Sleeping
Sleeping
Stardragon2099
committed on
Commit
·
345e5f6
1
Parent(s):
d8012f6
ported to chatgpt api
Browse files- app.py +54 -20
- requirements.txt +2 -1
app.py
CHANGED
@@ -5,39 +5,45 @@ from transformers import AutoModelForCausalLM, AutoProcessor
|
|
5 |
import torch
|
6 |
import gradio as gr
|
7 |
import re
|
|
|
|
|
|
|
8 |
|
9 |
# model_name = "arjunanand13/Florence-enphase2"
|
10 |
-
model_name = "Stardragon2099/florence-adlp-40e"
|
11 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
12 |
-
print(f"Using device: {device}")
|
13 |
|
14 |
-
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
15 |
-
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
16 |
-
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch_dtype, trust_remote_code=True).to(device)
|
17 |
-
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
|
18 |
|
19 |
-
|
|
|
|
|
|
|
20 |
|
21 |
DEFAULT_PROMPT = ("You are a Leg Lift Classifier. There is an image of a throughput component "
|
22 |
"and we need to identify if the leg is inserted in the hole or not. Return 'True' "
|
23 |
"if any leg is not completely seated in the hole; return 'False' if the leg is inserted "
|
24 |
"in the hole. Return only the required JSON in this format: {Leg_lift: , Reason: }.")
|
25 |
|
26 |
-
def predict(img, prompt):
|
27 |
-
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
|
38 |
-
|
39 |
|
40 |
-
|
41 |
|
42 |
|
43 |
# if not isinstance(image, Image.Image):
|
@@ -52,6 +58,34 @@ def predict(img, prompt):
|
|
52 |
# answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
|
53 |
# return answer
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
def get_prompt(method, component_label=None):
|
56 |
global prompt
|
57 |
if method == "leg_lift":
|
|
|
5 |
import torch
|
6 |
import gradio as gr
|
7 |
import re
|
8 |
+
import openai
|
9 |
+
import base64
|
10 |
+
from io import BytesIO
|
11 |
|
12 |
# model_name = "arjunanand13/Florence-enphase2"
|
13 |
+
# model_name = "Stardragon2099/florence-adlp-40e"
|
14 |
+
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
15 |
+
# print(f"Using device: {device}")
|
16 |
|
17 |
+
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
18 |
+
# torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
19 |
+
# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch_dtype, trust_remote_code=True).to(device)
|
20 |
+
# processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
|
21 |
|
22 |
+
# Retrieve the API key from Hugging Face Secrets
|
23 |
+
openai.api_key = os.environ.get("Chatgpt_api")
|
24 |
+
|
25 |
+
# torch.cuda.empty_cache()
|
26 |
|
27 |
DEFAULT_PROMPT = ("You are a Leg Lift Classifier. There is an image of a throughput component "
|
28 |
"and we need to identify if the leg is inserted in the hole or not. Return 'True' "
|
29 |
"if any leg is not completely seated in the hole; return 'False' if the leg is inserted "
|
30 |
"in the hole. Return only the required JSON in this format: {Leg_lift: , Reason: }.")
|
31 |
|
32 |
+
# def predict(img, prompt):
|
33 |
+
# inputs = processor(text=prompt, images=img, return_tensors="pt").to(device, torch_dtype)
|
34 |
|
35 |
+
# generated_ids = model.generate(
|
36 |
+
# input_ids=inputs["input_ids"],
|
37 |
+
# pixel_values=inputs["pixel_values"],
|
38 |
+
# max_new_tokens=1024,
|
39 |
+
# do_sample=False,
|
40 |
+
# num_beams=3
|
41 |
+
# )
|
42 |
+
# generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
43 |
|
44 |
+
# parsed_answer = processor.post_process_generation(generated_text, task= prompt, image_size=(img.width, img.height))
|
45 |
|
46 |
+
# return parsed_answer
|
47 |
|
48 |
|
49 |
# if not isinstance(image, Image.Image):
|
|
|
58 |
# answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
|
59 |
# return answer
|
60 |
|
61 |
+
def predict(img, prompt, model="gpt-4o"):
    """Send an image and an instruction prompt to the OpenAI chat API.

    Args:
        img: Pillow image to analyse. ``None`` (for example, nothing was
            uploaded) is rejected with an error string instead of raising.
        prompt: Instruction text, sent as the system message.
        model: Name of the chat model to call. It must accept image input;
            a text-only model cannot read the attached picture.

    Returns:
        The text of the first completion choice, or a string starting with
        ``"Error: "`` when the image is missing, cannot be encoded, or the
        API request fails.
    """
    if img is None:
        return "Error: no image provided"

    try:
        # Serialise the Pillow image to PNG bytes and base64-encode them.
        buffer = BytesIO()
        img.save(buffer, format="PNG")
        encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")

        # Attach the picture as an image content part (data URL). Pasting the
        # base64 text into a plain string message makes the model treat it as
        # ordinary text and quickly exceeds the context window.
        messages = [
            {"role": "system", "content": prompt},
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{encoded_image}",
                        },
                    },
                ],
            },
        ]

        # openai>=1.0 removed ``openai.ChatCompletion``; the presence of the
        # ``OpenAI`` client class identifies the new SDK, whose module-level
        # client still honours ``openai.api_key``.
        if hasattr(openai, "OpenAI"):
            response = openai.chat.completions.create(
                model=model,
                messages=messages,
            )
            return response.choices[0].message.content

        # Legacy SDK (openai<1.0).
        response = openai.ChatCompletion.create(
            model=model,
            messages=messages,
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"Error: {e}"
|
88 |
+
|
89 |
def get_prompt(method, component_label=None):
|
90 |
global prompt
|
91 |
if method == "leg_lift":
|
requirements.txt
CHANGED
@@ -2,4 +2,5 @@ torch
|
|
2 |
pillow
|
3 |
transformers
|
4 |
timm
|
5 |
-
einops
|
|
|
|
2 |
pillow
|
3 |
transformers
|
4 |
timm
|
5 |
+
einops
|
6 |
+
openai
|