sounar committed · Commit 0f47e56 · verified · 1 Parent(s): 9c1f656

Update app.py

Files changed (1)
app.py +40 -53
app.py CHANGED
@@ -1,78 +1,65 @@
-import torch
-from PIL import Image
-from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
 import os
 
 # Retrieve the token from environment variables
 api_token = os.getenv("HF_TOKEN").strip()
 
-# Configure quantization for efficient memory usage
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_compute_dtype=torch.float16,
-)
+# Model name
+model_name = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"
 
-# Load the model and tokenizer with required arguments
-model = AutoModel.from_pretrained(
-    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
-    quantization_config=bnb_config,
-    device_map="auto",
-    torch_dtype=torch.float16,
-    trust_remote_code=True,
-    attn_implementation="flash_attention_2",
-    token=api_token  # Authenticate with your Hugging Face token
+# Load the Hugging Face model and tokenizer with required arguments
+tokenizer = AutoTokenizer.from_pretrained(
+    model_name,
+    token=api_token,  # Authenticate with Hugging Face token
+    trust_remote_code=True  # Allow custom code from the repository
 )
 
-tokenizer = AutoTokenizer.from_pretrained(
-    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    token=api_token,
     trust_remote_code=True,
-    token=api_token  # Authenticate with your Hugging Face token
+    device_map="auto",  # Efficient device allocation
+    torch_dtype=torch.float16  # Mixed precision for faster inference
 )
 
-# Function to handle input text and image
-def process_query(image, question):
+# Define the function to process user input
+def generate_response(input_text):
     try:
-        # Convert image to RGB format
-        image = image.convert('RGB')
-
-        # Construct the input message
-        msgs = [{'role': 'user', 'content': [image, question]}]
+        # Tokenize the input text
+        inputs = tokenizer(input_text, return_tensors="pt")
+
+        # Ensure input tensor is sent to the same device as the model
+        input_ids = inputs["input_ids"].to(model.device)
 
-        # Generate response using the model
-        res = model.chat(
-            image=image,
-            msgs=msgs,
-            tokenizer=tokenizer,
-            sampling=True,
-            temperature=0.95,
-            stream=True
+        # Generate a response using the model
+        outputs = model.generate(
+            input_ids,
+            max_length=256,  # Limit the output length
+            num_return_sequences=1,  # Generate a single response
+            temperature=0.7,  # Adjust for creativity vs. determinism
+            top_p=0.9,  # Nucleus sampling
+            top_k=50  # Top-k sampling
         )
 
-        # Collect the generated response
-        generated_text = ""
-        for new_text in res:
-            generated_text += new_text
-
-        return generated_text
+        # Decode and return the generated text
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response
 
     except Exception as e:
+        # Return error details in case of failure
        return f"Error: {str(e)}"
 
-# Define Gradio interface
+# Create a Gradio interface
 iface = gr.Interface(
-    fn=process_query,
-    inputs=[
-        gr.Image(type="pil", label="Upload an Image"),
-        gr.Textbox(label="Enter a Question")
-    ],
+    fn=generate_response,
+    inputs="text",
     outputs="text",
-    title="ContactDoctor Multimodal Medical Assistant",
-    description="Upload an image (e.g., X-ray or skin condition) and ask a medical question."
+    title="ContactDoctor Medical Assistant",
+    description="Provide input symptoms or queries and get AI-powered medical advice."
 )
 
-# Launch the Gradio app with API enabled
+# Launch the Gradio app
 if __name__ == "__main__":
-    iface.launch(enable_api=True, share=True)  # Enables API and generates a public link
+    iface.launch()
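
Two notes on the updated script. First, transformers' generate() runs greedy decoding by default (do_sample=False), so the temperature, top_p, and top_k values passed above are ignored apart from a warning. A minimal sketch of the same call with sampling actually enabled:

    outputs = model.generate(
        input_ids,
        max_length=256,           # Limit the output length
        num_return_sequences=1,   # Generate a single response
        do_sample=True,           # Required for temperature/top_p/top_k to take effect
        temperature=0.7,          # Adjust for creativity vs. determinism
        top_p=0.9,                # Nucleus sampling
        top_k=50                  # Top-k sampling
    )

Second, although the enable_api argument is gone from iface.launch() (recent Gradio releases do not accept it, and the HTTP API is on by default), the app can still be queried programmatically. A minimal sketch using gradio_client, assuming the app runs locally on Gradio's default port; the URL and the sample question are placeholders:

    from gradio_client import Client

    client = Client("http://127.0.0.1:7860/")  # or "<user>/<space-id>" for a hosted Space
    result = client.predict(
        "Patient reports fatigue and pale skin. What could this indicate?",
        api_name="/predict",  # default endpoint name for a gr.Interface
    )
    print(result)

Note also that api_token = os.getenv("HF_TOKEN").strip() raises AttributeError whenever HF_TOKEN is unset, since os.getenv returns None; both the old and new versions of the file share this behavior.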