Spaces:

sounar
/

ContactDoctor-API

Paused

App Files Files Community

sounar committed on Nov 18, 2024

Commit

0f47e56

verified ·

1 Parent(s): 9c1f656

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -53

app.py CHANGED Viewed

@@ -1,78 +1,65 @@
-import torch
-from PIL import Image
-from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
 import gradio as gr
 import os
 # Retrieve the token from environment variables
 api_token = os.getenv("HF_TOKEN").strip()
-# Configure quantization for efficient memory usage
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_compute_dtype=torch.float16,
-)
-# Load the model and tokenizer with required arguments
-model = AutoModel.from_pretrained(
-    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
-    quantization_config=bnb_config,
-    device_map="auto",
-    torch_dtype=torch.float16,
-    trust_remote_code=True,
-    attn_implementation="flash_attention_2",
-    token=api_token  # Authenticate with your Hugging Face token
 )
-tokenizer = AutoTokenizer.from_pretrained(
-    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
     trust_remote_code=True,
-    token=api_token  # Authenticate with your Hugging Face token
 )
-# Function to handle input text and image
-def process_query(image, question):
     try:
-        # Convert image to RGB format
-        image = image.convert('RGB')
-        # Construct the input message
-        msgs = [{'role': 'user', 'content': [image, question]}]
-        # Generate response using the model
-        res = model.chat(
-            image=image,
-            msgs=msgs,
-            tokenizer=tokenizer,
-            sampling=True,
-            temperature=0.95,
-            stream=True
         )
-        # Collect the generated response
-        generated_text = ""
-        for new_text in res:
-            generated_text += new_text
-        return generated_text
     except Exception as e:
         return f"Error: {str(e)}"
-# Define Gradio interface
 iface = gr.Interface(
-    fn=process_query,
-    inputs=[
-        gr.Image(type="pil", label="Upload an Image"),
-        gr.Textbox(label="Enter a Question")
-    ],
     outputs="text",
-    title="ContactDoctor Multimodal Medical Assistant",
-    description="Upload an image (e.g., X-ray or skin condition) and ask a medical question."
 )
-# Launch the Gradio app with API enabled
 if __name__ == "__main__":
-    iface.launch(enable_api=True, share=True)  # Enables API and generates a public link

 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
 import os
 # Retrieve the token from environment variables
 api_token = os.getenv("HF_TOKEN").strip()
+# Model name
+model_name = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"
+# Load the Hugging Face model and tokenizer with required arguments
+tokenizer = AutoTokenizer.from_pretrained(
+    model_name,
+    token=api_token,  # Authenticate with Hugging Face token
+    trust_remote_code=True  # Allow custom code from the repository
 )
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    token=api_token,
     trust_remote_code=True,
+    device_map="auto",  # Efficient device allocation
+    torch_dtype=torch.float16  # Mixed precision for faster inference
 )
+# Define the function to process user input
+def generate_response(input_text):
     try:
+        # Tokenize the input text
+        inputs = tokenizer(input_text, return_tensors="pt")
+        # Ensure input tensor is sent to the same device as the model
+        input_ids = inputs["input_ids"].to(model.device)
+        # Generate a response using the model
+        outputs = model.generate(
+            input_ids,
+            max_length=256,  # Limit the output length
+            num_return_sequences=1,  # Generate a single response
+            temperature=0.7,  # Adjust for creativity vs. determinism
+            top_p=0.9,  # Nucleus sampling
+            top_k=50  # Top-k sampling
         )
+        # Decode and return the generated text
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response
     except Exception as e:
+        # Return error details in case of failure
         return f"Error: {str(e)}"
+# Create a Gradio interface
 iface = gr.Interface(
+    fn=generate_response,
+    inputs="text",
     outputs="text",
+    title="ContactDoctor Medical Assistant",
+    description="Provide input symptoms or queries and get AI-powered medical advice."
 )
+# Launch the Gradio app
 if __name__ == "__main__":
+    iface.launch()