Update app.py
app.py CHANGED
@@ -12,15 +12,16 @@ model_name = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"
 # Load the Hugging Face model and tokenizer with required arguments
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
-    token=api_token, #
-    trust_remote_code=True
+    token=api_token,        # Authenticate with Hugging Face token
+    trust_remote_code=True  # Allow custom code from the repository
 )
+
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     token=api_token,
     trust_remote_code=True,
-    device_map="auto", #
-    torch_dtype=torch.float16 #
+    device_map="auto",          # Efficient device allocation
+    torch_dtype=torch.float16   # Mixed precision for faster inference
 )
 
 # Define the function to process user input
@@ -32,11 +33,11 @@ def generate_response(input_text):
     # Generate a response using the model
     outputs = model.generate(
         inputs["input_ids"],
-        max_length=256,
-        num_return_sequences=1,
-        temperature=0.7,
-        top_p=0.9,
-        top_k=50
+        max_length=256,          # Limit the output length
+        num_return_sequences=1,  # Generate a single response
+        temperature=0.7,         # Adjust for creativity vs. determinism
+        top_p=0.9,               # Nucleus sampling
+        top_k=50                 # Top-k sampling
     )
 
     # Decode and return the generated text
@@ -53,7 +54,7 @@ iface = gr.Interface(
     outputs="text",
     title="ContactDoctor Medical Assistant",
     description="Provide input symptoms or queries and get AI-powered medical advice.",
-    enable_api=True
+    enable_api=True  # Enables API for external calls
 )
 
 # Launch the Gradio app
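
A caveat on the generation settings annotated in the second hunk: in Hugging Face transformers, temperature, top_p, and top_k only take effect when sampling is enabled, so the call likely also needs do_sample=True to behave the way the new comments describe. A minimal sketch under that assumption, reusing the tokenizer/model names from the diff (the prompt string is illustrative):

# Sketch only: without do_sample=True, generate() decodes greedily and the
# temperature/top_p/top_k values are ignored.
inputs = tokenizer("Patient reports a persistent cough and mild fever.", return_tensors="pt").to(model.device)
outputs = model.generate(
    inputs["input_ids"],
    max_length=256,          # Limit the output length
    do_sample=True,          # Required for temperature/top_p/top_k to apply
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    num_return_sequences=1,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))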
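
Since the comment on the last hunk says enable_api=True is meant for external calls, a minimal sketch of querying the running Space with the gradio_client package may be useful; the Space id below is a placeholder and /predict is assumed to be the Interface's default endpoint name:

# Sketch only: replace the Space id with the real one; requires
# `pip install gradio_client`.
from gradio_client import Client

client = Client("your-username/your-space-name")   # hypothetical Space id
result = client.predict(
    "I have a persistent cough and mild fever.",    # text input of the Interface
    api_name="/predict",
)
print(result)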