Update app.py
app.py CHANGED
@@ -12,15 +12,16 @@ model_name = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"
 # Load the Hugging Face model and tokenizer with required arguments
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
-    token=api_token, #
-    trust_remote_code=True
+    token=api_token,        # Authenticate with Hugging Face token
+    trust_remote_code=True  # Allow custom code from the repository
 )
+
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     token=api_token,
     trust_remote_code=True,
-    device_map="auto", #
-    torch_dtype=torch.float16 #
+    device_map="auto",          # Efficient device allocation
+    torch_dtype=torch.float16   # Mixed precision for faster inference
 )
 
 # Define the function to process user input
@@ -32,11 +33,11 @@ def generate_response(input_text):
     # Generate a response using the model
     outputs = model.generate(
         inputs["input_ids"],
-        max_length=256,
-        num_return_sequences=1,
-        temperature=0.7,
-        top_p=0.9,
-        top_k=50
+        max_length=256,          # Limit the output length
+        num_return_sequences=1,  # Generate a single response
+        temperature=0.7,         # Adjust for creativity vs. determinism
+        top_p=0.9,               # Nucleus sampling
+        top_k=50                 # Top-k sampling
     )
 
     # Decode and return the generated text
@@ -53,7 +54,7 @@ iface = gr.Interface(
     outputs="text",
     title="ContactDoctor Medical Assistant",
     description="Provide input symptoms or queries and get AI-powered medical advice.",
-    enable_api=True
+    enable_api=True  # Enables API for external calls
 )
 
 # Launch the Gradio app
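
A caveat on the generation settings annotated in the second hunk: in Hugging Face transformers, temperature, top_p, and top_k only take effect when sampling is enabled, so the call likely also needs do_sample=True to behave the way the new comments describe. A minimal sketch under that assumption, reusing the tokenizer/model names from the diff (the prompt string is illustrative):

# Sketch only: without do_sample=True, generate() decodes greedily and the
# temperature/top_p/top_k values are ignored.
inputs = tokenizer("Patient reports a persistent cough and mild fever.", return_tensors="pt").to(model.device)
outputs = model.generate(
    inputs["input_ids"],
    max_length=256,          # Limit the output length
    do_sample=True,          # Required for temperature/top_p/top_k to apply
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    num_return_sequences=1,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))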
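
Since the comment on the last hunk says enable_api=True is meant for external calls, a minimal sketch of querying the running Space with the gradio_client package may be useful; the Space id below is a placeholder and /predict is assumed to be the Interface's default endpoint name:

# Sketch only: replace the Space id with the real one; requires
# `pip install gradio_client`.
from gradio_client import Client

client = Client("your-username/your-space-name")   # hypothetical Space id
result = client.predict(
    "I have a persistent cough and mild fever.",    # text input of the Interface
    api_name="/predict",
)
print(result)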