sounar committed on
Commit 0b2a88c · verified · 1 Parent(s): b0f7fc0

Update app.py

Files changed (1)
  1. app.py +11 -10
app.py CHANGED
@@ -12,15 +12,16 @@ model_name = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"
 # Load the Hugging Face model and tokenizer with required arguments
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
-    token=api_token,  # Use `token` instead of `use_auth_token`
-    trust_remote_code=True
+    token=api_token,  # Authenticate with Hugging Face token
+    trust_remote_code=True  # Allow custom code from the repository
 )
+
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     token=api_token,
     trust_remote_code=True,
-    device_map="auto",  # Efficiently allocate resources
-    torch_dtype=torch.float16  # Use half precision for faster inference
+    device_map="auto",  # Efficient device allocation
+    torch_dtype=torch.float16  # Mixed precision for faster inference
 )
 
 # Define the function to process user input
@@ -32,11 +33,11 @@ def generate_response(input_text):
     # Generate a response using the model
     outputs = model.generate(
         inputs["input_ids"],
-        max_length=256,
-        num_return_sequences=1,
-        temperature=0.7,
-        top_p=0.9,
-        top_k=50
+        max_length=256,  # Limit the output length
+        num_return_sequences=1,  # Generate a single response
+        temperature=0.7,  # Adjust for creativity vs. determinism
+        top_p=0.9,  # Nucleus sampling
+        top_k=50  # Top-k sampling
     )
 
     # Decode and return the generated text
@@ -53,7 +54,7 @@ iface = gr.Interface(
     outputs="text",
     title="ContactDoctor Medical Assistant",
     description="Provide input symptoms or queries and get AI-powered medical advice.",
-    enable_api=True
+    enable_api=True  # Enables API for external calls
 )
 
 # Launch the Gradio app
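For readers landing on this commit without the rest of the file, the sketch below shows roughly what app.py looks like once these hunks are applied. Only the lines visible in the diff are taken from the commit; the imports, the HF_TOKEN lookup via os.getenv, the tokenization and decoding inside generate_response, the inputs="text" component, and the final iface.launch() call are assumptions filled in for illustration.

# Hypothetical reconstruction of app.py after commit 0b2a88c (assumed parts are marked)
import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

api_token = os.getenv("HF_TOKEN")  # Assumed: token is provided via the Space's secrets

model_name = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"

# Load the Hugging Face model and tokenizer with required arguments
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=api_token,  # Authenticate with Hugging Face token
    trust_remote_code=True  # Allow custom code from the repository
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=api_token,
    trust_remote_code=True,
    device_map="auto",  # Efficient device allocation
    torch_dtype=torch.float16  # Mixed precision for faster inference
)

# Define the function to process user input
def generate_response(input_text):
    # Assumed: plain-text tokenization; the diff only references inputs["input_ids"]
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Generate a response using the model
    outputs = model.generate(
        inputs["input_ids"],
        max_length=256,  # Limit the output length
        num_return_sequences=1,  # Generate a single response
        temperature=0.7,  # Adjust for creativity vs. determinism
        top_p=0.9,  # Nucleus sampling
        top_k=50  # Top-k sampling
    )

    # Decode and return the generated text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Build the Gradio interface (input component assumed)
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="ContactDoctor Medical Assistant",
    description="Provide input symptoms or queries and get AI-powered medical advice.",
    enable_api=True  # Enables API for external calls (kwarg kept exactly as in the commit)
)

# Launch the Gradio app
iface.launch()

Note that temperature, top_p, and top_k only take effect when sampling is enabled (do_sample=True in model.generate), so as written the call may still decode greedily.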