File size: 20,908 Bytes
722ecec
fd10b6c
39dff4c
 
 
28b69ba
4dc9e5f
3eb706b
f877950
ad65b09
f877950
eae970b
 
 
16c1c4f
 
ad65b09
53714db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f4cece
 
7dc22ca
56811e2
a31fde9
fa57d02
91f8c28
 
 
 
f877950
 
967b8e7
 
f877950
7dc22ca
f877950
 
 
 
a31fde9
f877950
a31fde9
abe552f
a31fde9
9bda859
f86940b
f877950
215f2d8
f877950
 
 
215f2d8
 
 
 
f877950
215f2d8
 
f877950
215f2d8
 
 
 
 
 
 
 
 
 
 
 
 
874e011
215f2d8
874e011
215f2d8
 
7bd7744
f877950
4dc9e5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f877950
be77a46
 
 
 
 
 
f877950
be77a46
 
 
 
 
 
 
 
 
 
 
 
 
 
11abe35
7dc22ca
 
 
f877950
c540f1a
c1f218b
 
74f3ed7
be980dd
 
 
f877950
be980dd
f877950
 
be980dd
 
f877950
 
be980dd
f877950
be980dd
 
967b8e7
f89be67
cbb63b6
 
86a924b
cbb63b6
7c30abd
 
 
 
 
 
 
e98c8c0
86a924b
967b8e7
cfcb504
 
19429d3
f89be67
81dcc03
cfcb504
 
f89be67
 
 
 
 
 
19429d3
 
 
 
 
 
93ae82c
cbb63b6
53714db
967b8e7
f877950
415223e
 
 
 
17b13ec
 
415223e
 
 
17b13ec
415223e
fc4944e
415223e
ff4e34f
415223e
ff4e34f
 
 
 
c6ddc86
3661992
f877950
 
 
4854a72
176b9ce
 
f877950
 
176b9ce
 
 
 
4854a72
 
 
176b9ce
d354d71
d1b23d4
176b9ce
 
 
 
 
 
 
 
 
 
 
d50b1d6
176b9ce
 
 
4854a72
f877950
 
176b9ce
 
 
 
 
 
 
 
 
 
d354d71
32cbfb2
176b9ce
4854a72
 
 
 
bb31795
 
176b9ce
4854a72
 
176b9ce
4854a72
176b9ce
4854a72
f2e5be8
722ecec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eae970b
 
 
 
 
 
f877950
 
eae970b
 
 
 
 
 
 
 
 
f877950
 
eae970b
f877950
eae970b
 
f877950
 
 
eae970b
 
 
 
 
 
 
 
 
f877950
eae970b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
053a38c
 
 
5c14e87
eae970b
 
 
 
 
f877950
23e856e
 
f877950
eae970b
23e856e
f877950
eae970b
 
f123231
eae970b
41cbd00
8de78b2
8586313
1b853a7
2f101a3
1b853a7
 
583b605
 
 
 
 
f877950
6de3447
 
663551b
b8d5b3e
751b072
fddbec9
 
 
f38c30e
 
 
751b072
4dc9e5f
 
9bda859
2f101a3
b8d5b3e
252dd70
6de3447
415223e
 
d456b20
57f52ca
25b2322
 
6de3447
25b2322
6de3447
25b2322
b510b99
2f101a3
0b6ead0
 
 
 
 
 
5c14e87
 
 
 
 
437879c
5c14e87
86a924b
0b6ead0
8eb3297
93ae82c
53714db
ea5be1b
583b605
215f2d8
0b6ead0
5c14e87
 
 
 
 
 
93ae82c
f308688
5c14e87
a2ac4ba
57f52ca
c03a440
86a924b
5c14e87
f877950
0b077bd
86a924b
 
eb0e999
967b8e7
c797359
a783c53
950e427
d2609b3
 
583b605
 
 
53ded4b
583b605
 
 
d2609b3
967b8e7
c797359
81be9c7
a783c53
967b8e7
c797359
 
732e3f7
a783c53
967b8e7
c797359
 
d2609b3
86a924b
 
 
a783c53
86a924b
 
 
 
 
 
967b8e7
00136b0
 
 
dac51d0
86a924b
 
 
80d8737
86a924b
80d8737
a783c53
9db4018
f877950
967b8e7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
from gradio_client import Client
import numpy as np
import gradio as gr
import requests
import json
import dotenv
import soundfile as sf
import time
import textwrap
from PIL import Image
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
import torch
import os
import uuid


# Markdown banner rendered at the top of the page (passed to gr.Markdown in create_interface).
# NOTE(review): the text contains a closing </h3> with no opening tag and a stray
# double quote at the end of the last line; both are part of the runtime string.
welcome_message = """
# 👋🏻Welcome to ⚕🗣️😷MultiMed - Access Chat ⚕🗣️😷

🗣️📝 This is an educational and accessible conversational tool.

### How To Use ⚕🗣️😷MultiMed⚕: 

🗣️📝Interact with ⚕🗣️😷MultiMed⚕ in any language using image, audio or text!

📚🌟💼 that uses [Tonic/stablemed](https://huggingface.co/Tonic/stablemed) and [adept/fuyu-8B](https://huggingface.co/adept/fuyu-8b) with [Vectara](https://huggingface.co/vectara) embeddings + retrieval. 
do [get in touch](https://discord.gg/GWpVpekp). You can also use 😷MultiMed⚕️ on your own data & in your own way by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/TeamTonic/MultiMed?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a></h3>
### Join us : 

🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)"             
"""


# Maps the language display names offered in the UI dropdown (the keys are listed
# in create_interface) to three-letter language codes.
# NOTE(review): the codes look like ISO 639-3 identifiers — confirm against the
# translation service before relying on that.
languages = {
    "English": "eng",
    "Modern Standard Arabic": "arb",
    "Bengali": "ben",
    "Catalan": "cat",
    "Czech": "ces",
    "Mandarin Chinese": "cmn",
    "Welsh": "cym",
    "Danish": "dan",
    "German": "deu",
    "Estonian": "est",
    "Finnish": "fin",
    "French": "fra",
    "Hindi": "hin",
    "Indonesian": "ind",
    "Italian": "ita",
    "Japanese": "jpn",
    "Korean": "kor",
    "Maltese": "mlt",
    "Dutch": "nld",
    "Western Persian": "pes",
    "Polish": "pol",
    "Portuguese": "por",
    "Romanian": "ron",
    "Russian": "rus",
    "Slovak": "slk",
    "Spanish": "spa",
    "Swedish": "swe",
    "Swahili": "swh",
    "Telugu": "tel",
    "Tagalog": "tgl",
    "Thai": "tha",
    "Turkish": "tur",
    "Ukrainian": "ukr",
    "Urdu": "urd",
    "Northern Uzbek": "uzn",
    "Vietnamese": "vie"
}


# Global variables to hold component references
# NOTE(review): `components` is not referenced anywhere else in the visible code.
components = {}
# Load variables from a .env file so the os.getenv() calls below (and in
# query_vectara) can see them.
dotenv.load_dotenv()
# Shared Gradio client for the facebook/seamless_m4t Space; process_speech uses it
# for speech-to-text translation. Created at import time.
seamless_client = Client("facebook/seamless_m4t")
# Both names read the same "HuggingFace_Token" environment variable; `hf_token`
# is the one used by the rest of the visible code.
HuggingFace_Token = os.getenv("HuggingFace_Token")
hf_token = os.getenv("HuggingFace_Token")
# NOTE(review): base_model_id and model_directory are read here but not used in
# the visible code; the model ids further down are hard-coded.
base_model_id = os.getenv('BASE_MODEL_ID', 'default_base_model_id')
model_directory = os.getenv('MODEL_DIRECTORY', 'default_model_directory')
# Preferred torch device string: "cuda" when a GPU is available, otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): image_description is not read anywhere in the visible code.
image_description = "" 
# audio_output = ""
# global markdown_output
# global audio_output

def check_hallucination(assertion, citation):
    """Score ``assertion`` against ``citation`` with the Vectara hallucination model.

    The two texts are joined with a ``[SEP]`` marker and posted to the Hugging
    Face Inference API (120 second timeout), authenticated with ``hf_token``.

    Returns:
        A Markdown string of the form ``**hallucination score:** <score>``, where
        the score is taken from ``[0][0]["score"]`` of the JSON reply.

    Raises:
        KeyError / IndexError / TypeError: if the reply does not have that shape
        (for example when the API answers with an error object).
    """
    endpoint = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
    reply = requests.post(
        endpoint,
        headers={"Authorization": f"Bearer {hf_token}"},
        json={"inputs": f"{assertion} [SEP] {citation}"},
        timeout=120,
    )
    score = reply.json()[0][0]["score"]
    return f"**hallucination score:** {score}"


# Define the API parameters
# Hugging Face Inference API endpoint of the Vectara hallucination evaluation
# model; query() posts to this URL.
vapi_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"

# Bearer-token authorization header built from hf_token; sent by query().
headers = {"Authorization": f"Bearer {hf_token}"}


# Function to query the API
def query(payload):
    """Post ``payload`` to the hallucination evaluation endpoint and return its JSON.

    Args:
        payload: JSON-serialisable request body (the caller in this file sends
            ``{"inputs": <text>}``).

    Returns:
        The decoded JSON body of the response, whatever its shape; the HTTP
        status code is not inspected here.

    Raises:
        requests.exceptions.RequestException: on connection problems or when the
            endpoint does not answer within the timeout.
    """
    # Without a timeout a stalled inference endpoint would block the request
    # forever; 120 seconds matches the limit used by check_hallucination.
    response = requests.post(vapi_url, headers=headers, json=payload, timeout=120)
    return response.json()


# Function to evaluate hallucination
def evaluate_hallucination(input1, input2):
    """Return a traffic-light risk label for the pair of texts.

    The texts are joined as ``"<input1>. <input2>"`` and sent through ``query``.
    The score is read from ``[0][0]['score']`` of the reply; a score below 0.5
    is labelled high risk, anything else low risk.

    Returns:
        A string such as ``"🟢 Low risk. Score: 0.87"``.
    """
    reply = query({"inputs": f"{input1}. {input2}"})
    score = reply[0][0]['score']

    # Keep the `< 0.5` comparison as the deciding test so boundary values are
    # classified exactly as before.
    risk = "🔴 High risk" if score < 0.5 else "🟢 Low risk"
    return f"{risk}. Score: {score:.2f}"


def save_audio(audio_input, output_dir="saved_audio"):
    """Write an audio clip to a new ``.wav`` file and return the file's path.

    Args:
        audio_input: A two-item sequence ``(sample_rate, audio_data)``.
        output_dir: Directory that receives the file; created when missing.

    Returns:
        Path of the written file, ``<output_dir>/audio_<timestamp>_<uuid>.wav``.

    Raises:
        TypeError / ValueError: if ``audio_input`` cannot be unpacked into
            exactly two items. Nothing is created on disk in that case.
    """
    # Unpack first so malformed input fails before touching the filesystem.
    sample_rate, audio_data = audio_input

    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(output_dir, exist_ok=True)

    # A timestamp alone has one-second resolution, so two saves within the same
    # second would overwrite each other; the random suffix keeps names unique.
    file_name = f"audio_{int(time.time())}_{uuid.uuid4().hex}.wav"
    file_path = os.path.join(output_dir, file_name)

    sf.write(file_path, audio_data, sample_rate)

    return file_path


def save_image(image_input, output_dir="saved_images"):
    """Write an image array to a new ``.png`` file and return the file's path.

    Args:
        image_input: Image as a NumPy array (converted with ``Image.fromarray``).
        output_dir: Directory that receives the file; created when missing.

    Returns:
        Path of the written file, ``<output_dir>/image_<timestamp>_<uuid>.png``.

    Raises:
        ValueError: if ``image_input`` is not a NumPy array. Nothing is created
            on disk in that case.
    """
    # Reject unsupported input before creating any directory.
    if not isinstance(image_input, np.ndarray):
        raise ValueError("Invalid image input type")

    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(output_dir, exist_ok=True)

    image = Image.fromarray(image_input)

    # A timestamp alone has one-second resolution, so two saves within the same
    # second would overwrite each other; the random suffix keeps names unique.
    file_name = f"image_{int(time.time())}_{uuid.uuid4().hex}.png"
    file_path = os.path.join(output_dir, file_name)

    image.save(file_path)

    return file_path


def process_speech(input_language, audio_input):
    """Translate recorded speech to English text through the seamless_m4t client.

    Args:
        input_language: Source language value forwarded to the remote API.
        audio_input: Audio file reference forwarded to the remote API, or
            ``None`` when nothing was recorded.

    Returns:
        The second element of the remote prediction rendered as a string, or a
        "try again" message when ``audio_input`` is ``None``.
    """
    if audio_input is None:
        return "no audio or audio did not save yet \nplease try again ! "

    print(f"audio : {audio_input}")
    print(f"audio type : {type(audio_input)}")

    # Positional arguments of the remote "/run" endpoint, in call order.
    request_args = ("S2TT", "file", None, audio_input, "", input_language, "English")
    prediction = seamless_client.predict(*request_args, api_name="/run")

    transcript = prediction[1]  # the text part of the prediction
    try:
        return f"{transcript}"
    except Exception as err:
        return f"{err}"


def convert_text_to_speech(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
    """Translate ``input_text`` and synthesise speech via the seamless_m4t Space.

    Args:
        input_text: Text to translate and speak.
        source_language: Language of ``input_text``, as expected by the Space.
        target_language: Language of the produced text/speech.

    Returns:
        ``(audio_file_path, translated_text)``. ``audio_file_path`` is ``""``
        when the result contains no ``(sample_rate, data)`` tuple. On any
        failure the pair is ``(None, "Error in text-to-speech conversion: ...")``
        — note the first item is ``None`` there despite the annotation.
    """
    try:
        # Build the client inside the try block so that connection failures are
        # reported through the same (None, message) result as prediction
        # failures instead of escaping as an exception.
        # NOTE(review): this URL pins one specific Space replica; confirm it is
        # still reachable or switch to the Space id.
        client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")

        result = client.predict(
            "T2ST",
            "",
            None,
            None,
            input_text,
            source_language,
            target_language,
            api_name="/run",
        )

        translated_text = ""
        audio_file_path = ""

        for item in result:
            if isinstance(item, str):
                # URL-looking strings are skipped; the first remaining string
                # is taken as the translated text.
                # NOTE(review): if the Space returns the audio as a local file
                # path string, it would be picked up as the text — verify the
                # result shape.
                if item.startswith(('http://', 'https://')):
                    continue
                if not translated_text:
                    translated_text = item
            elif isinstance(item, tuple) and len(item) == 2:
                # Treated as (sample_rate, audio_data); persisted to disk and
                # the scan stops at the first such tuple.
                audio_file_path = save_audio(item)
                break

        return audio_file_path, translated_text

    except Exception as e:
        return None, f"Error in text-to-speech conversion: {str(e)}"


def process_image(image_input):
    """Send an image to the adept/fuyu-8b demo Space and return its prediction.

    Args:
        image_input: Image reference forwarded unchanged as the first argument
            of the remote call (the caller in this file passes a saved file path).

    Returns:
        Whatever the remote endpoint (``fn_index=2``) returns.
    """
    fuyu_client = Client("https://adept-fuyu-8b-demo.hf.space/--replicas/pqjvl/")

    # The second positional argument is a boolean flag of the remote function.
    # NOTE(review): presumably toggles detailed captioning — confirm against the Space.
    return fuyu_client.predict(image_input, True, fn_index=2)


def query_vectara(text):
    """Query the Vectara corpus with ``text`` and return a summary plus sources.

    Credentials are read from the ``CUSTOMER_ID``, ``CORPUS_ID`` and ``API_KEY``
    environment variables.

    Args:
        text: The query text.

    Returns:
        On success, a JSON string (indent 2) with the keys ``"summary"`` (text of
        the first summary of the first response set) and ``"sources"`` (up to
        five entries per response set, each holding whichever of ``title``,
        ``author`` and ``page number`` the source metadata provides).
        Otherwise a plain message: ``"Error: <status>"`` for a non-200 reply or
        ``"No data found in the response."`` for an empty body.

    Raises:
        requests.exceptions.RequestException: on connection problems or timeout.
        KeyError / IndexError: if a 200 reply lacks the expected summary entry.
    """
    customer_id = os.getenv('CUSTOMER_ID')
    corpus_id = os.getenv('CORPUS_ID')
    api_key = os.getenv('API_KEY')

    api_key_header = {
        "customer-id": customer_id,
        "x-api-key": api_key
    }

    request_body = {
        "query": [
            {
                "query": text,
                "queryContext": "",
                "start": 1,
                "numResults": 25,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                "rerankingConfig": {
                    "rerankerId": 272725718,
                    "mmrConfig": {
                        "diversityBias": 0.35
                    }
                },
                "corpusKey": [
                    {
                        "customerId": customer_id,
                        "corpusId": corpus_id,
                        "semantics": 0,
                        "metadataFilter": "",
                        "lexicalInterpolationConfig": {
                            "lambda": 0
                        },
                        "dim": []
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    }
                ]
            }
        ]
    }

    # Plain HTTP call via requests; `json=` serialises the body. The timeout
    # keeps a stalled API from blocking the request forever (120 seconds, the
    # same limit check_hallucination uses).
    response = requests.post(
        "https://api.vectara.io/v1/query",
        json=request_body,
        verify=True,
        headers=api_key_header,
        timeout=120,
    )

    if response.status_code != 200:
        return f"Error: {response.status_code}"

    query_data = response.json()
    if not query_data:
        return "No data found in the response."

    summary = query_data['responseSet'][0]['summary'][0]['text']

    # Metadata field name in the API reply -> key used in the returned sources.
    metadata_labels = {
        'title': 'title',
        'author': 'author',
        'pageNumber': 'page number',
    }

    sources_info = []
    for response_set in query_data.get('responseSet', []):
        # Only the top five results of each response set are reported.
        for source in response_set.get('response', [])[:5]:
            source_info = {}
            for metadata in source.get('metadata', []):
                label = metadata_labels.get(metadata.get('name', ''))
                if label is not None:
                    source_info[label] = metadata.get('value', '')

            # Sources without any of the known metadata fields are omitted.
            if source_info:
                sources_info.append(source_info)

    return json.dumps({"summary": summary, "sources": sources_info}, indent=2)


# Functions to Wrap the Prompt Correctly
def wrap_text(text, width=90):
    """Wrap every line of ``text`` to at most ``width`` columns.

    Existing line breaks are preserved: the text is split on newlines, each
    piece is wrapped independently with ``textwrap.fill``, and the pieces are
    joined back with newlines.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )


def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:"):
    """Generate a completion from the PEFT model for the given prompt.

    Args:
        user_input: The user's text.
        system_prompt: Instruction text concatenated directly after
            ``user_input`` (no separator).

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    # NOTE(review): the user input comes *before* the system prompt here, the
    # reverse of ChatBot.doctor — confirm this ordering is intended.
    formatted_input = f"{user_input}{system_prompt}"

    encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)

    # Place the inputs on the device that actually holds the model weights.
    # The model is loaded without being moved to the global `device`, so
    # sending the inputs to "cuda" whenever a GPU exists would make generate()
    # fail with a device mismatch.
    model_inputs = encodeds.to(peft_model.device)

    # Generation runs on `peft_model`; this file defines no separate `model`.
    output = peft_model.generate(
        **model_inputs,
        max_length=512,
        use_cache=True,
        early_stopping=True,
        bos_token_id=peft_model.config.bos_token_id,
        eos_token_id=peft_model.config.eos_token_id,
        pad_token_id=peft_model.config.eos_token_id,
        temperature=0.1,
        do_sample=True
    )

    response_text = tokenizer.decode(output[0], skip_special_tokens=True)

    return response_text


# Instantiate the Tokenizer
# The tokenizer of the base model (stabilityai/stablelm-3b-4e1t) is loaded at
# import time, authenticated with hf_token, with left padding.
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True, padding_side="left")
# tokenizer = AutoTokenizer.from_pretrained("Tonic/stablemed", trust_remote_code=True, padding_side="left")
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'

# Load the PEFT model
# NOTE(review): peft_config is loaded but not used in the visible code.
peft_config = PeftConfig.from_pretrained("Tonic/stablemed", token=hf_token)
# Load the base causal LM first, then wrap it with the Tonic/stablemed adapter;
# `peft_model` is rebound to the wrapped model.
# NOTE(review): the model is never moved to `device` here — confirm which device
# generation is expected to run on.
peft_model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True)
peft_model = PeftModel.from_pretrained(peft_model, "Tonic/stablemed", token=hf_token)


class ChatBot:
    """Thin wrapper around the module-level tokenizer and PEFT model."""

    def __init__(self):
        # Starts empty; `doctor` is static and does not record anything in it.
        self.history = []

    @staticmethod
    def doctor(user_input, system_prompt="You are an expert medical analyst:"):
        """Generate a model answer for ``system_prompt`` followed by ``user_input``.

        The two strings are concatenated without a separator, encoded, run
        through ``peft_model.generate`` (``max_length=512``, EOS token used for
        padding), and the first generated sequence is decoded with special
        tokens removed.
        """
        prompt = f"{system_prompt}{user_input}"
        prompt_ids = tokenizer.encode(prompt, return_tensors="pt")
        generated = peft_model.generate(
            input_ids=prompt_ids,
            max_length=512,
            pad_token_id=tokenizer.eos_token_id,
        )
        return tokenizer.decode(generated[0], skip_special_tokens=True)


# Module-level ChatBot instance; process_summary_with_stablemed calls bot.doctor().
bot = ChatBot()


def process_summary_with_stablemed(summary):
    """Run ``summary`` through ``bot.doctor`` with the medical-instructor prompt.

    Returns:
        The text produced by ``bot.doctor`` for the summary.
    """
    instructor_prompt = "You are a medical instructor . Assess and describe the proper options to your students in minute detail. Propose a course of action for them to base their recommendations on based on your description."
    return bot.doctor(summary, instructor_prompt)

    
# Main function to handle the Gradio interface logic

def process_and_query(input_language=None, audio_input=None, image_input=None, text_input=None):
    """Run the full pipeline for one request and return ``(markdown, audio_path)``.

    Steps: describe the image / transcribe the audio / take the typed text,
    query Vectara with the combined text, pass the Vectara summary to StableMed,
    convert the answer to speech in the user's language and attach a
    hallucination label.

    Args:
        input_language: Language name chosen in the dropdown (a key of
            ``languages``); anything else, including ``"None"``, falls back to
            ``"English"``.
        audio_input: ``(sample_rate, data)`` recording, or ``None``.
        image_input: Image as a NumPy array, or ``None``.
        text_input: Typed question, or ``None`` / blank.

    Returns:
        A 2-tuple matching the two Gradio outputs: the Markdown report (or an
        error message) and the path of the generated audio file (``None`` when
        no audio is available). Every exit path returns two values.
    """
    has_text = bool(text_input and text_input.strip())
    if image_input is None and audio_input is None and not has_text:
        # Two values are required because the click handler has two outputs.
        return "Error: Please provide some input (text, audio, or image).", None

    try:
        # The speech service is addressed with language names (process_speech
        # passes "English"); an unset or "None" selection falls back to English
        # rather than being forwarded as-is.
        spoken_language = input_language if input_language in languages else "English"

        # Debugging print statement
        print(f"Image Input Type: {type(image_input)}, Audio Input Type: {type(audio_input)}")

        query_parts = []
        markdown_output = ""
        image_text = ""

        # Every provided input contributes to the query; previously only the
        # first of image / audio / text was used and the rest silently dropped.
        if image_input is not None:
            image_file_path = save_image(image_input)
            image_text = str(process_image(image_file_path))
            query_parts.append("\n\n**Image Input:**\n" + image_text)

        if audio_input is not None:
            audio_file_path = save_audio(audio_input)
            audio_text = process_speech(spoken_language, audio_file_path)
            query_parts.append("\n\n**Audio Input:**\n" + audio_text)

        if has_text:
            separator = "\n\n" if query_parts else ""
            query_parts.append(separator + "The user asks the following to his health adviser: " + text_input)

        combined_text = "".join(query_parts)

        # Append the original image description in Markdown
        if image_text:
            markdown_output += "\n### Original Image Description\n"
            markdown_output += image_text + "\n"

        # Use the text to query Vectara
        vectara_response_json = query_vectara(combined_text)

        # query_vectara returns a plain message instead of JSON when the request
        # fails; show that message rather than a JSON parsing error.
        try:
            vectara_response = json.loads(vectara_response_json)
        except json.JSONDecodeError:
            return f"Error occurred during processing: {vectara_response_json}. No hallucination evaluation.", None

        summary = vectara_response.get('summary', 'No summary available')
        sources_info = vectara_response.get('sources', [])

        # Append (not assign) so the image description added above is kept.
        markdown_output += "### Vectara Response Summary\n"
        markdown_output += f"* **Summary**: {summary}\n"
        markdown_output += "### Sources Information\n"
        for source in sources_info:
            markdown_output += f"* {source}\n"

        # Process the summary with Stablemed
        final_response = process_summary_with_stablemed(summary)

        # The StableMed answer is English text; speak it in the user's language.
        audio_output, translated_text = convert_text_to_speech(final_response, "English", spoken_language)

        # Evaluate hallucination
        hallucination_label = evaluate_hallucination(final_response, summary)

        markdown_output += "\n### Processed Summary with StableMed\n"
        markdown_output += final_response + "\n"
        markdown_output += "\n### Hallucination Evaluation\n"
        markdown_output += f"* **Label**: {hallucination_label}\n"
        markdown_output += "\n### Translated Text\n"
        markdown_output += translated_text + "\n"

        # An empty path means no audio was produced; hand Gradio None instead.
        return markdown_output, audio_output or None

    except Exception as e:
        return f"Error occurred during processing: {e}. No hallucination evaluation.", None


def clear():
    """Return the reset values for the components wired to the Clear button.

    The five values line up with the outputs list of the Clear button in
    create_interface: language dropdown, audio input, image input, text output
    and audio output.
    """
    defaults = ("English", None, None, "", None)
    return defaults


def create_interface():
    """Build the Gradio Blocks UI and return it without launching.

    Layout, top to bottom: welcome text, language dropdown, three collapsed
    accordions (voice, picture, text) with examples, the Markdown and audio
    outputs, and the "Use MultiMed" and "Clear" buttons.

    Returns:
        The ``gr.Blocks`` instance.
    """
    def _reset_all():
        # clear() supplies defaults for the first five components; the extra ""
        # also empties the question text box, which the Clear button previously
        # left untouched.
        return (*clear(), "")

    with gr.Blocks(theme='ParityError/Anime') as interface:
        # Display the welcome message
        gr.Markdown(welcome_message)

        # Offer every known language plus a "None" entry meaning no selection.
        input_language_options = ["None"] + list(languages.keys())

        input_language = gr.Dropdown(input_language_options, label="Select the language", value="English", interactive=True)

        with gr.Accordion("Use Voice", open=False):
            audio_input = gr.Audio(label="Speak")
            # Placeholder Markdown kept for layout; it is not wired to any event.
            # It is deliberately not named `audio_output`, which is the audio
            # player created further down.
            gr.Markdown(label="Output text")
            gr.Examples([["audio1.wav"], ["audio2.wav"], ], inputs=[audio_input])

        with gr.Accordion("Use a Picture", open=False):
            image_input = gr.Image(label="Upload image")
            # Placeholder Markdown kept for layout; it is not wired to any event.
            gr.Markdown(label="Output text")
            gr.Examples([["image1.png"], ["image2.jpeg"], ["image3.jpeg"], ], inputs=[image_input])

        with gr.Accordion("MultiMed", open=False):
            text_input = gr.Textbox(label="Use Text", lines=3, placeholder="I have had a sore throat and phlegm for a few days and now my cough has gotten worse!")

            gr.Examples([
                ["What is the proper treatment for buccal herpes?"],
                ["I have had a sore throat and hoarse voice for several days and now a strong cough recently "],
                ["How does cellular metabolism work TCA cycle"],
                ["What special care must be provided to children with chicken pox?"],
                ["When and how often should I wash my hands?"],
                ["بکل ہرپس کا صحیح علاج کیا ہے؟"],
                ["구강 헤르페스의 적절한 치료법은 무엇입니까?"],
                ["Je, ni matibabu gani sahihi kwa herpes ya buccal?"],
            ], inputs=[text_input])

        text_output = gr.Markdown(label="MultiMed")
        audio_output = gr.Audio(label="Audio Out", type="filepath")

        text_button = gr.Button("Use MultiMed")
        text_button.click(process_and_query, inputs=[input_language, audio_input, image_input, text_input], outputs=[text_output, audio_output])

        clear_button = gr.Button("Clear")
        clear_button.click(_reset_all, inputs=[], outputs=[input_language, audio_input, image_input, text_output, audio_output, text_input])

    return interface


# Build the UI and start serving it. Both statements run at import time, so
# importing this module launches the app.
app = create_interface()
app.launch(show_error=True, debug=True)