File size: 25,306 Bytes
0a30342
 
 
bf7aef8
 
0a30342
 
 
 
 
 
 
068cd80
 
 
 
 
 
 
bf7aef8
 
068cd80
bf7aef8
 
068cd80
0a30342
53c6eb3
 
068cd80
 
 
0f942b5
068cd80
bf7aef8
d822584
068cd80
 
bf7aef8
 
068cd80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ce88d0
068cd80
 
2ce88d0
068cd80
2ce88d0
068cd80
2ce88d0
068cd80
 
2ce88d0
068cd80
 
2ce88d0
068cd80
2ce88d0
 
 
 
 
 
 
55d2410
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85b2a06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
068cd80
 
 
 
 
 
 
8ef92c5
bf7aef8
068cd80
 
 
0a30342
 
068cd80
bf7aef8
068cd80
 
 
 
bf7aef8
068cd80
 
bf7aef8
 
 
 
 
 
 
4b49ce8
bf7aef8
 
 
 
 
 
 
 
c2d2a1c
 
 
 
 
 
 
a75d49d
 
 
 
 
 
bf7aef8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a75d49d
 
 
 
bf7aef8
 
a75d49d
e0afec8
bf7aef8
 
 
 
 
 
 
f6e9614
bf7aef8
a75d49d
bf7aef8
 
 
 
 
 
 
 
 
 
 
 
 
 
a75d49d
bf7aef8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
068cd80
53c6eb3
068cd80
 
 
bf7aef8
 
53c6eb3
068cd80
 
 
 
f6e9614
068cd80
bf7aef8
 
 
 
 
 
 
 
e0afec8
bf7aef8
 
 
 
 
 
f6e9614
 
 
 
 
 
bf7aef8
 
 
 
 
 
 
 
2d51976
 
 
 
 
 
 
 
bf7aef8
 
 
 
 
 
 
 
 
 
 
 
 
 
068cd80
bf7aef8
 
 
 
 
 
 
068cd80
bf7aef8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2d2a1c
a75d49d
e0afec8
c2d2a1c
 
 
 
 
 
 
 
 
f6e9614
b392e8c
f6e9614
e0afec8
 
 
c2d2a1c
313f431
e0afec8
a75d49d
e0afec8
bf7aef8
b37a05b
 
 
bf7aef8
b37a05b
bf7aef8
 
 
 
 
 
 
 
 
 
 
068cd80
bf7aef8
 
 
 
 
 
068cd80
bf7aef8
 
c0121ac
bf7aef8
 
 
 
 
 
 
 
 
b37a05b
 
 
 
bf7aef8
 
068cd80
bf7aef8
b37a05b
 
bf7aef8
 
 
 
068cd80
bf7aef8
068cd80
bf7aef8
 
 
 
068cd80
 
 
 
 
 
bf7aef8
91eb32f
bf7aef8
068cd80
 
 
 
 
 
 
 
 
 
c0121ac
068cd80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6e9614
 
 
 
 
 
068cd80
8fee735
068cd80
 
55d2410
bf7aef8
068cd80
 
 
 
 
 
 
 
8460b0c
05c1b80
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
# pylint: skip-file

import json
import os
import subprocess

import requests

# Install flash-attn at start-up, before the model-related imports below.
# FLASH_ATTENTION_SKIP_CUDA_BUILD is handed to the pip process only.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    # ``env=`` replaces the child's entire environment, so extend the inherited
    # one; passing only the single variable would drop PATH, HOME, proxy
    # settings, etc. for the pip process.
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)

import os
from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
import torch
import wikipedia
import time
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from bs4 import BeautifulSoup
from functools import lru_cache


# Upper bound and initial value of the "Max new tokens" slider.
MAX_MAX_NEW_TOKENS = 4096
DEFAULT_MAX_NEW_TOKENS = 1536

# Prompt length cap in tokens; can be overridden through the environment.
MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "8192"))

# Markdown shown at the top of the page. A CPU warning is appended to it further
# down in this file when CUDA is not available.
DESCRIPTION = """\
# Playground with Ghost 8B Beta (β, 8k, Online)

**Ghost 8B Beta** model outperforms prominent models such as Llama 3 8B Instruct, GPT 3.5 Turbo in the lc_winrate score. In addition, it also outperforms Claude 3 Opus, Claude 3 Sonnet, GPT-4, and Mistral Large when comparing the winrate score of AlpacaEval 2.0, [*](https://ghost-x.org/docs/models/ghost-8b-beta/). The model comes in two context length versions, [8k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-8k) and [128k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-128k), along with multilingual function tools support by default. 

The languages supported are 🇺🇸 English, 🇫🇷 French, 🇮🇹 Italian, 🇪🇸 Spanish, 🇵🇹 Portuguese, 🇩🇪 German, 🇻🇳 Vietnamese, 🇰🇷 Korean and 🇨🇳 Chinese.

🗞️ **Updates**
* Jul 23, 2024: added support for tools, now available to search for information on the internet.
"""


# HTML passed to the chatbot component as its placeholder.
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
   <h1 style="font-size: 26px; margin-bottom: 2px; opacity: 0.20;">👻 Ghost 8B Beta</h1>
   <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.10;">Ask and share whatever you want ~</p>
</div>
"""

# Markdown disclaimer rendered below the chat interface.
LICENSE = """
<p/>

---
Ghost 8B Beta may give inaccurate information, including information about people, so please verify Ghost 8B Beta's answers. [Ghost 8B Beta](https://ghost-x.org/docs/models/ghost-8b-beta/) by [Ghost X](https://ghost-x.org).
"""

# Example prompts offered by the chat interface. Each entry is a one-element
# list holding the prompt text. The prompts cycle through the supported
# languages in a fixed order (English, French, Italian, Spanish, Portuguese,
# German, Vietnamese, Korean, Chinese), nine per topic.
EXAMPLES = [
    [
        "What is the significance of the Higgs boson in the Standard Model of particle physics?"
    ],
    [
        "Qu'est-ce que l'effet fondateur et comment influence-t-il la diversité génétique d'une population?"
    ],
    ["Qual è il principio di Le Chatelier e come si applica agli equilibri chimici?"],
    [
        "¿Qué es una supernova y cuál es su importancia en la formación de elementos pesados en el universo?"
    ],
    [
        "Qual é a definição formal de uma integral de linha e como é utilizada em física?"
    ],
    [
        "Was versteht man unter dem Moho-Diskontinuität und welche Bedeutung hat sie für das Verständnis der Erdkruste?"
    ],
    [
        "Hiện tượng nhà kính là gì và nó ảnh hưởng như thế nào đến biến đổi khí hậu toàn cầu?"
    ],
    [
        "알고리즘의 시간 복잡도가 중요한 이유는 무엇이며, 시간 복잡도를 어떻게 분석하나요?"
    ],
    ["什么是CRISPR-Cas9基因编辑技术,它在现代生物学研究中的作用是什么?"],
    [
        "Create a Python function that takes a list of integers and returns the list sorted in ascending order without using the built-in sort or sorted functions."
    ],
    [
        "Écrivez une fonction en C++ qui trouve le plus long sous-tableau contigu avec une somme égale à zéro."
    ],
    [
        "Scrivi una funzione in Java che calcola il fattoriale di un numero utilizzando la ricorsione."
    ],
    [
        "Desarrolla una función en JavaScript que determine si una cadena de texto es un palíndromo, ignorando espacios y signos de puntuación."
    ],
    ["Implemente uma função em C# que verifique se uma matriz quadrada é simétrica."],
    [
        "Schreiben Sie eine Funktion in Swift, die eine gegebene Zeichenfolge in umgekehrter Reihenfolge zurückgibt, ohne integrierte Funktionen zu verwenden."
    ],
    [
        "Viết một hàm trong PHP để tìm tất cả các số nguyên tố trong một khoảng cho trước."
    ],
    [
        "파이썬을 사용하여 주어진 이진 트리가 이진 탐색 트리인지 확인하는 함수를 작성하십시오."
    ],
    [
        "用 Go 语言编写一个函数,计算给定字符串中每个字符出现的次数,并返回一个包含字符及其出现次数的映射。"
    ],
    [
        "Can you help me design a detailed project plan for developing a machine learning model for predicting stock prices?"
    ],
    [
        "Pouvez-vous m'aider à organiser un emploi du temps hebdomadaire pour maximiser la productivité de mon équipe de développement logiciel?"
    ],
    [
        "Puoi aiutarmi a creare un piano di sviluppo per un'applicazione mobile che gestisce le prenotazioni di ristoranti?"
    ],
    [
        "¿Podrías ayudarme a elaborar un plan detallado para la implementación de un sistema de gestión de contenido (CMS) en una empresa mediana?"
    ],
    [
        "Você pode me ajudar a planejar uma estratégia de desenvolvimento para um sistema de comércio eletrônico escalável?"
    ],
    [
        "Können Sie mir helfen, einen detaillierten Zeitplan für die Implementierung eines neuen ERP-Systems in unserem Unternehmen zu erstellen?"
    ],
    [
        "Bạn có thể giúp tôi xây dựng một kế hoạch phát triển chi tiết cho dự án xây dựng hệ thống quản lý chuỗi cung ứng không?"
    ],
    [
        "신경망 기반 이미지 인식 모델 개발을 위한 세부 프로젝트 계획을 세우는 데 도움을 줄 수 있나요?"
    ],
    ["你能帮我制定一个详细的开发计划,用于创建一个基于区块链的分布式账本系统吗?"],
    [
        "Prove that the sum of the squares of any two sides of a right triangle is equal to the square of the hypotenuse."
    ],
    [
        "Calculez la force gravitationnelle entre deux masses de 10 kg chacune séparées par une distance de 1 mètre."
    ],
    [
        "Determina la formula molecolare di un composto che contiene il 40% di carbonio, il 6.67% di idrogeno e il 53.33% di ossigeno in massa."
    ],
    [
        "Explica la teoría del ciclo económico de Schumpeter y cómo se aplica a la economía moderna."
    ],
    [
        "Calcule a energia potencial gravitacional de um objeto de 5 kg a uma altura de 10 metros acima do solo (g = 9,8 m/s²)."
    ],
    [
        "Beweisen Sie, dass jede Primzahl der Form 4k+1 als Summe zweier Quadrate geschrieben werden kann."
    ],
    [
        "Tính nồng độ mol của dung dịch H₂SO₄ khi hoà tan 98 gam H₂SO₄ vào nước để được 1 lít dung dịch."
    ],
    ["케인스 경제학의 핵심 개념과 그것이 현대 경제 정책에 미치는 영향을 설명하십시오."],
    ["计算一个质量为2 kg的物体在3米高处的重力势能(g = 9.8 m/s²)。"],
    [
        'Identify the author of a novel that features a dystopian society where "Big Brother" watches over its citizens and the protagonist works for the Ministry of Truth.'
    ],
    [
        "Quel est le seul mammifère capable de voler activement, souvent associé à la nuit et capable d'écholocalisation?"
    ],
    [
        "Qual è l'opera letteraria italiana che narra il viaggio immaginario di un poeta attraverso Inferno, Purgatorio e Paradiso, guidato da Virgilio e Beatrice?"
    ],
    [
        "¿Qué insecto es conocido por su organización social compleja, su capacidad para producir miel y su comunicación mediante la danza?"
    ],
    [
        "Qual é o fenômeno atmosférico que ocorre quando uma massa de ar quente se encontra com uma massa de ar frio, resultando em uma violenta tempestade giratória?"
    ],
    [
        "Welches literarische Werk beschreibt die Geschichte eines jungen Mädchens, das durch einen Kaninchenbau in eine fantastische Welt voller skurriler Charaktere fällt?"
    ],
    [
        "Động vật nào có thể tái sinh toàn bộ cơ thể từ một mảnh nhỏ của chính nó, thường sống dưới nước và có thể có nhiều xúc tu?"
    ],
    [
        "어떤 자연 현상은 태양빛이 대기 중의 물방울에 반사되고 굴절되어 발생하며, 하늘에 나타나는 여러 색깔의 아치 형태를 띠나요?"
    ],
    # NOTE(review): this prompt appears to be cut off mid-sentence — confirm the
    # intended full text with the author.
    ["这部文学作品讲述了一位绅士和他的侍从的冒险故事,他们在"],
    [
        "Can you derive the Euler-Lagrange equation from the principle of stationary action in classical mechanics?"
    ],
    [
        "Expliquez la notion de « différence ontologique » chez Martin Heidegger et son importance pour la phénoménologie."
    ],
    [
        "Qual è il significato simbolico del colore blu nei dipinti di Giotto di Bondone durante il Rinascimento?"
    ],
    [
        "¿Cómo afecta el cambio de código a la estructura gramatical en comunidades bilingües de habla español-inglés?"
    ],
    [
        "Qual é o impacto da política monetária não convencional no controle da inflação durante uma crise econômica?"
    ],
    [
        "Erklären Sie den Unterschied zwischen deterministischen und nicht-deterministischen endlichen Automaten und ihre Anwendungsbereiche."
    ],
    [
        "Giải thích cơ chế của quá trình phiên mã ngược (reverse transcription) và tầm quan trọng của nó trong nghiên cứu HIV/AIDS."
    ],
    ["조선시대 성리학이 한국 사회와 문화에 미친 영향을 설명하세요."],
    ["如何解释量子纠缠现象,以及它在量子计算中的潜在应用?"],
    [
        "How can you design a daily schedule that maximizes productivity for a remote worker who has multiple meetings and project deadlines?"
    ],
    [
        "Quels sont les meilleures stratégies pour gérer les conflits au sein d'une équipe multiculturelle travaillant sur un projet commun?"
    ],
    [
        "Quali sono i migliori consigli per mantenere un equilibrio tra vita professionale e vita privata in un ambiente lavorativo stressante?"
    ],
    [
        "¿Cómo se puede elaborar un plan financiero personal efectivo que incluya ahorro para la jubilación, inversión y manejo de deudas?"
    ],
    [
        "Quais são as melhores práticas para implementar metodologias ágeis em uma equipe de desenvolvimento de software?"
    ],
    [
        "Welche Strategien können verwendet werden, um ein starkes berufliches Netzwerk aufzubauen und zu pflegen, insbesondere in der Tech-Branche?"
    ],
    [
        "Những bước nào cần thiết để xây dựng một lộ trình phát triển sự nghiệp bền vững trong lĩnh vực công nghệ thông tin?"
    ],
    ["프로젝트의 범위 변동을 효과적으로 관리하기 위한 최고의 방법은 무엇인가요?"],
    ["在快速变化的职场环境中,如何有效地实现工作与生活的平衡?"],
    [
        "Write an argumentative essay discussing the pros and cons of artificial intelligence in the workplace, including potential ethical concerns."
    ],
    [
        "Analysez les impacts sociaux et économiques de la digitalisation sur les petites entreprises en France."
    ],
    [
        "Scrivi un'email formale al direttore di una rivista per proporre un articolo sulla sostenibilità ambientale nelle città italiane."
    ],
    [
        "Elabora un informe detallado sobre los efectos del cambio climático en la biodiversidad de la región amazónica."
    ],
    [
        "Analise criticamente os principais pontos abordados no relatório anual do Banco Mundial sobre a pobreza global."
    ],
    [
        "Erstellen Sie eine technische Dokumentation für die Implementierung eines neuen Software-Features in einer bestehenden Anwendung."
    ],
    [
        "Viết một bài luận phân tích về tác động của cuộc cách mạng công nghiệp 4.0 đối với thị trường lao động Việt Nam."
    ],
    [
        "인공지능의 윤리적 문제에 대한 연구 논문을 작성하고, 다양한 사례를 통해 그 영향을 분석하세요."
    ],
    ["分析鲁迅的小说《阿Q正传》中反映的中国社会问题和作者的批判态度。"],
]

if torch.cuda.is_available():
    # Load the weights and then the tokenizer; HF_TOKEN (may be unset) is
    # forwarded to both from_pretrained calls.
    model_id = "ghost-x/ghost-8b-beta"
    hf_serect = os.getenv("HF_TOKEN")
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=hf_serect,
        trust_remote_code=True,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2",
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        token=hf_serect,
        trust_remote_code=True,
    )
else:
    # No GPU: nothing is loaded, so only extend the page description.
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"

# Seconds, counted from the first streamed token, that `generate` compares
# against when deciding whether tool-enabled output is shown or held back.
waiting_tools_timeout = 5

# Argument schema of the single tool; key order is preserved in the JSON dump.
_SEARCH_TOOL_PROPERTIES = {
    "keyword": {
        "type": "string",
        "description": "Search keywords, rephrase to optimize search results based on questions suitable to the specified search type.",
        "required": True,
    },
    "type": {
        "type": "string",
        "description": "Search type, based on the question to determine whether to search for it in 'wikipedia' or 'google', prefer to use wikipedia for information about events, history and people.",
        "enum": ["wikipedia", "google"],
        "default": "google",
        "required": True,
    },
    "language": {
        "type": "string",
        "description": "Search language, is the user language code with 2 letters, e.g: vi = vietnamese, en = english.",
        "default": "en",
        "required": True,
    },
}

_SEARCH_TOOL = {
    "type": "function",
    "function": {
        "name": "search_on_internet",
        "description": "Use this tool to search for information on the internet to answer questions you are unsure about, don't know or need the latest information (e.g. news, reports, companies, people,...) to give the most accurate results. Note: can only be used or ignored, not asked again",
        "parameters": {
            "type": "object",
            "properties": _SEARCH_TOOL_PROPERTIES,
        },
    },
}

# JSON text of the tool list, inserted verbatim as the content of the "tools"
# conversation turn.
supported_tools = json.dumps([_SEARCH_TOOL], ensure_ascii=False)


@lru_cache(maxsize=128)
def extract_text_from_webpage(html_content):
    """Return the text of an HTML document with non-content elements removed.

    ``script``, ``style``, ``header``, ``footer``, ``nav``, ``form`` and ``svg``
    elements are detached from the parsed tree first; the remaining text
    fragments are stripped and joined with single spaces. Results are memoised
    per ``html_content`` string (up to 128 entries).
    """
    ignored_tags = ["script", "style", "header", "footer", "nav", "form", "svg"]
    document = BeautifulSoup(html_content, "html.parser")
    for element in document(ignored_tags):
        element.extract()
    return document.get_text(strip=True, separator=" ")


def search_with_wikipedia(
    query: str,
    language: str = "en",
):
    """Look up a Wikipedia summary for ``query``.

    This is a best-effort lookup: any failure is reported on stdout and yields
    an empty result instead of raising.

    Args:
        query: Title or search phrase handed to ``wikipedia.summary``.
        language: Language code handed to ``wikipedia.set_lang``; a falsy value
            falls back to ``"en"``.

    Returns:
        A list holding the summary text, or an empty list when the query is
        blank or the lookup fails.
    """
    all_results = []
    if not query or not query.strip():
        # Nothing to look up; skip the call entirely.
        return all_results
    try:
        wikipedia.set_lang(language or "en")
        all_results.append(wikipedia.summary(query))
    except Exception as e:
        # Deliberately broad so a failed lookup never interrupts the caller,
        # but leave a trace instead of swallowing the error silently.
        print(f"Error searching Wikipedia for {query!r}: {e}")
    return all_results


def search_with_google(
    query: str,
    num_results: int = 3,
    timeout: int = 5,
    language: str = "en",
    ssl_verify: bool = None,
):
    """Search Google and return the visible text of the result pages.

    Args:
        query: Search keywords.
        num_results: Number of results requested from Google; also the maximum
            number of entries returned.
        timeout: Timeout in seconds applied to the search request and to every
            result-page download.
        language: Value sent as Google's ``hl`` parameter.
        ssl_verify: Passed as ``verify`` to the search request.

    Returns:
        A list of ``{"link": ..., "text": ...}`` dicts, one per result page
        that could be downloaded; ``text`` is cut to 4096 characters.

    Raises:
        requests.exceptions.RequestException: If the search request itself
            fails. Failures on individual result pages are printed and skipped.
    """
    all_results = []
    max_chars_per_page = 4096
    with requests.Session() as session:
        # One browser-like User-Agent for every request made in this session.
        session.headers.update(
            {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"
            }
        )
        resp = session.get(
            url="https://www.google.com/search",
            params={
                "q": query,
                "num": num_results,
                "udm": 14,
                "hl": language,
            },
            timeout=timeout,
            verify=ssl_verify,
        )
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        for result in soup.find_all("div", attrs={"class": "g"}):
            if len(all_results) >= num_results:
                # The page may contain more result blocks than were asked for.
                break
            anchor = result.find("a", href=True)
            if not anchor:
                continue
            link = anchor["href"]
            if not link.startswith(("http://", "https://")):
                # Relative or non-HTTP links cannot be fetched.
                continue
            try:
                # Without a timeout a single unresponsive site would block the
                # whole search indefinitely.
                webpage = session.get(link, timeout=timeout)
                webpage.raise_for_status()
            except requests.exceptions.RequestException as e:
                print(f"Error fetching or processing {link}: {e}")
                continue
            visible_text = extract_text_from_webpage(webpage.text)
            all_results.append(
                {"link": link, "text": visible_text[:max_chars_per_page]}
            )
    return all_results


def _parse_search_tool_call(raw_response):
    """Decode ``raw_response`` as a ``search_on_internet`` tool call.

    Returns the decoded call (a dict whose ``arguments`` entry is a dict with a
    non-blank string ``keyword``), or ``None`` when the text is not JSON or does
    not describe such a call.
    """
    try:
        call = json.loads(raw_response)
    except (TypeError, ValueError) as e:
        # Ordinary prose answers end up here (JSONDecodeError is a ValueError).
        print(e)
        return None
    # Valid JSON is not necessarily a tool call: "42" or "[1, 2]" parse fine.
    if not isinstance(call, dict):
        return None
    if call.get("type") != "function" or call.get("name") != "search_on_internet":
        return None
    arguments = call.get("arguments")
    if not isinstance(arguments, dict):
        return None
    keyword = arguments.get("keyword")
    if not isinstance(keyword, str) or not keyword.strip():
        return None
    return call


@spaces.GPU(duration=120)
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    allow_used_tools: bool = True,
    system_prompt: str = "",
    max_new_tokens: int = 1536,
    temperature: float = 0.4,
    top_p: float = 0.95,
    top_k: int = 50,
    repetition_penalty: float = 1.0,
    other_client_info: str = None,
) -> Iterator[str]:
    """Stream the model's reply to ``message``, optionally searching the web first.

    With tools allowed, a first pass runs with the tool list in the prompt and
    its output is held back for up to ``waiting_tools_timeout`` seconds after
    the first token. If that pass finishes without anything having been shown,
    its text is parsed as a ``search_on_internet`` call, any requested searches
    are run, and a second pass without tools (using the search results as
    reference documents) is streamed.

    Relies on the module-level ``model`` and ``tokenizer``, which this file only
    defines when CUDA is available.

    Args:
        message: The new user message.
        chat_history: Earlier ``(user, assistant)`` turns.
        allow_used_tools: Only the literal ``True`` enables the tool pass.
        system_prompt: Optional system message; skipped when empty.
        max_new_tokens: Generation length limit.
        temperature: Sampling temperature; ``0`` switches sampling off.
        top_p: Nucleus sampling threshold (used when sampling).
        top_k: Top-k cutoff (used when sampling).
        repetition_penalty: Repetition penalty passed to ``model.generate``.
        other_client_info: Optional text placed first among the reference
            documents.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """

    def build_input_ids(
        apply_tools: bool = None,
        references=None,
    ):
        """Render the conversation with the chat template and return token ids."""
        conversation = []
        if system_prompt:
            conversation.append({"role": "system", "content": system_prompt})
        if apply_tools is True:
            conversation.append({"role": "tools", "content": supported_tools})

        # Copy so the caller's list is never mutated, and only lead with the
        # client info when there is some (avoids a ``null`` document entry).
        documents = list(references) if references else []
        if other_client_info:
            documents.insert(0, other_client_info)

        if documents:
            conversation.append(
                {
                    "role": "refs",
                    "content": json.dumps(
                        {
                            "instructions": "These are only general documents used for reference to give the most accurate and honest answers possible. Ignore it if it's irrelevant and don't overuse it.",
                            "documents": documents,
                        },
                        indent=2,
                        ensure_ascii=False,
                    ),
                }
            )

        for user, assistant in chat_history:
            conversation.extend(
                [
                    {"role": "user", "content": user},
                    {"role": "assistant", "content": assistant},
                ]
            )
        conversation.append({"role": "user", "content": message})

        input_ids = tokenizer.apply_chat_template(
            conversation, add_generation_prompt=True, return_tensors="pt"
        )
        input_ids = input_ids.to(model.device)
        if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
            # Keep the trailing tokens, i.e. the most recent part of the prompt.
            input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
            gr.Warning(
                f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens."
            )
        return input_ids

    def generate_chat_responses(
        previous_response: str = None,
    ):
        """Run one generation pass; ``previous_response`` is the held-back first pass."""
        document_references = []
        if previous_response is not None:
            scheduled_tools_runs = _parse_search_tool_call(previous_response)
            if scheduled_tools_runs is not None:
                arguments = scheduled_tools_runs["arguments"]
                keyword = arguments["keyword"]
                # Fall back to the defaults declared in the tool schema when
                # the model leaves these arguments out.
                search_type = arguments.get("type") or "google"
                language = arguments.get("language") or "en"
                print("scheduled_tools_runs:", scheduled_tools_runs)
                if search_type == "wikipedia":
                    gr.Info(
                        "Searching for information on the Wikipedia.",
                        duration=5,
                        visible=True,
                    )
                    document_references.extend(
                        search_with_wikipedia(query=keyword, language=language)
                    )

                # Google is queried for every search type.
                gr.Info("Searching for information on the Google.")
                try:
                    document_references.extend(
                        search_with_google(
                            query=keyword,
                            language=language,
                            num_results=3,
                        )
                    )
                except requests.exceptions.RequestException as e:
                    # A failed search should not abort the reply; answer
                    # without those references instead.
                    print(f"Error searching Google for {keyword!r}: {e}")
                print("document_references:", document_references)

        apply_tools = allow_used_tools is True and previous_response is None
        input_ids = build_input_ids(
            apply_tools=apply_tools,
            references=document_references,
        )
        streamer = TextIteratorStreamer(
            tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
        )
        generate_kwargs = dict(
            input_ids=input_ids,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            repetition_penalty=repetition_penalty,
        )
        if temperature == 0:
            generate_kwargs["do_sample"] = False
        else:
            generate_kwargs["temperature"] = temperature
            generate_kwargs["top_p"] = top_p
            generate_kwargs["top_k"] = top_k

        # Generation runs in a background thread; the streamer feeds this loop.
        t = Thread(target=model.generate, kwargs=generate_kwargs)
        t.start()

        first_chunk_at = None
        responded = False
        outputs = []
        for text in streamer:
            if first_chunk_at is None:
                first_chunk_at = time.time()
            outputs.append(text)
            # On a tool pass, hold the text back until the waiting window has
            # elapsed; otherwise stream right away.
            if (
                apply_tools is False
                or first_chunk_at + waiting_tools_timeout < time.time()
            ):
                responded = True
                yield "".join(outputs)

        # Nothing was shown during a tool pass: hand the buffered text to a
        # second, tool-free pass. Keying on ``responded`` alone also covers an
        # empty stream and a window that expired right after the last chunk,
        # so the user is never left without a reply.
        if apply_tools and not responded:
            yield from generate_chat_responses(previous_response="".join(outputs))

    yield from generate_chat_responses(previous_response=None)


chatbot = gr.Chatbot(
    label="Ghost 8B Beta",
    placeholder=PLACEHOLDER,
    height=500,
    show_copy_button=True,
)

# Extra controls, listed in the same order as the optional parameters of
# `generate` (allow_used_tools ... other_client_info).
_generation_controls = [
    gr.Checkbox(label="Allow used tools (available: search on internet)", value=False),
    gr.Textbox(label="System prompt", lines=6),
    gr.Slider(
        label="Max new tokens",
        value=DEFAULT_MAX_NEW_TOKENS,
        minimum=1,
        maximum=MAX_MAX_NEW_TOKENS,
        step=1,
    ),
    gr.Slider(label="Temperature", value=0.4, minimum=0.0, maximum=2.0, step=0.1),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.95,
        minimum=0.05,
        maximum=1.0,
        step=0.05,
    ),
    gr.Slider(label="Top-k", value=50, minimum=1, maximum=100, step=1),
    gr.Slider(
        label="Repetition penalty",
        value=1.0,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
    ),
    # Hidden field; its text is computed once, when this module is executed.
    gr.Textbox(
        label="Other client information",
        value="This user's current time: {}".format(time.strftime("%Y-%m-%d")),
        lines=1,
        visible=False,
    ),
]

chat_interface = gr.ChatInterface(
    fn=generate,
    chatbot=chatbot,
    additional_inputs=_generation_controls,
    examples=EXAMPLES,
    examples_per_page=9,
    cache_examples=False,
    stop_btn="Stop",
    fill_height=True,
    concurrency_limit=100,
)

# Page layout: description on top, the chat interface, then the disclaimer.
demo = gr.Blocks(fill_height=True, css="style.css")
with demo:
    gr.Markdown(DESCRIPTION)
    chat_interface.render()
    gr.Markdown(LICENSE)

if __name__ == "__main__":
    # Enable the request queue (at most 20 waiting) before starting the server.
    queued_demo = demo.queue(max_size=20)
    queued_demo.launch(share=True)