Upload folder using huggingface_hub

- .gitignore +1 -0
- README.md +6 -2
- app/_config.py +3 -7
- app/components/llm/component.py +1 -1
- app/main.py +0 -4
- app/server/chat/service.py +3 -12
- app/ui/ui.py +59 -6
- imgs.py +29 -0
- index.sh +22 -0
- pyproject.toml +8 -6
.gitignore
CHANGED
@@ -178,3 +178,4 @@ coverage_report/
 local_data/
 models/
 .DS_Store
+/app/ui/multimodalchatbot
README.md
CHANGED
@@ -1,8 +1,8 @@
 ---
 title: discord-bot
-app_file: app/
+app_file: app/__main__.py
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.26.0
 ---
 # Capstone Project
 
@@ -40,6 +40,10 @@ Download embedding and(or) LLM models
 ```shell
 bash prestart.sh
 ```
+Download web images and create indices
+```shell
+bash index.sh
+```
 
 ### Install `pre-commit` hooks
 
app/_config.py
CHANGED
@@ -15,8 +15,9 @@ class Settings(BaseSettings):
 
     WEAVIATE_CLIENT_URL: str = "http://localhost:8080"
 
-    LLM_MODE: Literal["openai", "mock", "local"] = "
-    EMBEDDING_MODE: Literal["openai", "mock", "local"] = "
+    LLM_MODE: Literal["openai", "mock", "local"] = "local"
+    EMBEDDING_MODE: Literal["openai", "mock", "local"] = "local"
+    IMG_DATASET: Literal["growstuff"] = "growstuff"
 
     LOCAL_DATA_FOLDER: str = "local_data/test"
 
@@ -44,11 +45,6 @@ class Settings(BaseSettings):
     IS_UI_ENABLED: bool = True
     UI_PATH: str = "/"
 
-    # Rerank
-    IS_RERANK_ENABLED: bool = True
-    RERANK_TOP_N: int = 3
-    RERANK_MODEL_NAME: str = "cross-encoder/ms-marco-MiniLM-L-2-v2"
-
     class Config:
         case_sensitive = True
        env_file_encoding = "utf-8"
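Because `Settings` extends pydantic's `BaseSettings` with `case_sensitive = True`, each of the new fields can be overridden through an identically named environment variable. A minimal sketch, assuming (as the repo's usage suggests) that `settings` is instantiated when `app._config` is imported:

```python
# Hedged sketch: overriding the new settings via environment variables.
import os

os.environ["LLM_MODE"] = "mock"          # any of "openai" / "mock" / "local"
os.environ["IMG_DATASET"] = "growstuff"  # currently the only allowed Literal value

from app._config import settings

print(settings.LLM_MODE)     # "mock" instead of the "local" default
print(settings.IMG_DATASET)  # "growstuff"
```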
app/components/llm/component.py
CHANGED
@@ -42,7 +42,7 @@ class LLMComponent:
             # set to at least 1 to use GPU
             # set to -1 for all gpu
             # set to 0 for cpu
-            model_kwargs={"n_gpu_layers": 
+            model_kwargs={"n_gpu_layers": -1},
             # transform inputs into Llama2 format
             messages_to_prompt=messages_to_prompt,
             completion_to_prompt=completion_to_prompt,
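For context, `n_gpu_layers` is a llama.cpp option controlling how many transformer layers are offloaded to the GPU. A minimal sketch of the surrounding construction, assuming the component wraps llama-index 0.9.x's `LlamaCPP` (the `messages_to_prompt`/`completion_to_prompt` keywords in the diff are `LlamaCPP` parameters; the model path below is hypothetical):

```python
from llama_index.llms import LlamaCPP

llm = LlamaCPP(
    model_path="models/llama-2-7b-chat.Q4_K_M.gguf",  # hypothetical local model file
    # -1 offloads all layers to the GPU; 0 keeps inference entirely on the CPU
    model_kwargs={"n_gpu_layers": -1},
)
```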
app/main.py
CHANGED
@@ -1,7 +1,6 @@
 import logging
 
 from fastapi import FastAPI
-import gradio as gr
 
 from app._config import settings
 from app.components.embedding.component import EmbeddingComponent
@@ -37,6 +36,3 @@ if settings.IS_UI_ENABLED:
 
     ui = PrivateGptUi(ingest_service, chat_service)
     ui.mount_in_app(app, settings.UI_PATH)
-
-    io = gr.Interface(lambda x: "Hello, " + x + "!", "textbox", "textbox")
-    app = gr.mount_gradio_app(app, io, settings.UI_PATH)
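With the hello-world interface removed, mounting the real UI is left entirely to `PrivateGptUi.mount_in_app`. A hedged, runnable sketch of the underlying mechanism, assuming it uses gradio's `mount_gradio_app` the same way the deleted lines did (the placeholder Blocks stands in for the real chat UI):

```python
import gradio as gr
from fastapi import FastAPI

app = FastAPI()

# Stand-in for the Blocks object PrivateGptUi builds; see app/ui/ui.py.
with gr.Blocks() as blocks:
    gr.Markdown("placeholder UI")

# Serve the gradio UI from the FastAPI app at the configured path.
app = gr.mount_gradio_app(app, blocks, path="/")
```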
app/server/chat/service.py
CHANGED
@@ -3,11 +3,9 @@ from dataclasses import dataclass
 from llama_index import ServiceContext, StorageContext, VectorStoreIndex
 from llama_index.chat_engine import ContextChatEngine
 from llama_index.chat_engine.types import BaseChatEngine
-from llama_index.core.postprocessor import SentenceTransformerRerank
 from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
 from llama_index.llms import ChatMessage, MessageRole
 
-from app._config import settings
 from app.components.embedding.component import EmbeddingComponent
 from app.components.llm.component import LLMComponent
 from app.components.node_store.component import NodeStoreComponent
@@ -79,20 +77,13 @@ class ChatService:
             index=self.index
         )
 
-        node_postprocessors = [
-            MetadataReplacementPostProcessor(target_metadata_key="window")
-        ]
-        if settings.IS_RERANK_ENABLED:
-            rerank = SentenceTransformerRerank(
-                top_n=settings.RERANK_TOP_N, model=settings.RERANK_MODEL_NAME
-            )
-            node_postprocessors.append(rerank)
-
         return ContextChatEngine.from_defaults(
             system_prompt=system_prompt,
             retriever=vector_index_retriever,
             service_context=self.service_context,
-            node_postprocessors=
+            node_postprocessors=[
+                MetadataReplacementPostProcessor(target_metadata_key="window"),
+            ],
         )
 
     def chat(self, messages: list[ChatMessage]):
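`MetadataReplacementPostProcessor` pairs with sentence-window indexing: each retrieved node carries its surrounding context under `metadata["window"]`, and the postprocessor swaps the node's text for that wider window before it reaches the LLM. A minimal sketch against llama-index 0.9.x (the node contents are made up):

```python
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.schema import NodeWithScore, TextNode

node = TextNode(
    text="Tomatoes need full sun.",
    metadata={"window": "Plant in spring. Tomatoes need full sun. Water deeply."},
)
postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
(replaced,) = postproc.postprocess_nodes([NodeWithScore(node=node, score=1.0)])
print(replaced.node.get_content())  # prints the wider window, not the single sentence
```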
app/ui/ui.py
CHANGED
@@ -1,14 +1,18 @@
 """This file should be imported if and only if you want to run the UI locally."""
+
 import itertools
 import logging
 from pathlib import Path
+import subprocess
 from typing import Any
+import os
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 import gradio as gr
 from fastapi import FastAPI
 from gradio.themes.utils.colors import slate
-from llama_index.llms import ChatMessage, MessageRole
 
+from llama_index.llms import MessageRole, ChatMessage
 from app._config import settings
 from app.components.embedding.component import EmbeddingComponent
 from app.components.llm.component import LLMComponent
@@ -18,6 +22,7 @@ from app.enums import PROJECT_ROOT_PATH
 from app.server.chat.service import ChatService
 from app.server.ingest.service import IngestService
 from app.ui.schemas import Source
+from app.paths import local_data_path
 
 logger = logging.getLogger(__name__)
 
@@ -28,6 +33,9 @@ UI_TAB_TITLE = "Agriculture Chatbot"
 
 SOURCES_SEPARATOR = "\n\n Sources: \n"
 
+model_name = "VietAI/envit5-translation"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
 class PrivateGptUi:
     def __init__(
@@ -40,11 +48,43 @@ class PrivateGptUi:
 
         # Cache the UI blocks
         self._ui_block = None
-
         # Initialize system prompt
         self._system_prompt = self._get_default_system_prompt()
 
-    def _chat(
+    def _chat(
+        self,
+        message: str,
+        history: list[list[str]],
+        upload_button: Any,
+        system_prompt_input: Any,
+        # show_image: bool,
+    ) -> Any:
+        # logger.info(f"Show image = {show_image}")
+        if "#ảnh" in message:
+            message = message.replace("#ảnh", "")
+            vi_message = "vi: " + message
+            outputs = model.generate(tokenizer([vi_message], return_tensors="pt", padding=True).input_ids, max_length=512)
+            en_message = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].replace("en:", "")
+            command = f"""
+            cd {local_data_path}
+            clip-retrieval filter --query "{en_message}" --output_folder "retrieved_folder" --indice_folder "index_folder" --num_results 1
+            """
+            logger.info(command)
+            subprocess.run(command, shell=True, check=True)
+
+            folder_path = f"{local_data_path}/retrieved_folder"
+            files = os.listdir(folder_path)
+            # sort images newest first; keep older images so they still show in chat history
+            files.sort(
+                key=lambda x: os.path.getctime(os.path.join(folder_path, x)),
+                reverse=True,
+            )
+            newest_image = files[0]
+            logger.info(f"Retrieved image {newest_image}")
+
+            return (os.path.relpath(f"{folder_path}/{newest_image}", PROJECT_ROOT_PATH),)
+
+
         def build_history() -> list[ChatMessage]:
             history_messages: list[ChatMessage] = list(
                 itertools.chain(
@@ -53,7 +93,11 @@ class PrivateGptUi:
                         ChatMessage(content=interaction[0], role=MessageRole.USER),
                         ChatMessage(
                             # Strip the Sources block out of the history content
-                            content=
+                            content=(
+                                "[Image Output]"
+                                if isinstance(interaction[1], tuple)
+                                else interaction[1].split(SOURCES_SEPARATOR)[0]
+                            ),
                             role=MessageRole.ASSISTANT,
                         ),
                     ]
@@ -142,6 +186,12 @@ class PrivateGptUi:
 
         with gr.Row(equal_height=False):
             with gr.Column(scale=3):
+                # image_checkbox = gr.Checkbox(
+                #     label="Show Image",
+                #     info="Do you want to output relevant image?",
+                #     value=False,
+                #     interactive=True,
+                # )
                 upload_button = gr.components.UploadButton(
                     "Upload File(s)",
                     type="filepath",
@@ -172,7 +222,6 @@ class PrivateGptUi:
                     interactive=True,
                     render=False,
                 )
-
                 # On blur, set system prompt to use in queries
                 system_prompt_input.blur(
                     self._set_system_prompt,
@@ -192,7 +241,11 @@ class PrivateGptUi:
                         AVATAR_BOT,
                     ),
                 ),
-                additional_inputs=[
+                additional_inputs=[
+                    upload_button,
+                    system_prompt_input,
+                    # image_checkbox,
+                ],
             )
         return blocks
 
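The `#ảnh` ("#image") trigger above translates the Vietnamese prompt to English before querying the CLIP index, since the index is built from English-aligned CLIP embeddings. A standalone sketch of just the translation step, mirroring the diff (the sample input and output are illustrative):

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "VietAI/envit5-translation"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# envit5 expects a "vi: " prefix on the input and answers with an "en: " prefix.
inputs = tokenizer(["vi: cà chua chín đỏ"], return_tensors="pt", padding=True)
outputs = model.generate(inputs.input_ids, max_length=512)
en = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].removeprefix("en:")
print(en.strip())  # e.g. "red ripe tomatoes"
```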
imgs.py
ADDED
@@ -0,0 +1,29 @@
+from app._config import settings
+import logging
+import requests
+
+logger = logging.getLogger(__name__)
+match settings.IMG_DATASET:
+    case "growstuff":
+        global urls, keyword
+        urls = [
+            "https://www.growstuff.org/harvests.json",
+            "https://www.growstuff.org/crops.json",
+            "https://www.growstuff.org/seeds.json",
+        ]
+        keyword = "thumbnail_url"
+
+thumbnail_urls = set()
+
+for url in urls:
+    response = requests.get(url)
+    if response.status_code == 200:
+        data = response.json()["query"]
+        thumbnail_urls.update(item.get(keyword) for item in data if item and item.get(keyword))
+    else:
+        logger.info(f"Failed to retrieve data from {url}.")
+thumbnail_urls = list(thumbnail_urls)
+with open(f"{settings.LOCAL_DATA_FOLDER}/myimglist.txt", "w") as file:
+    for url in thumbnail_urls:
+        file.write(url + "\n")
+logger.info(f"Retrieved {len(thumbnail_urls)} image urls and wrote them to {settings.LOCAL_DATA_FOLDER}/myimglist.txt")
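`img2dataset --url_list` consumes exactly the format `imgs.py` writes: one absolute image URL per line. A quick sanity check, assuming the `LOCAL_DATA_FOLDER` setting from `app/_config.py`:

```python
from app._config import settings

# Read back the URL list imgs.py produced and verify the expected shape.
with open(f"{settings.LOCAL_DATA_FOLDER}/myimglist.txt") as f:
    urls = [line.strip() for line in f if line.strip()]

print(f"{len(urls)} thumbnail urls")
assert all(u.startswith("http") for u in urls), "img2dataset needs absolute URLs"
```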
index.sh
ADDED
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+# pull image urls from the web
+python imgs.py
+local_data_folder=$(python -c "from app._config import settings; print(settings.LOCAL_DATA_FOLDER)")
+
+cd "$local_data_folder"
+# remove the folders if they already exist
+rm -rf embeddings_folder
+rm -rf image_folder
+rm -rf index_folder
+rm -rf retrieved_folder
+
+echo "download image urls into image folder"
+img2dataset --url_list=myimglist.txt --output_folder=image_folder --thread_count=64 --image_size=256
+
+echo "create embedding folder"
+# change --num_prepro_workers > 0 to enable multiprocessing
+clip-retrieval inference --input_dataset image_folder --output_folder embeddings_folder --enable_text False --num_prepro_workers 0
+
+echo "create indices from embedding folder"
+clip-retrieval index --embeddings_folder embeddings_folder --index_folder index_folder
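After `index.sh` finishes, `LOCAL_DATA_FOLDER` should hold the three artifacts the UI relies on (this layout is inferred from the flags above): `image_folder/` with the downloaded thumbnails, `embeddings_folder/` with their CLIP embeddings, and `index_folder/` with the index that `clip-retrieval filter` queries from `app/ui/ui.py`. A small check:

```python
import os
from app._config import settings

# Folders index.sh is expected to produce, inferred from its command flags.
for name in ("image_folder", "embeddings_folder", "index_folder"):
    path = os.path.join(settings.LOCAL_DATA_FOLDER, name)
    print(path, "ok" if os.path.isdir(path) else "MISSING")
```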
pyproject.toml
CHANGED
@@ -4,7 +4,10 @@ version = "0.1.0"
 description = ""
 authors = ["PhucVu <[email protected]>"]
 readme = "README.md"
-
+packages = [
+    { include = "app" },
+    { include = "app/**/*.py" },
+]
 [tool.poetry.dependencies]
 python = "^3.10"
 llama-index = "^0.9.22"
@@ -15,15 +18,14 @@ uvicorn = "^0.25.0"
 pydantic = "^2.5.3"
 gradio = "^4.12.0"
 
-# reranker
-torch = {version="^2.3.0", optional=true}
-sentence-transformers = {version="^2.7.0", optional=true}
-
 [tool.poetry.group.local]
 optional = true
 [tool.poetry.group.local.dependencies]
+torch = "1.13.1"
+clip-retrieval = "^2.44.0"
+img2dataset = "^1.44.1"
+ipython = "^8.20.0"
 transformers = "^4.36.2"
-torch = "^2.1.2"
 llama-cpp-python = "^0.2.29"
 
 [build-system]