ruslanmv committed on
Commit
70c89ac
·
0 Parent(s):

Initial commit

Browse files
Files changed (5) hide show
  1. .gitattributes +35 -0
  2. Dockerfile +25 -0
  3. README.md +13 -0
  4. app.py +187 -0
  5. requirements.txt +5 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11

WORKDIR /code

# Install dependencies first so this layer is reused when only app code changes
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
# Switch to the "user" user
USER user
# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

EXPOSE 7860

# Exec form: python3 is started directly rather than through "/bin/sh -c",
# so container stop signals are delivered to the Python process itself.
CMD ["python3", "-u", "app.py"]
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Milvus Client Embedding
3
+ emoji: 📚
4
+ colorFrom: yellow
5
+ colorTo: blue
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ license: apache-2.0
10
+ ---
11
+
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings, OpenAIEmbeddings
4
+ from pymilvus import Collection, connections
5
+ import json
6
+ import os
7
# Set before any embedding model is created.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Milvus connection settings; each one can be overridden via the environment.
MILVUS_COLLECTION = os.environ.get("MILVUS_COLLECTION", "LangChainCollection")
MILVUS_HOST = os.environ.get("MILVUS_HOST", "")
# Kept as a string, matching the previous hard-coded "19530" default.
MILVUS_PORT = os.environ.get("MILVUS_PORT", "19530")

# Embedding model and the name of the class used to load it.
EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "hkunlp/instructor-large")
EMBEDDING_LOADER = os.environ.get("EMBEDDING_LOADER", "HuggingFaceInstructEmbeddings")
# Loader names offered in the UI dropdown.
EMBEDDING_LIST = ["HuggingFaceInstructEmbeddings", "HuggingFaceEmbeddings"]

# return top-k text chunks from vector store
TOP_K_DEFAULT = 15
TOP_K_MAX = 30
SCORE_DEFAULT = 0.33

BUTTON_MIN_WIDTH = 100

# Module-level singletons populated by init_emb(); None until "Init" has run.
# (A `global` statement is a no-op at module scope, so none is needed here.)
g_emb = None
g_col = None
30
+
31
def init_emb(emb_name, emb_loader, db_col_textbox):
    """Create the embedding client and load the Milvus collection.

    Both objects are stored in the module-level ``g_emb`` / ``g_col``
    globals so that later searches can use them.

    Args:
        emb_name: Model name handed to the loader as ``model_name``.
        emb_loader: Name of the embedding class to instantiate. Must be one
            of the embedding classes this module imports.
        db_col_textbox: Name of the Milvus collection to open and load.

    Returns:
        The tuple ``(str(g_emb), str(g_col))``.

    Raises:
        ValueError: If ``emb_loader`` is not a supported loader name.
    """
    global g_emb
    global g_col

    # The loader name comes from the UI / environment. It is validated
    # against a fixed allowlist and looked up by name instead of being passed
    # to eval(), which would execute arbitrary expressions.
    supported_loaders = (
        "HuggingFaceInstructEmbeddings",
        "HuggingFaceEmbeddings",
        "OpenAIEmbeddings",
    )
    if emb_loader not in supported_loaders:
        raise ValueError(
            f"Unsupported embedding loader {emb_loader!r}; "
            f"expected one of {', '.join(supported_loaders)}"
        )
    loader_cls = globals()[emb_loader]

    g_emb = loader_cls(model_name=emb_name)

    # NOTE: the connection always uses the module-level MILVUS_HOST /
    # MILVUS_PORT values; this function receives no host argument.
    connections.connect(
        host=MILVUS_HOST,
        port=MILVUS_PORT,
    )

    g_col = Collection(db_col_textbox)
    g_col.load()

    return (str(g_emb), str(g_col))
48
+
49
+
50
def get_emb():
    """Return the current value of the module-level ``g_emb`` global."""
    return g_emb
52
+
53
def get_col():
    """Return the current value of the module-level ``g_col`` global."""
    return g_col
55
+
56
+
57
def remove_duplicates(documents, score_min):
    """Filter out low-scoring and repeated documents.

    Args:
        documents: Iterable of ``(doc, score)`` pairs; each ``doc`` exposes a
            ``page_content`` attribute.
        score_min: Lowest score a pair may have and still be kept.

    Returns:
        List of the kept ``doc`` objects, in input order. For each distinct
        ``page_content`` only the first pair meeting the threshold is kept.
    """
    kept = []
    texts_seen = set()
    for doc, score in documents:
        text = doc.page_content
        # Skip content that was already kept and anything under the threshold.
        if text in texts_seen or not (score >= score_min):
            continue
        texts_seen.add(text)
        kept.append(doc)
    return kept
65
+
66
+
67
def get_data(query, top_k, score, db_col, db_index):
    """Embed *query*, search the loaded Milvus collection, return JSON text.

    The result is wired to a ``gr.JSON`` output component, so every return
    value, including the error cases, is a valid JSON document.

    Args:
        query: Free-text search string.
        top_k: Maximum number of hits to request from Milvus.
        score: Currently unused by this function.
        db_col: Currently unused by this function.
        db_index: Currently unused by this function.

    Returns:
        A JSON string. On success it is a list of
        ``{"source": ..., "text": ...}`` objects, one per hit. Otherwise it is
        an ``{"error": ...}`` object explaining what is missing.
    """
    if not query:
        return json.dumps({"error": "Please enter a query"})

    # Both globals stay None until init_emb() has run from the
    # Configuration tab; report that instead of failing on None.
    if g_emb is None or g_col is None:
        return json.dumps({"error": "Please init db in configuration"})

    embed_query = g_emb.embed_query(query)

    search_params = {
        "metric_type": "L2",
        "params": {"nprobe": 1},
        "offset": 0,
    }

    results = g_col.search(
        data=[embed_query],
        anns_field="vector",
        param=search_params,
        # Coerce to int in case the slider delivers a float value.
        limit=int(top_k),
        expr=None,
        output_fields=['source', 'text'],
        consistency_level="Strong",
    )

    # A single query vector was sent, so only results[0] is read.
    hits = [
        {'source': hit.entity.get('source'), 'text': hit.entity.get('text')}
        for hit in results[0]
    ]
    return json.dumps(hits, indent=0)
94
+
95
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a flattened listing — confirm against the rendered app.
with gr.Blocks(
    title="3GPP Database",
    theme="Base",
    css=".bigbox {\nmin-height:250px;\n}\n",
) as demo:
    # ---- "Matching" tab: search controls, query input and results ----
    with gr.Tab("Matching"):
        with gr.Accordion("Vector similarity"):
            with gr.Row():
                with gr.Column():
                    top_k = gr.Slider(
                        1,
                        TOP_K_MAX,
                        value=TOP_K_DEFAULT,
                        step=1,
                        label="Vector similarity top_k",
                        interactive=True,
                    )
                with gr.Column():
                    score = gr.Slider(
                        0.01,
                        0.99,
                        value=SCORE_DEFAULT,
                        step=0.01,
                        label="Vector similarity score",
                        interactive=True,
                    )

        with gr.Row():
            with gr.Column(scale=10):
                input_box = gr.Textbox(
                    label="Input",
                    placeholder="What are you looking for?",
                )
            with gr.Column(scale=1, min_width=BUTTON_MIN_WIDTH):
                btn_run = gr.Button("Run", variant="primary")

        output_box = gr.JSON(label="Output")

    # ---- "Configuration" tab: embedding and Milvus settings ----
    with gr.Tab("Configuration"):
        with gr.Row():
            btn_init = gr.Button("Init")

        # Read-only status boxes; initial values come from get_emb / get_col
        # and they are overwritten by the outputs of init_emb.
        load_emb = gr.Textbox(get_emb, label='Embedding Client', show_label=True)
        load_col = gr.Textbox(get_col, label='Milvus Collection', show_label=True)

        with gr.Accordion("Embedding"):
            with gr.Row():
                with gr.Column():
                    emb_textbox = gr.Textbox(
                        label="Embedding Model",
                        value=EMBEDDING_MODEL,
                        placeholder="Paste Your Embedding Model Repo on HuggingFace",
                        lines=1,
                        interactive=True,
                        type='email',
                    )

                with gr.Column():
                    emb_dropdown = gr.Dropdown(
                        EMBEDDING_LIST,
                        value=EMBEDDING_LOADER,
                        multiselect=False,
                        interactive=True,
                        label="Embedding Loader",
                    )

        with gr.Accordion("Milvus Database"):
            with gr.Row():
                db_col_textbox = gr.Textbox(
                    label="Milvus Collection",
                    value=MILVUS_COLLECTION,
                    placeholder="Paste Your Milvus Collection (xx-xx-xx) and Hit ENTER",
                    lines=1,
                    interactive=True,
                    type='email',
                )
                db_index_textbox = gr.Textbox(
                    label="Milvus Host",
                    value=MILVUS_HOST,
                    placeholder="Paste Your Milvus Index (xxxx) and Hit ENTER",
                    lines=1,
                    interactive=True,
                    type='password',
                )

    # ---- Event wiring ----
    # "Init" builds the embedding client and loads the collection, then
    # shows their string forms in the two status boxes.
    btn_init.click(
        fn=init_emb,
        inputs=[emb_textbox, emb_dropdown, db_col_textbox],
        outputs=[load_emb, load_col],
    )
    # "Run" sends the query and search settings to get_data and renders
    # the returned JSON.
    btn_run.click(
        fn=get_data,
        inputs=[input_box, top_k, score, db_col_textbox, db_index_textbox],
        outputs=[output_box],
    )
182
+
183
if __name__ == "__main__":
    # Enable the queue, then serve on all interfaces at port 7860 (the port
    # the Dockerfile exposes and the README declares as app_port).
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=7860)
187
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pymilvus
2
+ langchain
3
+ gradio
4
+ InstructorEmbedding
5
+ sentence_transformers