asynchronousai committed on
Commit
9b0ab3a
·
verified ·
1 Parent(s): 7a42459

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -54
app.py CHANGED
@@ -1,62 +1,30 @@
1
  import gradio as gr
2
- import numpy as np
3
- import json
4
- import pickle as pkl
5
- from transformers import AutoTokenizer
6
- import re
7
- # Vector Loader
8
- vectors = pkl.load(open("vectors.pkl", "rb"))
9
- vocab = [word.lower() for word in vectors.keys()]
10
 
11
- # Tokenizer
12
- tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
13
- def make_alphanumeric(input_string):
14
- return re.sub(r'[^a-zA-Z0-9 ]', '', input_string)
15
 
16
- def tokenize(text):
17
- # Check data
18
- if len(text) == 0:
19
- gr.Error("No text provided.")
20
- elif len(text) > 4096:
21
- gr.Error("Text too long.")
22
-
23
- # Filter
24
- text = make_alphanumeric(text.lower())
25
- pre_tokenize_result = tokenizer._tokenizer.pre_tokenizer.pre_tokenize_str(text)
26
- pre_tokenized_text = [word for word, offset in pre_tokenize_result]
27
-
28
- tokens = []
29
- for word in pre_tokenized_text:
30
- if word in vocab:
31
- tokens.append(word)
32
- return tokens
33
 
 
 
 
 
34
 
35
- # Interface
36
- def onInput(paragraph, progress = gr.Progress()):
37
- tokens = tokenize(paragraph)
38
-
39
- if not tokens: # Handle case with no tokens found
40
- return np.zeros(300).tolist() # Return a zero vector of appropriate dimension
41
-
42
- merged_vector = np.zeros(300) # Assuming vectors are 300-dimensional
43
 
44
- # Merge vectors using NumPy
45
- totalTokens = len(tokens)
46
- for ind, token in enumerate(tokens):
47
- completion = 0.2*((ind+1)/totalTokens)
48
- progress(0.6 + completion, f"Merging {token}, Token #{tokens.index(token)+1}/{len(tokens)}")
49
 
50
- if token not in vectors:
51
- continue
52
-
53
- vector = vectors[token]
54
- merged_vector += vector
55
-
56
- # Normalize
57
- merged_vector /= len(tokens)
58
-
59
- return merged_vector.tolist(), json.dumps(tokens)
60
 
61
- demo = gr.Interface(fn=onInput, inputs="text", outputs=["text", "json"])
62
- demo.launch()
 
1
  import gradio as gr
2
+ from vectordb import Memory
 
 
 
 
 
 
 
3
 
4
+ # Initialize Memory
5
+ memory = Memory()
 
 
6
 
7
+ # Save some example data
8
+ memory.save(
9
+ ["apples are green", "oranges are orange"], # save your text content
10
+ [{"url": "https://apples.com"}, {"url": "https://oranges.com"}], # associate metadata
11
+ )
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ # Define a function for querying
14
+ def search_query(query):
15
+ results = memory.search(query, top_n=1) # Search for top result
16
+ return results
17
 
18
+ # Create Gradio interface
19
+ with gr.Blocks() as demo:
20
+ gr.Markdown("### VectorDB Search")
 
 
 
 
 
21
 
22
+ with gr.Row():
23
+ input_query = gr.Textbox(label="Enter your query")
24
+ output_result = gr.Textbox(label="Search Results", interactive=False)
 
 
25
 
26
+ search_button = gr.Button("Search")
27
+ search_button.click(search_query, inputs=input_query, outputs=output_result)
 
 
 
 
 
 
 
 
28
 
29
+ # Run the Gradio app
30
+ demo.launch()