"""Gradio app that embeds text with nomic-ai/nomic-embed-text-v1.5."""

import os
import time

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModel

# Setup environment: default HF_HOME to a local cache dir (setdefault, so the
# later os.environ["HF_HOME"] lookups cannot raise KeyError) and require a token
hf_home = os.environ.setdefault("HF_HOME", "./hf_cache")
os.makedirs(hf_home, exist_ok=True)
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise EnvironmentError("Environment variable HF_TOKEN is not set.")

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load model and tokenizer
text_tokenizer = AutoTokenizer.from_pretrained(
    "nomic-ai/nomic-embed-text-v1.5",
    trust_remote_code=True,
    token=hf_token,
    cache_dir=os.environ["HF_HOME"]
)
text_model = AutoModel.from_pretrained(
    "nomic-ai/nomic-embed-text-v1.5",
    trust_remote_code=True,
    token=hf_token,
    cache_dir=os.environ["HF_HOME"]
).to(device)  # Move model to GPU if available

# Embedding function
def get_text_embeddings(text):
    """
    Converts input text into a dense embedding using the Nomic embedding model.
    These embeddings are used to query Qdrant for semantically relevant document chunks.
    """
    inputs = text_tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)  # Move inputs to same device as model
    with torch.no_grad():  # Disable gradient calculation for inference
        outputs = text_model(**inputs)
    # Simple mean pooling over all token positions; adequate here because a
    # single input string gets no padding tokens
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings[0].cpu().numpy()  # Move back to CPU for numpy conversion
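
# The docstring above mentions querying Qdrant with these embeddings. A minimal
# sketch of that lookup, assuming a running Qdrant instance and a collection
# named "docs" (both hypothetical, not part of this app). For cosine-distance
# search, normalizing the vector first (torch.nn.functional.normalize) is a
# common choice:
#
#     from qdrant_client import QdrantClient
#
#     qdrant = QdrantClient(url="http://localhost:6333")
#     hits = qdrant.search(
#         collection_name="docs",
#         query_vector=get_text_embeddings("what is nomic?").tolist(),
#         limit=5,
#     )
#     for hit in hits:
#         print(hit.score, hit.payload)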

# Gradio interface function
def embed_text_interface(text):
    start_time = time.time()
    embedding = get_text_embeddings(text)
    print(f"Nomic embedding took {time.time() - start_time:.3f}s")
    return str(embedding)  # Textbox output expects a string

# Gradio UI
interface = gr.Interface(
    fn=embed_text_interface,
    inputs=gr.Textbox(label="Enter text to embed", lines=5),
    outputs=gr.Textbox(label="Embedding vector"),
    title="Text Embedding with Nomic AI",
    description="Enter some text, and get its embedding vector using Nomic's embedding model."
)

# Launch the app
if __name__ == "__main__":
    interface.launch()
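
# Once the app is running, its endpoint can also be called programmatically.
# A sketch using gradio_client, assuming the default local URL and the
# auto-generated "/predict" endpoint name:
#
#     from gradio_client import Client
#
#     client = Client("http://127.0.0.1:7860/")
#     vector_str = client.predict("hello world", api_name="/predict")
#     print(vector_str[:120])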