import os
import time

import gradio as gr
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel

# Setup environment: ensure the HF cache directory exists and a token is available.
os.makedirs(os.environ.get("HF_HOME", "./hf_cache"), exist_ok=True)
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise EnvironmentError("Environment variable HF_TOKEN is not set.")

# Check for GPU availability.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load model and tokenizer.
text_tokenizer = AutoTokenizer.from_pretrained(
    "nomic-ai/nomic-embed-text-v1.5",
    trust_remote_code=True,
    token=hf_token,
    cache_dir=os.environ["HF_HOME"],
)
text_model = AutoModel.from_pretrained(
    "nomic-ai/nomic-embed-text-v1.5",
    trust_remote_code=True,
    token=hf_token,
    cache_dir=os.environ["HF_HOME"],
).to(device)  # Move the model to the GPU if one is available


# Embedding function
def get_text_embeddings(text):
    """
    Converts input text into a dense embedding using the Nomic embedding model.
    These embeddings are used to query Qdrant for semantically relevant document chunks.
    """
    # Move the tokenized inputs to the same device as the model.
    inputs = text_tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():  # Disable gradient tracking for inference
        outputs = text_model(**inputs)
    # Mean-pool the token embeddings into a single sentence vector.
    embeddings = outputs.last_hidden_state.mean(dim=1)
    # Copy back to the CPU before converting; .numpy() fails on a CUDA tensor.
    return embeddings[0].cpu().numpy()


# Gradio interface function
def embed_text_interface(text):
    start_time = time.time()
    embedding = get_text_embeddings(text)
    print(f"Total time taken by nomic to embed: {time.time() - start_time:.3f}s")
    return str(embedding)


# Gradio UI
interface = gr.Interface(
    fn=embed_text_interface,
    inputs=gr.Textbox(label="Enter text to embed", lines=5),
    outputs=gr.Textbox(label="Embedding vector"),
    title="Text Embedding with Nomic AI",
    description="Enter some text, and get its embedding vector using Nomic's embedding model.",
)

# Launch the app
if __name__ == "__main__":
    interface.launch()