Spaces:

Abhijit-192-168-1-1
/

example_LLM2Vec

Running

App Files Files Community

Abhijit-192-168-1-1 committed on Jul 15, 2024

Commit

c51e482

1 Parent(s): a05b3ab

added app.py

Browse files

Files changed (1) hide show

app.py +48 -0

app.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import gradio as gr
+from llm2vec import LLM2Vec
+from transformers import AutoTokenizer, AutoModel, AutoConfig
+from peft import PeftModel
+import torch
+import os
+# Switch off the flash and memory-efficient scaled-dot-product-attention kernels.
+torch.backends.cuda.enable_flash_sdp(False)
+torch.backends.cuda.enable_mem_efficient_sdp(False)
+# Both API tokens must come from the environment; start-up aborts if either is missing or empty.
+GROQ_API_KEY = os.environ.get('GROQ_API_KEY')
+HF_TOKEN = os.environ.get('HF_TOKEN')
+if not (GROQ_API_KEY and HF_TOKEN):
+    raise ValueError("GROQ_API_KEY and HF_TOKEN must be set as environment variables.")
+# Write the validated values back into the process environment.
+os.environ.update({'GROQ_API_KEY': GROQ_API_KEY, 'HF_TOKEN': HF_TOKEN})
+# Hub repositories: the MNTP checkpoint and the unsupervised SimCSE adapter applied on top of it.
+_MNTP_REPO = "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp"
+_SIMCSE_REPO = "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse"
+# Tokenizer, config and bfloat16 base weights are all fetched from the MNTP repository.
+tokenizer = AutoTokenizer.from_pretrained(_MNTP_REPO)
+config = AutoConfig.from_pretrained(_MNTP_REPO, trust_remote_code=True)
+# Place the model on the GPU when one is available, otherwise on the CPU.
+_device = "cuda" if torch.cuda.is_available() else "cpu"
+model = AutoModel.from_pretrained(
+    _MNTP_REPO,
+    trust_remote_code=True,
+    config=config,
+    torch_dtype=torch.bfloat16,
+    device_map=_device,
+)
+# Load the MNTP PEFT weights, then merge them into the base model and drop the PEFT wrapper.
+model = PeftModel.from_pretrained(model, _MNTP_REPO).merge_and_unload()
+# Stack the unsupervised SimCSE adapter on the merged model (it is left unmerged).
+model = PeftModel.from_pretrained(model, _SIMCSE_REPO)
+# LLM2Vec wraps model + tokenizer for encoding; configured with mean pooling and max_length=512.
+l2v = LLM2Vec(model, tokenizer, pooling_mode="mean", max_length=512)
+def encode_text(input_text):
+    """Embed text with the module-level ``l2v`` wrapper and return JSON-friendly lists.
+
+    Args:
+        input_text: A single string (what the Gradio textbox supplies) or a
+            sequence of strings.
+
+    Returns:
+        For a single string, the nested-list form of its embedding row; for a
+        sequence, one such entry per input item.
+    """
+    single = isinstance(input_text, str)
+    # Hand encode() a list of texts. A bare string would be indexed/iterated
+    # character by character (and "" would fail on indexing).
+    # NOTE(review): based on llm2vec's documented list-of-texts usage -- confirm.
+    batch = [input_text] if single else list(input_text)
+    embeddings = l2v.encode(batch)
+    # The encoder returns a tensor/array, which a JSON output cannot serialize
+    # directly; convert it to plain nested Python lists first.
+    # NOTE(review): assumes one row per input text, i.e. (n_texts, dim) -- confirm.
+    rows = embeddings.tolist()
+    return rows[0] if single else rows
+# Define Gradio interface: a two-line textbox in, the encoder's result rendered as JSON out.
+# Components come from the top-level gradio namespace because the legacy
+# `gr.inputs` / `gr.outputs` modules were deprecated in Gradio 3 and removed in Gradio 4.
+iface = gr.Interface(
+    fn=encode_text,
+    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
+    outputs=gr.JSON(),
+)
+# Launch Gradio app (share=True asks Gradio to create a public share link).
+iface.launch(share=True)