Spaces:
Runtime error
Runtime error
File size: 2,249 Bytes
4d82c20 166c1e2 d99b731 714ea85 d99b731 1716bb2 d99b731 4d82c20 d99b731 1716bb2 9bf2bb2 1716bb2 d99b731 1716bb2 d99b731 714ea85 d99b731 714ea85 8f1d664 d99b731 4d82c20 b74c568 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import gradio as gr
import os
import pinecone
import time
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
import torch
import sentence_transformers
from langchain.vectorstores import Pinecone
from langchain.llms.huggingface_text_gen_inference import HuggingFaceTextGenInference
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
# Checkpoint name handed to HuggingFaceEmbeddings; the same embedder is used
# further down for both embed_documents() and embed_query().
embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# No device is chosen explicitly: the device-selection line and the kwargs
# that would forward it are left disabled below.
# device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    # model_kwargs={'device': device},
    # encode_kwargs={'device': device, 'batch_size': 32}
)
# Configure the Pinecone client from environment variables. The API key
# comes from app.pinecone.io and the environment name from the console.
# os.environ.get() yields None for a variable that is not set.
pinecone.init(
    environment=os.environ.get('PINECONE_ENVIRONMENT'),
    api_key=os.environ.get('PINECONE_API_KEY'),
)
# Two placeholder sentences that are embedded once at import time. In this
# file the resulting vectors are only referenced by the disabled
# index-creation code (to derive the index dimension).
docs = [
    "this is one document",
    "and another document",
]
embeddings = embed_model.embed_documents(docs)
# Name of the Pinecone index used for retrieval.
index_name = 'llama-rag'

# Index bootstrap is disabled, so nothing here creates the index —
# presumably it was created out of band (TODO confirm).
# if index_name not in pinecone.list_indexes():
#     pinecone.create_index(
#         index_name,
#         dimension=len(embeddings[0]),
#         metric='cosine'
#     )
#     # wait for index to finish initialization
#     while not pinecone.describe_index(index_name).status['ready']:
#         time.sleep(1)

index = pinecone.Index(index_name)
# The returned stats are discarded; the call itself is kept unchanged.
index.describe_index_stats()

# field in metadata that contains text content
text_field = 'text'

# LangChain vector store over the index, using the query embedder above.
vectorstore = Pinecone(index, embed_model.embed_query, text_field)
# Hugging Face Inference API endpoint for the zephyr-7b-beta model.
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"

# Bearer-token header built from the API_KEY environment variable. When the
# variable is unset the header is omitted instead of sending the literal
# string "Bearer None".
_hf_api_key = os.environ.get('API_KEY')
headers = {"Authorization": f"Bearer {_hf_api_key}"} if _hf_api_key else {}

# Text-generation client used as the LLM of the QA chain.
llm = HuggingFaceTextGenInference(
    inference_server_url=API_URL,
    # Forward the auth header to the underlying HTTP client. Previously
    # `headers` was built but never passed anywhere, so every request was
    # sent without credentials.
    # NOTE(review): relies on the installed LangChain release exposing
    # `server_kwargs` on HuggingFaceTextGenInference — confirm the pin.
    server_kwargs={"headers": headers},
    # Sampling / decoding settings, unchanged from the original script.
    max_new_tokens=512,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,
    repetition_penalty=1.03,
)
# Build the RetrievalQA chain from the LLM and the vector store's default
# retriever, using the 'stuff' chain type.
rag_pipeline = RetrievalQA.from_chain_type(
    chain_type='stuff',
    retriever=vectorstore.as_retriever(),
    llm=llm,
)
def question(question):
    """Run the RAG pipeline on a question and return its answer text.

    The full pipeline response is also stored in the module-level name
    ``chatbot`` before the ``'result'`` entry is returned.

    Args:
        question: The input passed unchanged to ``rag_pipeline``.

    Returns:
        The value stored under ``'result'`` in the pipeline response.
    """
    global chatbot
    # Bind the response locally as well, so the return value does not
    # depend on the global after it has been published.
    chatbot = response = rag_pipeline(question)
    return response['result']
# Gradio UI with one text input and one text output, backed by question().
demo = gr.Interface(fn=question, inputs="text", outputs="text")

if __name__ == "__main__":
    # Launch only when executed as a script, not when imported.
    # The stray trailing `|` that followed this call was a syntax error
    # and has been dropped.
    demo.launch()