from gradio_client import Client
import gradio as gr
import requests
# from langchain.chains import RetrievalQA
# import pinecone
# from langchain.vectorstores import Pinecone
import os
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
# import time

API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}

# Remote vector-DB Space that serves retrieval for the RAG pipeline.
retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")

# embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
# # device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
# embed_model = HuggingFaceEmbeddings(
#     model_name=embed_model_id,
#     # model_kwargs={'device': device},
#     # encode_kwargs={'device': device, 'batch_size': 32}
# )

# pinecone.init(
#     api_key=os.environ.get('PINECONE_API_KEY'),
#     environment=os.environ.get('PINECONE_ENVIRONMENT')
# )

# index_name = 'llama-rag'
# index = pinecone.Index(index_name)
# text_field = 'text'  # field in metadata that contains text content

# docs = [
#     "this is one document",
#     "and another document"
# ]

# embeddings = embed_model.embed_documents(docs)

# if index_name not in pinecone.list_indexes():
#     pinecone.create_index(
#         index_name,
#         dimension=len(embeddings[0]),
#         metric='cosine'
#     )
#     # wait for index to finish initialization
#     while not pinecone.describe_index(index_name).status['ready']:
#         time.sleep(1)

# vectorstore = Pinecone(
#     index, embed_model.embed_query, text_field
# )


def call_llm_api(input_text, context):
    """Send the question plus retrieved context to the hosted Zephyr model."""
    payload = {"inputs": f"question: {input_text}, context: {context}"}
    response = requests.post(API_URL, headers=headers, json=payload)
    response.raise_for_status()
    return response.json()  # adjust as needed based on the API response format


# rag_pipeline = RetrievalQA.from_chain_type(
#     llm=call_llm_api, chain_type='stuff',
#     retriever=vectorstore.as_retriever()
# )


def main(question):
    # return rag_pipeline(question)
    # Fetch supporting passages from the remote vector-DB Space, then pass
    # question and context to the LLM.
    context = retrieval.predict(question, api_name="/predict")
    answer = call_llm_api(question, context)
    # The Inference API returns a list of generations: [{"generated_text": ...}]
    return answer[0]["generated_text"]


demo = gr.Interface(main, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()
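
# Note: while the model is cold-loading, the serverless Inference API answers
# HTTP 503 with a JSON body like {"error": "...", "estimated_time": 20.0},
# which raise_for_status() above turns into an exception. A minimal retry
# sketch for that case (assumes the standard payload shape; the helper name
# is hypothetical and is not wired into the app above):
#
# import time
#
# def call_llm_api_with_retry(question, context, max_attempts=5):
#     for _ in range(max_attempts):
#         response = requests.post(
#             API_URL, headers=headers,
#             json={"inputs": f"question: {question}, context: {context}"},
#         )
#         if response.status_code == 503:
#             # Wait roughly as long as the API estimates, then retry.
#             time.sleep(response.json().get("estimated_time", 10))
#             continue
#         response.raise_for_status()
#         return response.json()
#     raise RuntimeError("model did not become available in time")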