ishaan-mital committed on
Commit 21ed16a · 1 Parent(s): 096f133
Files changed (1)
  1. app.py +61 -24
app.py CHANGED
@@ -8,28 +8,28 @@ import os
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 import time
 
-API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
+API_URL = " "
 headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}
 retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")
 
-# embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
+embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
 # # device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
 
-# embed_model = HuggingFaceEmbeddings(
-#     model_name=embed_model_id,
-#     # model_kwargs={'device': device},
-#     # encode_kwargs={'device': device, 'batch_size': 32}
-# )
+embed_model = HuggingFaceEmbeddings(
+    model_name=embed_model_id,
+    # model_kwargs={'device': device},
+    # encode_kwargs={'device': device, 'batch_size': 32}
+)
 
 
-# pinecone.init(
-#     api_key=os.environ.get('PINECONE_API_KEY'),
-#     environment=os.environ.get('PINECONE_ENVIRONMENT')
-# )
+pinecone.init(
+    api_key=os.environ.get('PINECONE_API_KEY'),
+    environment=os.environ.get('PINECONE_ENVIRONMENT')
+)
 
-# index_name = 'llama-rag'
-# index = pinecone.Index(index_name)
-# text_field = 'text' # field in metadata that contains text content
+index_name = 'llama-rag'
+index = pinecone.Index(index_name)
+text_field = 'text' # field in metadata that contains text content
 # docs = [
 #     "this is one document",
 #     "and another document"
@@ -45,32 +45,69 @@ retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")
 # # wait for index to finish initialization
 # while not pinecone.describe_index(index_name).status['ready']:
 #     time.sleep(1)
-# vectorstore = Pinecone(
-#     index, embed_model.embed_query, text_field
-# )
-
+vectorstore = Pinecone(
+    index, embed_model.embed_query, text_field
+)
 def call_llm_api(input_text,context):
     init_prompt ="## Instruction: You are an AI language model and must return truthful responses as per the information. Do not answer with any information which isn't completely verified and correct. Do not lie. Do not present information where you don't know the answer. Do not include incorrect extra information. Your name is NCERT Helper. You are a helpful and truthful chatbot. \n"
     info="Information: \n"
     payload = {
         "inputs": init_prompt + info + context + "\nQuestion: " + input_text + "\nAnswer:"
     }
+    l=len(init_prompt + info + context + "\nQuestion: " + input_text + "\nAnswer:")
     response = requests.post(API_URL, headers=headers, json=payload)
-    return response.json() # Adjust as needed based on your API response format
+    return response.json(),l # Adjust as needed based on your API response format
 
-# rag_pipeline = RetrievalQA.from_chain_type(
-#     llm=call_llm_api, chain_type='stuff',
-#     retriever=vectorstore.as_retriever()
+# import requests
+# from langchain.llms import Runnable
+
+# class HuggingFaceApiWrapper(Runnable):
+#     def __init__(self, api_endpoint):
+#         self.api_endpoint = api_endpoint
+
+#     def run(self, prompt):
+#         payload = {
+#             "prompt": prompt,
+#             "temperature": 0.7,
+#             "max_new_tokens": 512,
+#             "repetition_penalty": 1.1,
+#             "do_sample": True
+#             # Add any other parameters needed by your API
+#         }
+
+#         # Make a POST request to the Hugging Face model API
+#         response = requests.post(self.api_endpoint, json=payload)
+
+#         # Check if the request was successful
+#         if response.status_code == 200:
+#             result = response.json()
+#             # Extract and return the generated text from the API response
+#             return result.get("generated_text", "")
+#         else:
+#             # Handle error cases
+#             print(f"Error: {response.status_code}")
+#             return None
+
+# # Example usage
+# API_ENDPOINT = "https://your-hugging-face-api-endpoint.com"
+# hugging_face_api_wrapper = HuggingFaceApiWrapper(api_endpoint=API_ENDPOINT)
+
+# # Now you can use hugging_face_api_wrapper as llm in RetrievalQA
+# rag_pipeline = RetrievalQA(
+#     llm=hugging_face_api_wrapper, chain_type = "stuff",
+#     retriever=vectorstore.as_retriever(),
+#     # llm_result_processor=your_result_processor_function
 # )
 
 
+
 def main(question):
     # return rag_pipeline(question)
     global chatbot
     context = retrieval.predict(question, api_name = "/predict")
-    answer=call_llm_api(question,context)
+    answer,l=call_llm_api(question,context)
     chatbot = answer
-    return answer[1][0][1]
+    return answer[0]['generated_text'][l:]
 
 demo = gr.Interface(main, inputs = "text", outputs = "text")
 
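
The functional change in this second hunk: call_llm_api now also returns the character length l of the prompt it sent, and main returns answer[0]['generated_text'][l:] instead of the old answer[1][0][1]. This matches the Hugging Face Inference API's text-generation behaviour, which by default echoes the prompt at the start of generated_text, so slicing off len(prompt) characters leaves only the completion. A self-contained sketch of the same technique, with a placeholder endpoint rather than the blank API_URL from the commit:

import os
import requests

API_URL = "https://api-inference.huggingface.co/models/<model-id>"  # placeholder
headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}

def generate(prompt):
    # The API returns [{"generated_text": prompt + completion}] by default.
    response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
    response.raise_for_status()
    full_text = response.json()[0]['generated_text']
    return full_text[len(prompt):]  # keep only the completion

If the endpoint honours it, sending "parameters": {"return_full_text": False} alongside "inputs" should drop the echo server-side and make the length bookkeeping unnecessary.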
 
 
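
One caveat on the large commented-out block: from langchain.llms import Runnable is not an importable path in the LangChain 0.0.x line this app targets, so HuggingFaceApiWrapper could not be enabled as written. The documented route for plugging a custom HTTP-backed model into RetrievalQA is to subclass langchain.llms.base.LLM; a hedged sketch follows (class name and fields are hypothetical):

from typing import Any, List, Optional

import requests
from langchain.llms.base import LLM

class HFInferenceLLM(LLM):
    # Hypothetical custom LLM wrapping a Hugging Face Inference API endpoint.
    api_endpoint: str
    api_key: str

    @property
    def _llm_type(self) -> str:
        return "hf-inference-api"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        response = requests.post(
            self.api_endpoint,
            headers={"Authorization": f"Bearer {self.api_key}"},
            json={"inputs": prompt},
        )
        response.raise_for_status()
        return response.json()[0]["generated_text"]

An instance of this class would slot into RetrievalQA.from_chain_type(llm=..., chain_type="stuff", retriever=vectorstore.as_retriever()), which is the shape the commented-out rag_pipeline was reaching for.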