ishaan-mital committed on
Commit 21ed16a · 1 Parent(s): 096f133
Files changed (1)
  1. app.py +61 -24
app.py CHANGED
@@ -8,28 +8,28 @@ import os
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 import time
 
-API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
+API_URL = " "
 headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}
 retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")
 
-# embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
+embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
 # # device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
 
-# embed_model = HuggingFaceEmbeddings(
-#     model_name=embed_model_id,
-#     # model_kwargs={'device': device},
-#     # encode_kwargs={'device': device, 'batch_size': 32}
-# )
+embed_model = HuggingFaceEmbeddings(
+    model_name=embed_model_id,
+    # model_kwargs={'device': device},
+    # encode_kwargs={'device': device, 'batch_size': 32}
+)
 
 
-# pinecone.init(
-#     api_key=os.environ.get('PINECONE_API_KEY'),
-#     environment=os.environ.get('PINECONE_ENVIRONMENT')
-# )
+pinecone.init(
+    api_key=os.environ.get('PINECONE_API_KEY'),
+    environment=os.environ.get('PINECONE_ENVIRONMENT')
+)
 
-# index_name = 'llama-rag'
-# index = pinecone.Index(index_name)
-# text_field = 'text' # field in metadata that contains text content
+index_name = 'llama-rag'
+index = pinecone.Index(index_name)
+text_field = 'text' # field in metadata that contains text content
 # docs = [
 #     "this is one document",
 #     "and another document"
@@ -45,32 +45,69 @@ retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")
 # # wait for index to finish initialization
 # while not pinecone.describe_index(index_name).status['ready']:
 #     time.sleep(1)
-# vectorstore = Pinecone(
-#     index, embed_model.embed_query, text_field
-# )
-
+vectorstore = Pinecone(
+    index, embed_model.embed_query, text_field
+)
 def call_llm_api(input_text,context):
     init_prompt ="## Instruction: You are an AI language model and must return truthful responses as per the information. Do not answer with any information which isn't completely verified and correct. Do not lie. Do not present information where you don't know the answer. Do not include incorrect extra information. Your name is NCERT Helper. You are a helpful and truthful chatbot. \n"
     info="Information: \n"
     payload = {
         "inputs": init_prompt + info + context + "\nQuestion: " + input_text + "\nAnswer:"
     }
+    l=len(init_prompt + info + context + "\nQuestion: " + input_text + "\nAnswer:")
     response = requests.post(API_URL, headers=headers, json=payload)
-    return response.json() # Adjust as needed based on your API response format
+    return response.json(),l # Adjust as needed based on your API response format
 
-# rag_pipeline = RetrievalQA.from_chain_type(
-#     llm=call_llm_api, chain_type='stuff',
-#     retriever=vectorstore.as_retriever()
+# import requests
+# from langchain.llms import Runnable
+
+# class HuggingFaceApiWrapper(Runnable):
+#     def __init__(self, api_endpoint):
+#         self.api_endpoint = api_endpoint
+
+#     def run(self, prompt):
+#         payload = {
+#             "prompt": prompt,
+#             "temperature": 0.7,
+#             "max_new_tokens": 512,
+#             "repetition_penalty": 1.1,
+#             "do_sample": True
+#             # Add any other parameters needed by your API
+#         }
+
+#         # Make a POST request to the Hugging Face model API
+#         response = requests.post(self.api_endpoint, json=payload)
+
+#         # Check if the request was successful
+#         if response.status_code == 200:
+#             result = response.json()
+#             # Extract and return the generated text from the API response
+#             return result.get("generated_text", "")
+#         else:
+#             # Handle error cases
+#             print(f"Error: {response.status_code}")
+#             return None
+
+# # Example usage
+# API_ENDPOINT = "https://your-hugging-face-api-endpoint.com"
+# hugging_face_api_wrapper = HuggingFaceApiWrapper(api_endpoint=API_ENDPOINT)
+
+# # Now you can use hugging_face_api_wrapper as llm in RetrievalQA
+# rag_pipeline = RetrievalQA(
+#     llm=hugging_face_api_wrapper, chain_type = "stuff",
+#     retriever=vectorstore.as_retriever(),
+#     # llm_result_processor=your_result_processor_function
 # )
 
 
+
 def main(question):
     # return rag_pipeline(question)
     global chatbot
     context = retrieval.predict(question, api_name = "/predict")
-    answer=call_llm_api(question,context)
+    answer,l=call_llm_api(question,context)
     chatbot = answer
-    return answer[1][0][1]
+    return answer[0]['generated_text'][l:]
 
 demo = gr.Interface(main, inputs = "text", outputs = "text")
 
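
The functional change in this second hunk: call_llm_api now also returns the character length l of the prompt it sent, and main returns answer[0]['generated_text'][l:] instead of the old answer[1][0][1]. This matches the Hugging Face Inference API's text-generation behaviour, which by default echoes the prompt at the start of generated_text, so slicing off len(prompt) characters leaves only the completion. A self-contained sketch of the same technique, with a placeholder endpoint rather than the blank API_URL from the commit:

import os
import requests

API_URL = "https://api-inference.huggingface.co/models/<model-id>"  # placeholder
headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}

def generate(prompt):
    # The API returns [{"generated_text": prompt + completion}] by default.
    response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
    response.raise_for_status()
    full_text = response.json()[0]['generated_text']
    return full_text[len(prompt):]  # keep only the completion

If the endpoint honours it, sending "parameters": {"return_full_text": False} alongside "inputs" should drop the echo server-side and make the length bookkeeping unnecessary.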
 
 
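
One caveat on the large commented-out block: from langchain.llms import Runnable is not an importable path in the LangChain 0.0.x line this app targets, so HuggingFaceApiWrapper could not be enabled as written. The documented route for plugging a custom HTTP-backed model into RetrievalQA is to subclass langchain.llms.base.LLM; a hedged sketch follows (class name and fields are hypothetical):

from typing import Any, List, Optional

import requests
from langchain.llms.base import LLM

class HFInferenceLLM(LLM):
    # Hypothetical custom LLM wrapping a Hugging Face Inference API endpoint.
    api_endpoint: str
    api_key: str

    @property
    def _llm_type(self) -> str:
        return "hf-inference-api"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        response = requests.post(
            self.api_endpoint,
            headers={"Authorization": f"Bearer {self.api_key}"},
            json={"inputs": prompt},
        )
        response.raise_for_status()
        return response.json()[0]["generated_text"]

An instance of this class would slot into RetrievalQA.from_chain_type(llm=..., chain_type="stuff", retriever=vectorstore.as_retriever()), which is the shape the commented-out rag_pipeline was reaching for.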