ishaan-mital committed on
Commit d1289ae · 1 Parent(s): e665513
Files changed (1)
  1. app.py +43 -41
app.py CHANGED
@@ -12,63 +12,65 @@ API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-b
  headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}
  retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")

- embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
- # device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

- embed_model = HuggingFaceEmbeddings(
-     model_name=embed_model_id,
-     # model_kwargs={'device': device},
-     # encode_kwargs={'device': device, 'batch_size': 32}
- )


- pinecone.init(
-     api_key=os.environ.get('PINECONE_API_KEY'),
-     environment=os.environ.get('PINECONE_ENVIRONMENT')
- )

- index_name = 'llama-rag'
- index = pinecone.Index(index_name)
- text_field = 'text'  # field in metadata that contains text content
- docs = [
-     "this is one document",
-     "and another document"
- ]

- embeddings = embed_model.embed_documents(docs)
- if index_name not in pinecone.list_indexes():
-     pinecone.create_index(
-         index_name,
-         dimension=len(embeddings[0]),
-         metric='cosine'
-     )
-     # wait for index to finish initialization
-     while not pinecone.describe_index(index_name).status['ready']:
-         time.sleep(1)
- vectorstore = Pinecone(
-     index, embed_model.embed_query, text_field
- )
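
For reference, the retrieval path removed above built an in-process LangChain vectorstore over a Pinecone index. A minimal sketch of how that object would have been queried, assuming the deleted embed_model/vectorstore objects; the question string is a made-up example and similarity_search is the LangChain Pinecone wrapper's standard lookup:

# Hypothetical use of the removed retrieval path (not part of this commit).
query = "State Newton's first law of motion."              # example NCERT-style question
matches = vectorstore.similarity_search(query, k=3)        # top matching chunks from the Pinecone index
context = "\n".join(doc.page_content for doc in matches)   # text that would feed the LLM prompt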

  def call_llm_api(input_text, context):
      payload = {
-         "inputs": f'question: {input_text}, context: {context}'
      }
      response = requests.post(API_URL, headers=headers, json=payload)
      return response.json()  # Adjust as needed based on your API response format

- rag_pipeline = RetrievalQA.from_chain_type(
-     llm=call_llm_api, chain_type='stuff',
-     retriever=vectorstore.as_retriever()
- )


  def main(question):
      # return rag_pipeline(question)
-     # global chatbot
-     # context = retrieval.predict(question, api_name="/predict")
-     # answer = call_llm_api(question, context)
-     # # chatbot = answer[1]
-     # return answer[0]

  demo = gr.Interface(main, inputs="text", outputs="text")
 
  headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}
  retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")

+ # embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
+ # # device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

+ # embed_model = HuggingFaceEmbeddings(
+ #     model_name=embed_model_id,
+ #     # model_kwargs={'device': device},
+ #     # encode_kwargs={'device': device, 'batch_size': 32}
+ # )


+ # pinecone.init(
+ #     api_key=os.environ.get('PINECONE_API_KEY'),
+ #     environment=os.environ.get('PINECONE_ENVIRONMENT')
+ # )

+ # index_name = 'llama-rag'
+ # index = pinecone.Index(index_name)
+ # text_field = 'text'  # field in metadata that contains text content
+ # docs = [
+ #     "this is one document",
+ #     "and another document"
+ # ]

+ # embeddings = embed_model.embed_documents(docs)
+ # if index_name not in pinecone.list_indexes():
+ #     pinecone.create_index(
+ #         index_name,
+ #         dimension=len(embeddings[0]),
+ #         metric='cosine'
+ #     )
+ #     # wait for index to finish initialization
+ #     while not pinecone.describe_index(index_name).status['ready']:
+ #         time.sleep(1)
+ # vectorstore = Pinecone(
+ #     index, embed_model.embed_query, text_field
+ # )

  def call_llm_api(input_text, context):
+     init_prompt = "## Instruction: You are an AI language model and must return truthful responses as per the information. Do not answer with any information which isn't completely verified and correct. Do not lie. Do not present information where you don't know the answer. Do not include incorrect extra information. Your name is NCERT Helper. You are a helpful and truthful chatbot. \n"
+     info = "Information: \n"
      payload = {
+         "inputs": init_prompt + info + context + "\nQuestion: " + input_text + "\nAnswer:"
      }
      response = requests.post(API_URL, headers=headers, json=payload)
      return response.json()  # Adjust as needed based on your API response format
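
call_llm_api returns the Inference API's raw JSON. For the hosted text-generation task this is typically a list of the form [{"generated_text": "..."}], and it can instead be an {"error": ...} dict while the model is loading, so callers usually unwrap it. A minimal sketch of such a helper; the function name and fallback behaviour are assumptions, not part of this commit:

# Hypothetical helper to pull the generated answer out of the Inference API response.
def extract_answer(api_response):
    if isinstance(api_response, list) and api_response and "generated_text" in api_response[0]:
        return api_response[0]["generated_text"]
    return str(api_response)  # e.g. {"error": "Model ... is currently loading"}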
 
+ # rag_pipeline = RetrievalQA.from_chain_type(
+ #     llm=call_llm_api, chain_type='stuff',
+ #     retriever=vectorstore.as_retriever()
+ # )


  def main(question):
      # return rag_pipeline(question)
+     global chatbot
+     context = retrieval.predict(question, api_name="/predict")
+     answer = call_llm_api(question, context)
+     chatbot = answer
+     return answer

  demo = gr.Interface(main, inputs="text", outputs="text")
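
With this change the Space performs retrieval-augmented generation without any local index: main() pulls context from the vector-DB Space through gradio_client and forwards it, wrapped in the NCERT Helper prompt, to the Zephyr Inference API endpoint. A short usage sketch, assuming the usual Gradio entry point; the example question and the demo.launch() call are assumptions, not shown in this hunk:

# Example interaction (question text is illustrative only).
print(main("What is photosynthesis?"))

# Assumed entry point for the Space; not visible in this diff.
if __name__ == "__main__":
    demo.launch()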