ishaan-mital committed
Commit 8e3e8e3 · Parent(s): 077ae05

initial commit

Files changed (1)
  1. app.py +55 -60
app.py CHANGED
@@ -9,72 +9,67 @@ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 import time
 
 API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
-# retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bg26k5/")
-
-embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
-# device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
-
-embed_model = HuggingFaceEmbeddings(
-    model_name=embed_model_id,
-    # model_kwargs={'device': device},
-    # encode_kwargs={'device': device, 'batch_size': 32}
-)
-
-
-pinecone.init(
-    api_key=os.environ.get('PINECONE_API_KEY'),
-    environment=os.environ.get('PINECONE_ENVIRONMENT')
-)
-
-index_name = 'llama-rag'
-index = pinecone.Index(index_name)
-text_field = 'text'  # field in metadata that contains text content
-docs = [
-    "this is one document",
-    "and another document"
-]
-
-embeddings = embed_model.embed_documents(docs)
-if index_name not in pinecone.list_indexes():
-    pinecone.create_index(
-        index_name,
-        dimension=len(embeddings[0]),
-        metric='cosine'
-    )
-    # wait for index to finish initialization
-    while not pinecone.describe_index(index_name).status['ready']:
-        time.sleep(1)
-vectorstore = Pinecone(
-    index, embed_model.embed_query, text_field
-)
-
-def call_llm_api(input_text):
-    headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}
-    payload = {"input": input_text}
+headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}
+retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bg26k5/")
+
+# embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
+# # device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
+
+# embed_model = HuggingFaceEmbeddings(
+#     model_name=embed_model_id,
+#     # model_kwargs={'device': device},
+#     # encode_kwargs={'device': device, 'batch_size': 32}
+# )
+
+
+# pinecone.init(
+#     api_key=os.environ.get('PINECONE_API_KEY'),
+#     environment=os.environ.get('PINECONE_ENVIRONMENT')
+# )
+
+# index_name = 'llama-rag'
+# index = pinecone.Index(index_name)
+# text_field = 'text'  # field in metadata that contains text content
+# docs = [
+#     "this is one document",
+#     "and another document"
+# ]
+
+# embeddings = embed_model.embed_documents(docs)
+# if index_name not in pinecone.list_indexes():
+#     pinecone.create_index(
+#         index_name,
+#         dimension=len(embeddings[0]),
+#         metric='cosine'
+#     )
+#     # wait for index to finish initialization
+#     while not pinecone.describe_index(index_name).status['ready']:
+#         time.sleep(1)
+# vectorstore = Pinecone(
+#     index, embed_model.embed_query, text_field
+# )
+
+def call_llm_api(input_text, context):
+    payload = {"input": {
+        "question": input_text,
+        "context": context
+        }
+    }
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.json()  # Adjust as needed based on your API response format
 
-
-from langchain.llms import Runnable
-
-class APIRunnable(Runnable):
-    def __init__(self, api_func):
-        self.api_func = api_func
-
-    def run(self, input_text):
-        return self.api_func(input_text)
-
-api_runnable = APIRunnable(api_func=call_llm_api)
-
-rag_pipeline = RetrievalQA.from_chain_type(
-    llm=api_runnable, chain_type='stuff',
-    retriever=vectorstore.as_retriever()
-)
+# rag_pipeline = RetrievalQA.from_chain_type(
+#     llm=call_llm_api, chain_type='stuff',
+#     retriever=vectorstore.as_retriever()
+# )
 
 
 def main(question):
-    global chatbot
-    return rag_pipeline(question)
+    # return rag_pipeline(question)
+    context = retrieval.predict(question, api_name="/predict")
+    answer = call_llm_api(question, context)
+    chatbot = answer[1]
+    return answer[1][0][1]
 
 demo = gr.Interface(main, inputs="text", outputs="text")
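
For orientation, the flow this commit lands in app.py is: take the user's question, fetch context from the companion vector-DB Space, then forward question plus context to the hosted zephyr-7b-beta endpoint. The sketch below is a minimal, hypothetical rendering of that flow, not the committed code: it assumes the Space's /predict endpoint returns the context as a plain string, and it swaps the commit's nested {"input": {"question": ..., "context": ...}} payload (which would need a custom inference endpoint) for the {"inputs": <string>} payload that the public Hugging Face Inference API expects for text-generation models. The prompt template and the answer_question name are illustrative.

import os
import requests
from gradio_client import Client

API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}

# Gradio client for the vector-DB Space that serves retrieval results.
retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/")

def answer_question(question: str) -> str:
    # 1. Fetch supporting context from the retrieval Space (assumes /predict
    #    returns a plain string of concatenated passages).
    context = retrieval.predict(question, api_name="/predict")
    # 2. Fold question and context into one prompt; the hosted Inference API
    #    expects {"inputs": <string>} for text-generation models.
    prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
    response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
    response.raise_for_status()
    # 3. Text-generation responses come back as [{"generated_text": ...}].
    return response.json()[0]["generated_text"]

With this response format the generated text lives at [0]["generated_text"], so no answer[1][0][1]-style indexing into the response is needed.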