ishaan-mital committed
Commit 54228d8
1 Parent(s): 4b68cf1
Files changed (2)
  1. app.py +58 -47
  2. requirements.txt +7 -7
app.py CHANGED
@@ -1,68 +1,79 @@
 from gradio_client import Client
 import gradio as gr
-import requests
-from langchain.chains import RetrievalQA
-import pinecone
-from langchain.vectorstores import Pinecone
-import os
-import openai
-import time
-from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-import transformers
-from langchain.chains import RetrievalQA
+# import requests
+# from langchain.chains import RetrievalQA
+# import pinecone
+# from langchain.vectorstores import Pinecone
+# import os
+# import openai
+# import time
+# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+# import transformers
+# from langchain.chains import RetrievalQA
 
-API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
-headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}
+# API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
+# headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}
 retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/7f5fz9pvt/")
-
-embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
+llm = Client("https://library-samples-zephyr-7b.hf.space/--replicas/b7p4f/")
+# embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
 
-embed_model = HuggingFaceEmbeddings(
-    model_name=embed_model_id,
-)
+# embed_model = HuggingFaceEmbeddings(
+#     model_name=embed_model_id,
+# )
 
 
-pinecone.init(
-    api_key=os.environ.get('PINECONE_API_KEY'),
-    environment=os.environ.get('PINECONE_ENVIRONMENT')
-)
+# pinecone.init(
+#     api_key=os.environ.get('PINECONE_API_KEY'),
+#     environment=os.environ.get('PINECONE_ENVIRONMENT')
+# )
 
-index_name='llama-rag'
-index = pinecone.Index(index_name)
-index.describe_index_stats()
-text_field = 'text'
+# index_name='llama-rag'
+# index = pinecone.Index(index_name)
+# index.describe_index_stats()
+# text_field = 'text'
 
-vectorstore = Pinecone(
-    index, embed_model.embed_query, text_field
-)
+# vectorstore = Pinecone(
+#     index, embed_model.embed_query, text_field
+# )
 
 
-headers = {"Authorization": "Bearer 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"}
-generate_text = transformers.pipeline(
-    model="HuggingFaceH4/zephyr-7b-beta",
-    return_full_text=True,  # langchain expects the full text
-    task='text-generation',
-    # we pass model parameters here too
-    temperature=0.7,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
-    max_new_tokens=512,  # max number of tokens to generate in the output
-    repetition_penalty=1.1,  # without this output begins repeating
-    do_sample=True
-)
+# headers = {"Authorization": "Bearer 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"}
+# generate_text = transformers.pipeline(
+#     model="HuggingFaceH4/zephyr-7b-beta",
+#     return_full_text=True,  # langchain expects the full text
+#     task='text-generation',
+#     # we pass model parameters here too
+#     temperature=0.7,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
+#     max_new_tokens=512,  # max number of tokens to generate in the output
+#     repetition_penalty=1.1,  # without this output begins repeating
+#     do_sample=True
+# )
 
-from langchain.llms import HuggingFacePipeline
+# from langchain.llms import HuggingFacePipeline
 
-llm = HuggingFacePipeline(pipeline=generate_text)
+# llm = HuggingFacePipeline(pipeline=generate_text)
 
-rag_pipeline = RetrievalQA.from_chain_type(
-    llm=llm, chain_type='stuff',
-    retriever=vectorstore.as_retriever()
-)
+# rag_pipeline = RetrievalQA.from_chain_type(
+#     llm=llm, chain_type='stuff',
+#     retriever=vectorstore.as_retriever()
+# )
 
 def main(question):
     # return rag_pipeline(question)
     global chatbot
-    chatbot = requests.post(API_URL, headers=headers, json={"inputs": question}).json()
-    return rag_pipeline(question)
+    context = retrieval.predict(question)
+    answer = llm.predict(
+        f'Question: {question} and context: {context}',
+        "NCERT Helper!!",  # str in 'System prompt' Textbox component
+        2048,  # float (numeric value between 1 and 2048) in 'Max new tokens' Slider component
+        0.1,  # float (numeric value between 0.1 and 4.0) in 'Temperature' Slider component
+        0.05,  # float (numeric value between 0.05 and 1.0) in 'Top-p (nucleus sampling)' Slider component
+        3,  # float (numeric value between 1 and 1000) in 'Top-k' Slider component
+        1,  # float (numeric value between 1.0 and 2.0) in 'Repetition penalty' Slider component
+        api_name="/chat"
+    )
+    chatbot = answer
+    return answer
 
 demo = gr.Interface(main, inputs = "text", outputs = "text")
 
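The net effect in app.py: the in-process RAG stack (HuggingFace embeddings, Pinecone index, local transformers pipeline, LangChain RetrievalQA) is commented out, and both retrieval and generation are delegated to hosted Gradio Spaces through gradio_client. A minimal sketch of that pattern follows. The bare Space names below are assumed equivalents of the replica-pinned URLs in the diff; /--replicas/... URLs point at a single replica and typically stop resolving after the Space restarts, so name-based addressing is more durable. The /chat argument order should be confirmed with view_api() before trusting positional values.

from gradio_client import Client
import gradio as gr

# Assumed name-based equivalents of the replica-pinned URLs in the diff;
# verify both Spaces and their endpoints with Client.view_api().
retrieval = Client("ishaan-mital/ncert-helper-vector-db")
llm = Client("library-samples/zephyr-7b")

def main(question):
    # Step 1: fetch supporting passages from the vector-DB Space.
    context = retrieval.predict(question)
    # Step 2: have the zephyr Space answer from that context. Arguments are
    # positional, matching the /chat signature reported by view_api().
    return llm.predict(
        f"Question: {question} and context: {context}",
        "NCERT Helper!!",  # system prompt
        2048,              # max new tokens
        0.1,               # temperature
        0.05,              # top-p (nucleus sampling)
        3,                 # top-k
        1,                 # repetition penalty
        api_name="/chat",
    )

demo = gr.Interface(main, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()

Positional arguments are brittle: if the upstream Space reorders or adds a slider, the call silently binds the wrong values, so re-checking view_api() after upstream changes is worth the extra line.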
requirements.txt CHANGED
@@ -2,10 +2,10 @@ hugchat
 gradio
 gradio_client
 gtts
-openai==0.28
-pydantic==1.10.9
-langchain
-pinecone-client==2.2.2
-faiss-cpu
-sentence_transformers
-transformers
+# openai==0.28
+# pydantic==1.10.9
+# langchain
+# pinecone-client==2.2.2
+# faiss-cpu
+# sentence_transformers
+# transformers
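With both retrieval and generation remote, the heavy packages are commented out rather than deleted; at import time app.py now needs only gradio and gradio_client. A quick, hypothetical sanity check that the slimmed environment can still reach the remote endpoints:

from gradio_client import Client

# Hypothetical smoke test: uses only gradio_client from the trimmed
# requirements. view_api() prints each endpoint's name, parameter order,
# types, and ranges, which is where the positional /chat values come from.
llm = Client("library-samples/zephyr-7b")  # assumed Space name
llm.view_api()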