DeepSoft-Tech committed on
Commit
ee01867
·
verified ·
1 Parent(s): 2723949

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +5 -5
  2. app.py +78 -0
  3. gitattributes +35 -0
  4. requirements.txt +6 -0
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: DeepChat PDF
3
- emoji:
4
- colorFrom: red
5
- colorTo: indigo
6
  sdk: streamlit
7
- sdk_version: 1.31.0
8
  app_file: app.py
9
  pinned: false
10
  ---
 
1
  ---
2
+ title: ChatPdf
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: red
6
  sdk: streamlit
7
+ sdk_version: 1.29.0
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.chains.question_answering import load_qa_chain
8
+ from langchain.callbacks import get_openai_callback
9
+ from langchain import HuggingFaceHub, LLMChain
10
+ from langchain.embeddings import HuggingFaceHubEmbeddings,HuggingFaceInferenceAPIEmbeddings
11
# Hugging Face API token, read from the environment at import time.
# A missing HF_TOKEN variable raises KeyError here.
token = os.environ['HF_TOKEN']

# Hub-hosted embedding client built on the mpnet sentence-transformer.
repo_id = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceHubEmbeddings(
    huggingfacehub_api_token=token,
    task="feature-extraction",
    repo_id=repo_id,
)

# Same name as the top-of-file import; kept so the statement order is unchanged.
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings

# Inference-API embedding client; this is the object main() hands to FAISS.
embeddings = HuggingFaceInferenceAPIEmbeddings(
    model_name="sentence-transformers/all-MiniLM-l6-v2",
    api_key=token,
)
24
+
25
+
26
def main():
    """Render the "Ask your PDF" Streamlit page.

    Flow: upload a PDF, extract its text, split the text into overlapping
    chunks, index the chunks in a FAISS store using the module-level
    ``embeddings`` client, then answer a user question by running a "stuff"
    question-answering chain over the most similar chunks with a Hugging
    Face Hub LLM.

    Returns:
        None. All output is written to the Streamlit page (and the callback
        summary to stdout).
    """
    st.set_page_config(page_title="Ask your PDF")
    st.header("Ask your PDF 💬")

    # upload file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        # Nothing uploaded yet; Streamlit reruns the script once a file arrives.
        return

    # extract the text; `or ""` keeps a page that yields no text from
    # breaking the concatenation
    pdf_reader = PdfReader(pdf)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # Image-only / scanned PDFs produce no text; building a vector store
        # from zero chunks would fail, so tell the user instead.
        st.warning("No extractable text was found in this PDF.")
        return

    # index the chunks with the module-level embeddings client
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    # show user input
    user_question = st.text_input("Ask a question about your PDF:")
    if not user_question:
        return

    docs = knowledge_base.similarity_search(user_question)

    llm = HuggingFaceHub(
        repo_id='HuggingFaceH4/zephyr-7b-beta',
        model_kwargs={'temperature': 0.01, "max_length": 2048},
        huggingfacehub_api_token=token,
    )
    chain = load_qa_chain(llm, chain_type="stuff")

    # NOTE(review): this callback is the OpenAI usage tracker; with a Hugging
    # Face Hub LLM it presumably reports nothing useful - confirm before
    # relying on the printed summary.
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=user_question)
        print(cb)

    st.write(response)


if __name__ == '__main__':
    main()
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ pydantic
2
+ langchain
3
+ PyPDF2
4
+ faiss-cpu
5
+ altair<5
6
+ huggingface_hub