Kabirsingla committed on
Commit
3d180a4
·
1 Parent(s): fc519f5

Upload 3 files

Browse files
Files changed (3) hide show
  1. budget_speech.pdf +0 -0
  2. pdf_chat.py +80 -0
  3. requirements.txt +7 -0
budget_speech.pdf ADDED
Binary file (472 kB). View file
 
pdf_chat.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.vectorstores.cassandra import Cassandra
2
+ from langchain.indexes.vectorstore import VectorStoreIndexWrapper
3
+ from langchain.llms import OpenAI
4
+ from langchain.embeddings import OpenAIEmbeddings
5
+
6
+ from datasets import load_dataset
7
+
8
+ import cassio
9
+ import streamlit as st
10
+ from PyPDF2 import PdfReader
11
+
12
import os

# SECURITY: credentials must never be committed to the repository. They are
# read from the process environment instead (for example via the hosting
# platform's "secrets" settings). The literal values that were previously
# hard-coded here have been published and must be revoked and regenerated.
#
# A missing variable yields an empty string so that the failure surfaces at
# the first client call that actually needs the credential.
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN", "")
ASTRA_DB_ID = os.environ.get("ASTRA_DB_ID", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
15
+
16
# Open the PDF to be indexed; the path is relative to the working directory.
pdfreader = PdfReader("budget_speech.pdf")

from typing_extensions import Concatenate

# Join the extracted text of every page into a single string. Pages for which
# extract_text() returns nothing (None or an empty string) are skipped.
raw_text = "".join(
    page_text
    for page_text in (pdf_page.extract_text() for pdf_page in pdfreader.pages)
    if page_text
)

# Initialise cassio with the Astra DB token and database id.
cassio.init(token=ASTRA_DB_APPLICATION_TOKEN, database_id=ASTRA_DB_ID)

# Completion model and embedding model, both authenticated with the same key.
llm = OpenAI(api_key=OPENAI_API_KEY, temperature=0.6)
embedding = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
33
+
34
+ # Function to load OpenAI model and get response
35
def get_openAI_respnse(question):
    """Send ``question`` directly to an OpenAI completion model.

    No document context is retrieved; the raw question is the whole prompt.

    Args:
        question: Prompt text passed to the model as-is.

    Returns:
        Whatever the model call returns for the prompt.
    """
    # Pass the API key explicitly, exactly like the module-level clients do;
    # without it the client only works if the key happens to be discoverable
    # from the environment.
    # NOTE(review): OpenAI lists "text-davinci-003" as a retired model;
    # confirm and switch to a currently served completion model.
    completion_model = OpenAI(
        api_key=OPENAI_API_KEY,
        model_name="text-davinci-003",
        temperature=0.5,
    )
    return completion_model(question)
39
+
40
+
41
from langchain.text_splitter import CharacterTextSplitter

# Split the extracted PDF text on newlines into chunks of at most 800
# characters, with 200 characters of overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,
)

texts = text_splitter.split_text(raw_text)


@st.cache_resource(show_spinner=False)
def _build_vector_store(chunks):
    """Create the Cassandra-backed vector store and load ``chunks`` into it.

    Streamlit re-executes the script on every widget interaction. Without
    caching, each rerun would embed the whole document again and insert the
    same chunks into the table once more. ``st.cache_resource`` makes this
    run once per server process for a given set of chunks.

    ``show_spinner=False`` keeps this call from rendering anything before
    ``st.set_page_config`` is reached further down the script.

    Args:
        chunks: Tuple of text chunks to embed and store.

    Returns:
        The populated ``Cassandra`` vector store.
    """
    store = Cassandra(
        embedding=embedding,
        table_name="mini_qa_demo",
        session=None,
        keyspace=None,
    )
    if chunks:
        store.add_texts(list(chunks))
    return store


# A tuple gives the cache a stable, hashable key derived from the chunk contents.
astra_vector_store = _build_vector_store(tuple(texts))
astra_vextor_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)
63
+
64
+
65
+
66
+
67
## Initialize Streamlit app
st.set_page_config(page_title="Ask Questions from the India Budget 2023 PDF")
st.header("PDF_QA")

# The widget key stays "input"; only the Python variable is renamed so that it
# no longer shadows the builtin input().
question = st.text_input("Enter your question here", key="input").strip()

submit = st.button("Generate")

# Query the index only after the button is clicked and only for a non-empty
# question. Previously the query ran on every script rerun, including the
# initial page load with an empty string.
response = None
if submit:
    if question:
        response = astra_vextor_index.query(question, llm=llm)
        st.subheader("The response is")
        st.write(response)
    else:
        st.warning("Please enter a question first.")
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ cassio
2
+ datasets
3
+ langchain
4
+ openai
5
+ tiktoken
6
+ streamlit
7
+ PyPDF2