AyushS9020 committed on
Commit
b6b32c0
·
verified ·
1 Parent(s): 6047a4e

Upload 34 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,33 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ uploads/Adani_Ports.pdf filter=lfs diff=lfs merge=lfs -text
37
+ uploads/Asian_Paints.pdf filter=lfs diff=lfs merge=lfs -text
38
+ uploads/Axis_Bank.pdf filter=lfs diff=lfs merge=lfs -text
39
+ uploads/Bajaj_Finance.pdf filter=lfs diff=lfs merge=lfs -text
40
+ uploads/Bajaj_Finserv.pdf filter=lfs diff=lfs merge=lfs -text
41
+ uploads/Bharti_Airtel.pdf filter=lfs diff=lfs merge=lfs -text
42
+ uploads/HCL_Tech.pdf filter=lfs diff=lfs merge=lfs -text
43
+ uploads/HDFC_Bank.pdf filter=lfs diff=lfs merge=lfs -text
44
+ uploads/Hindustan_Unilever.pdf filter=lfs diff=lfs merge=lfs -text
45
+ uploads/ICICI_Bank.pdf filter=lfs diff=lfs merge=lfs -text
46
+ uploads/IndusInd_Bank.pdf filter=lfs diff=lfs merge=lfs -text
47
+ uploads/Infosys.pdf filter=lfs diff=lfs merge=lfs -text
48
+ uploads/ITC.pdf filter=lfs diff=lfs merge=lfs -text
49
+ uploads/JSW_Steel.pdf filter=lfs diff=lfs merge=lfs -text
50
+ uploads/Kotak_Bank.pdf filter=lfs diff=lfs merge=lfs -text
51
+ uploads/L&T.pdf filter=lfs diff=lfs merge=lfs -text
52
+ uploads/Mahindra&Mahindra.pdf filter=lfs diff=lfs merge=lfs -text
53
+ uploads/Maruti_Suzuki.pdf filter=lfs diff=lfs merge=lfs -text
54
+ uploads/Nestle.pdf filter=lfs diff=lfs merge=lfs -text
55
+ uploads/NTPC.pdf filter=lfs diff=lfs merge=lfs -text
56
+ uploads/Powergrid.pdf filter=lfs diff=lfs merge=lfs -text
57
+ uploads/Reliance.pdf filter=lfs diff=lfs merge=lfs -text
58
+ uploads/SBI_Bank.pdf filter=lfs diff=lfs merge=lfs -text
59
+ uploads/Sun_Pharma.pdf filter=lfs diff=lfs merge=lfs -text
60
+ uploads/Tata_Motors.pdf filter=lfs diff=lfs merge=lfs -text
61
+ uploads/Tata_Steel.pdf filter=lfs diff=lfs merge=lfs -text
62
+ uploads/TCS.pdf filter=lfs diff=lfs merge=lfs -text
63
+ uploads/Tech_Mahindra.pdf filter=lfs diff=lfs merge=lfs -text
64
+ uploads/Titan.pdf filter=lfs diff=lfs merge=lfs -text
65
+ uploads/Ultratech.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from tqdm import tqdm
4
+
5
+ from unstructured.partition.pdf import partition_pdf
6
+ from langchain.schema.document import Document
7
+ import google.generativeai as genai
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
+ import shutil
11
+
12
+ from wasabi import msg
13
+
14
+ from PIL import Image
15
+
16
# Configure the Gemini client. The key is read from the environment so that no
# credential is ever committed to the repository.
_api_key = os.environ.get('GOOGLE_API_KEY')
if not _api_key:
    raise RuntimeError('GOOGLE_API_KEY environment variable is not set')

genai.configure(api_key=_api_key)
model = genai.GenerativeModel('gemini-1.5-flash')

# Every entry of uploads/ is handed to the PDF partitioner.
files = os.listdir('uploads')
files = [f'uploads/{file}' for file in files]

documents = []

# Make sure the image output directory exists even when no PDF yields an
# image; otherwise os.listdir('outputs') further down raises
# FileNotFoundError.
os.makedirs('outputs', exist_ok=True)

for file in tqdm(files, total=len(files), leave=False):

    # Split the PDF into title-based chunks; extracted image blocks are
    # written to outputs/ as a side effect.
    elements = partition_pdf(
        filename=file,
        extract_images_in_pdf=True,
        infer_table_structure=True,
        chunking_strategy='by_title',
        max_characters=4000,
        new_after_n_chars=3800,
        combine_text_under_n_chars=2000,
        extract_image_block_output_dir='outputs',
    )

    for element in elements:

        element = element.to_dict()
        metadata = element['metadata']

        # Index the HTML rendering when the chunk exposes one
        # (`text_as_html`, presumably table chunks -- TODO confirm),
        # otherwise fall back to the plain text.
        if 'text_as_html' in metadata:
            page_content = metadata['text_as_html']
        else:
            page_content = element['text']

        documents.append(
            Document(
                page_content=page_content,
                metadata={
                    'type': 'text',
                    'metadata': element,
                },
            )
        )

# Caption every file found in outputs/ with Gemini and index the caption.
images = os.listdir('outputs')
images = [f'outputs/{image}' for image in images]

for image_path in tqdm(images, total=len(images), leave=False):

    image = Image.open(image_path)

    try:
        response = model.generate_content([
            image,
            'Explain the Image',
        ])
        response = response.text

    except Exception as e:
        # Best effort: a failed caption must not abort the whole indexing
        # run, so a placeholder text is indexed instead.
        msg.fail(f'----| FAIL : COULDNT CALL THE IMAGE DESCRIPTION API : {e}')
        response = 'COuldnt Call Model for this'

    documents.append(
        Document(
            page_content=response,
            metadata={
                'type': 'image',
                'metadata': {
                    'image': image,
                },
            },
        )
    )

# The source PDFs are removed once they have been parsed.
# NOTE(review): a second start in the same checkout will then fail at
# os.listdir('uploads') -- confirm this clean-up is intended.
shutil.rmtree('uploads')

# In-memory FAISS index over all text and image-caption documents.
vc = FAISS.from_documents(
    documents=documents,
    embedding=HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2'),
)
99
+
100
def run_rag(query):
    """Answer *query* with Gemini, grounded on the most similar indexed chunks.

    Args:
        query: Free-text question coming from the UI.

    Returns:
        The model's answer as text, or a short failure notice when the
        model call fails or produces no usable text.
    """
    similar_docs = vc.similarity_search(query, k=4)

    # Join the chunks into plain text; interpolating the list itself would
    # put its Python repr (brackets, quotes, escaped newlines) in the prompt.
    context = '\n\n'.join(doc.page_content for doc in similar_docs)

    prompt = f'''
    You are a Helpfull Chatbot that helps users with their queries

    - You will be provided with a query
    - You will be provided with a context as well

    Your task is to generate a response to the query based on the context provided

    Context : {context}

    Query : {query}

    '''

    try:
        # `.text` raises when the response holds no valid text part (for
        # example a blocked answer), so it stays inside the guarded section.
        return model.generate_content(prompt).text

    except Exception as e:
        # UI boundary: log the failure and give the user a readable notice
        # instead of letting the exception surface as a generic error.
        msg.fail(f'----| FAIL : COULDNT CALL THE ANSWER GENERATION API : {e}')
        return 'Couldnt generate a response for this query'
122
+
123
# Minimal Gradio UI: a single text input routed through run_rag, with the
# returned string shown as text output.
demo = gr.Interface(fn=run_rag, inputs='text', outputs='text')

# Start the web server for the interface.
demo.launch()
outputs/Screenshot (3).png ADDED
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ poppler-utils
2
+ tesseract-ocr
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ unstructured
2
+ unstructured_inference
3
+ unstructured_pytesseract
4
+ pytesseract
5
+ pi_heif
6
+ pdfminer.six
7
+ langchain
8
+ langchain_community
9
+ langchain_huggingface
10
+ sentence-transformers
11
+ faiss-cpu
12
+ groq
13
+ google
14
+ google-generativeai
15
+ gradio
16
+ tqdm
17
+ Pillow
18
+ wasabi
uploads/Adani_Ports.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e14479f84d48f530cf11f37b8b45f327b98908fe4c714c09c042f44e1b40e15d
3
+ size 23441567
uploads/Asian_Paints.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a1375324a65d21104e91597c2a8c974f29da03c6e275c47456eb9ee3f317858
3
+ size 26576588
uploads/Axis_Bank.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da21de351524368d87dd801a2c97b86de799bd3d6fde21c94a57686e41e8196d
3
+ size 11263914
uploads/Bajaj_Finance.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49071340391999e6863619b33ceb8a52c5c3a6d19cf94a7f5577fd6ea2304075
3
+ size 19813034
uploads/Bajaj_Finserv.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8743975cc9720e2db0f861da7ddc45b97c85ae5bea2202a91a9e0eb230604edf
3
+ size 22926656
uploads/Bharti_Airtel.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab401f9504fd5fd7cf840235ecd411cf69eb82d4a853a2e6a9d70e4724801e89
3
+ size 25376207
uploads/HCL_Tech.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf28874294660cb882e56c84021ce910fea0a1d5a150649604d051e72174d21a
3
+ size 8340206
uploads/HDFC_Bank.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcd7ef37de26997fed3bcf2b7cfb9eb90f08d4e2617f5f3f967a1ac621c2a73e
3
+ size 17350949
uploads/Hindustan_Unilever.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffb3d9f6bde2d6fa5882011ad4d394f1d6ddbdfcd3d6080617d652a90fa10595
3
+ size 21616344
uploads/ICICI_Bank.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a62858135454fc5f6e09b018102cba98813928d2b2e72023e86194c0087cd09
3
+ size 11777065
uploads/ITC.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cef9cf09cefa71094985018a88b936733ff2c3a02b7cc6cd772fdeb8b75a9c97
3
+ size 24199409
uploads/IndusInd_Bank.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24102ada097cfc42f46cc2fb4d3f520fae76e01749075491cca4e6d0be91ea93
3
+ size 19735093
uploads/Infosys.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0a9bb9e802aff5f09733b8c78c88e9878732ac46e0fb29754c6da87ad47326a
3
+ size 11441269
uploads/JSW_Steel.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5946803f37a1c099be7b58fb4a8746f57ebd475ef7b673994e93746ffc62b508
3
+ size 17148166
uploads/Kotak_Bank.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea21e25405bc8c9badb4085b42eade96559f4b915a2f383b4fee33a151d47602
3
+ size 18338231
uploads/L&T.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a86677b95602b7ebafcd16295deafbd7a2cba4a90c502aed37f05dee9dafcf8a
3
+ size 18910726
uploads/Mahindra&Mahindra.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d46740998cf312d94a1dd6a4d6f9ee1bc73cbd8b9efa4f974687f1017fb736e7
3
+ size 24944665
uploads/Maruti_Suzuki.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e298354886131e8e253df60ac25a725e140f89452ad6a2f4c0e857074107548c
3
+ size 23558415
uploads/NTPC.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb943a9cb4db67822a4f1657454cd476e08d73259a18e990a6a36e29c78ec8aa
3
+ size 23736915
uploads/Nestle.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2bd86b483d58de5cb6fac36113ff1851d39d14d666c33f24c8cac382f180dfa
3
+ size 8982580
uploads/Powergrid.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb943a9cb4db67822a4f1657454cd476e08d73259a18e990a6a36e29c78ec8aa
3
+ size 21240959
uploads/Reliance.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d7d6038b4d6957981fb422bc501ea893765c24611ea1fa8a5d3cd9425541eae
3
+ size 12354677
uploads/SBI_Bank.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aacda3744fdcc9aea893c0c0cd35c22e5f30a93513e37f86da29f30770961dc
3
+ size 16162498
uploads/Sun_Pharma.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbb9d8021b365cc095ef9e5c6917e4ec4fe7cca079672ece3beb6284128c9343
3
+ size 10123693
uploads/TCS.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:992b3cb2a47a9ce84b127d23d7dc1fb22967d7592d1e8af2c2120b6764da8651
3
+ size 10182737
uploads/Tata_Motors.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c7610ebbea4ff3dabcb1c63d673b8b2e3c6c0f45d17daf1a88908fbf8f04a25
3
+ size 20573259
uploads/Tata_Steel.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:870be0ca867c1c562337a5319d09cac353a85dbb7907dce7c13ee47ad7af3e21
3
+ size 26006738
uploads/Tech_Mahindra.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:304878c8f4fba07d1eb1d120556c067b46d56d23993c56b5a3f552b4f109b477
3
+ size 17536967
uploads/Titan.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1979b0e5973d1c07ebf1a76d5580554c45acd67242746c6673a0ab276d1de6e3
3
+ size 21978349
uploads/Ultratech.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e250887b207b20483cf040589ab9723a935dca317ce9bccd050cfdef059c0a8b
3
+ size 29394403