AyushS9020 committed on
Commit
0fbe4cf
·
verified ·
1 Parent(s): edd608b

Upload 24 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ uploads/Atttention[[:space:]]Is[[:space:]]All[[:space:]]You[[:space:]]Need.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from tqdm import tqdm
4
+
5
+ from unstructured.partition.pdf import partition_pdf
6
+ from langchain.schema.document import Document
7
+ import google.generativeai as genai
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
+ import shutil
11
+
12
+ from wasabi import msg
13
+
14
+ from PIL import Image
15
+
16
# Configure the Gemini client.
# SECURITY FIX: the API key was hard-coded in source and committed to a
# public repo — that key is leaked and must be rotated. Read it from the
# environment instead (set GOOGLE_API_KEY as an HF Spaces secret).
genai.configure(api_key = os.environ['GOOGLE_API_KEY'])
model = genai.GenerativeModel('gemini-1.5-flash')
18
+
19
# Turn every PDF dropped into uploads/ into LangChain Documents.
pdf_paths = [f'uploads/{name}' for name in os.listdir('uploads')]

documents = []

for pdf_path in tqdm(pdf_paths , total = len(pdf_paths) , leave = False) :

    # Partition the PDF by title, writing extracted figures to outputs/
    # and inferring HTML structure for any tables encountered.
    chunks = partition_pdf(
        filename = pdf_path ,
        extract_images_in_pdf = True ,
        infer_table_structure = True ,
        chunking_strategy = 'by_title' ,
        max_characters = 4000 ,
        new_after_n_chars = 3800 ,
        combine_text_under_n_chars = 2000 ,
        extract_image_block_output_dir = 'outputs'
    )

    for chunk in chunks :

        raw = chunk.to_dict()
        chunk_meta = raw['metadata']

        # Tables carry an HTML rendering in their metadata; prefer it over
        # the flat text so table structure survives into the index.
        content = chunk_meta['text_as_html'] if 'text_as_html' in chunk_meta else raw['text']

        documents.append(
            Document(
                page_content = content ,
                metadata = {
                    'type' : 'text' ,
                    'metadata' : raw
                }
            )
        )
62
+
63
# Caption every figure extracted from the PDFs with Gemini and add the
# caption to the corpus so images are searchable by description.
image_paths = [f'outputs/{name}' for name in os.listdir('outputs')]

for image_path in tqdm(image_paths , total = len(image_paths) , leave = False) :

    # BUG FIX: the original shadowed the path variable with the opened PIL
    # image, so the metadata stored a live, never-closed Image object
    # instead of a reference to the file (handle leak, unserializable
    # metadata). Keep the path, and open the image in a context manager so
    # the handle is closed; opening inside the try also means a corrupt
    # image file degrades to the fallback caption instead of crashing.
    try :

        with Image.open(image_path) as image :

            response = model.generate_content([
                image ,
                'Explain the Image'
            ]).text

    except Exception as e : msg.fail(f'----| FAIL : COULDNT CALL THE IMAGE DESCRIPTION API : {e}') ; response = 'COuldnt Call Model for this'

    documents.append(
        Document(
            page_content = response ,
            metadata = {
                'type' : 'image' ,
                'metadata' : {
                    'image' : image_path
                }
            }
        )
    )
92
+
93
# The source PDFs are fully ingested at this point; drop the upload
# directory to reclaim disk space.
shutil.rmtree('uploads')

# Embed every document with a small sentence-transformer and build an
# in-memory FAISS index for similarity search.
embedder = HuggingFaceEmbeddings(model_name = 'all-MiniLM-L6-v2')

vc = FAISS.from_documents(
    documents = documents ,
    embedding = embedder
)
99
+
100
def run_rag(query) :
    '''Answer a user query grounded in the top FAISS matches.

    Retrieves the four most similar indexed chunks, inlines them into the
    prompt as context, and returns Gemini's generated answer text.
    '''

    # Gather the page contents of the four nearest chunks.
    context = [doc.page_content for doc in vc.similarity_search(query , k = 4)]

    prompt = f'''
    You are a Helpfull Chatbot that helps users with their queries

    - You will be provided with a query
    - You will be provided with a context as well

    Your task is to generate a response to the query based on the context provided

    Context : {context}

    Query : {query}

    '''

    # Let Gemini compose the grounded answer.
    return model.generate_content(prompt).text
122
+
123
# Minimal Gradio UI: one text box in, the grounded answer out.
demo = gr.Interface(
    fn = run_rag ,
    inputs = 'text' ,
    outputs = 'text'
)

# IDIOM FIX: guard the launch so importing this module does not start a
# server. HF Spaces executes app.py as __main__, so deployed behavior is
# unchanged.
if __name__ == '__main__' :
    demo.launch()
outputs/figure-1-1.jpg ADDED
outputs/figure-1-2.jpg ADDED
outputs/figure-1-3.jpg ADDED
outputs/figure-1-4.jpg ADDED
outputs/figure-1-5.jpg ADDED
outputs/figure-2-6.jpg ADDED
outputs/figure-2-7.jpg ADDED
outputs/figure-2-8.jpg ADDED
outputs/figure-2-9.jpg ADDED
outputs/figure-3-10.jpg ADDED
outputs/figure-3-11.jpg ADDED
outputs/figure-3-12.jpg ADDED
outputs/figure-3-13.jpg ADDED
outputs/figure-3-14.jpg ADDED
outputs/figure-4-15.jpg ADDED
outputs/figure-4-16.jpg ADDED
outputs/figure-4-17.jpg ADDED
outputs/figure-4-18.jpg ADDED
outputs/figure-4-19.jpg ADDED
outputs/figure-4-20.jpg ADDED
pakages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ poppler-utils
2
+ tesseract-ocr
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ unstructured
2
+ unstructured_inference
3
+ unstructured_pytesseract
4
+ pytesseract
5
+ pi_heif
6
+ pdfminer.six
7
+ langchain
8
+ langchain_community
9
+ langchain_huggingface
10
+ sentence-transformers
11
+ faiss-cpu
12
+ groq
13
+ google
14
+ google-generativeai
15
+ gradio
16
+ tqdm
17
+ Pillow
18
+ wasabi
uploads/Atttention Is All You Need.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdfaa68d8984f0dc02beaca527b76f207d99b666d31d1da728ee0728182df697
3
+ size 2215244