AyushS9020 committed on
Commit
0fbe4cf
·
verified ·
1 Parent(s): edd608b

Upload 24 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ uploads/Atttention[[:space:]]Is[[:space:]]All[[:space:]]You[[:space:]]Need.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from tqdm import tqdm
4
+
5
+ from unstructured.partition.pdf import partition_pdf
6
+ from langchain.schema.document import Document
7
+ import google.generativeai as genai
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
+ import shutil
11
+
12
+ from wasabi import msg
13
+
14
+ from PIL import Image
15
+
16
# Configure the Gemini client.
# SECURITY FIX: the API key was hard-coded in source and committed to a
# public repo — that key is leaked and must be rotated. Read it from the
# environment instead (set GOOGLE_API_KEY as an HF Spaces secret).
genai.configure(api_key = os.environ['GOOGLE_API_KEY'])
model = genai.GenerativeModel('gemini-1.5-flash')
18
+
19
# Turn every PDF dropped into uploads/ into LangChain Documents.
pdf_paths = [f'uploads/{name}' for name in os.listdir('uploads')]

documents = []

for pdf_path in tqdm(pdf_paths , total = len(pdf_paths) , leave = False) :

    # Partition the PDF by title, writing extracted figures to outputs/
    # and inferring HTML structure for any tables encountered.
    chunks = partition_pdf(
        filename = pdf_path ,
        extract_images_in_pdf = True ,
        infer_table_structure = True ,
        chunking_strategy = 'by_title' ,
        max_characters = 4000 ,
        new_after_n_chars = 3800 ,
        combine_text_under_n_chars = 2000 ,
        extract_image_block_output_dir = 'outputs'
    )

    for chunk in chunks :

        raw = chunk.to_dict()
        chunk_meta = raw['metadata']

        # Tables carry an HTML rendering in their metadata; prefer it over
        # the flat text so table structure survives into the index.
        content = chunk_meta['text_as_html'] if 'text_as_html' in chunk_meta else raw['text']

        documents.append(
            Document(
                page_content = content ,
                metadata = {
                    'type' : 'text' ,
                    'metadata' : raw
                }
            )
        )
62
+
63
# Caption every figure extracted from the PDFs with Gemini and add the
# caption to the corpus so images are searchable by description.
image_paths = [f'outputs/{name}' for name in os.listdir('outputs')]

for image_path in tqdm(image_paths , total = len(image_paths) , leave = False) :

    # BUG FIX: the original shadowed the path variable with the opened PIL
    # image, so the metadata stored a live, never-closed Image object
    # instead of a reference to the file (handle leak, unserializable
    # metadata). Keep the path, and open the image in a context manager so
    # the handle is closed; opening inside the try also means a corrupt
    # image file degrades to the fallback caption instead of crashing.
    try :

        with Image.open(image_path) as image :

            response = model.generate_content([
                image ,
                'Explain the Image'
            ]).text

    except Exception as e : msg.fail(f'----| FAIL : COULDNT CALL THE IMAGE DESCRIPTION API : {e}') ; response = 'COuldnt Call Model for this'

    documents.append(
        Document(
            page_content = response ,
            metadata = {
                'type' : 'image' ,
                'metadata' : {
                    'image' : image_path
                }
            }
        )
    )
92
+
93
# The source PDFs are fully ingested at this point; drop the upload
# directory to reclaim disk space.
shutil.rmtree('uploads')

# Embed every document with a small sentence-transformer and build an
# in-memory FAISS index for similarity search.
embedder = HuggingFaceEmbeddings(model_name = 'all-MiniLM-L6-v2')

vc = FAISS.from_documents(
    documents = documents ,
    embedding = embedder
)
99
+
100
def run_rag(query) :
    '''Answer a user query grounded in the top FAISS matches.

    Retrieves the four most similar indexed chunks, inlines them into the
    prompt as context, and returns Gemini's generated answer text.
    '''

    # Gather the page contents of the four nearest chunks.
    context = [doc.page_content for doc in vc.similarity_search(query , k = 4)]

    prompt = f'''
    You are a Helpfull Chatbot that helps users with their queries

    - You will be provided with a query
    - You will be provided with a context as well

    Your task is to generate a response to the query based on the context provided

    Context : {context}

    Query : {query}

    '''

    # Let Gemini compose the grounded answer.
    return model.generate_content(prompt).text
122
+
123
# Minimal Gradio UI: one text box in, the grounded answer out.
demo = gr.Interface(
    fn = run_rag ,
    inputs = 'text' ,
    outputs = 'text'
)

# IDIOM FIX: guard the launch so importing this module does not start a
# server. HF Spaces executes app.py as __main__, so deployed behavior is
# unchanged.
if __name__ == '__main__' :
    demo.launch()
outputs/figure-1-1.jpg ADDED
outputs/figure-1-2.jpg ADDED
outputs/figure-1-3.jpg ADDED
outputs/figure-1-4.jpg ADDED
outputs/figure-1-5.jpg ADDED
outputs/figure-2-6.jpg ADDED
outputs/figure-2-7.jpg ADDED
outputs/figure-2-8.jpg ADDED
outputs/figure-2-9.jpg ADDED
outputs/figure-3-10.jpg ADDED
outputs/figure-3-11.jpg ADDED
outputs/figure-3-12.jpg ADDED
outputs/figure-3-13.jpg ADDED
outputs/figure-3-14.jpg ADDED
outputs/figure-4-15.jpg ADDED
outputs/figure-4-16.jpg ADDED
outputs/figure-4-17.jpg ADDED
outputs/figure-4-18.jpg ADDED
outputs/figure-4-19.jpg ADDED
outputs/figure-4-20.jpg ADDED
pakages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ poppler-utils
2
+ tesseract-ocr
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ unstructured
2
+ unstructured_inference
3
+ unstructured_pytesseract
4
+ pytesseract
5
+ pi_heif
6
+ pdfminer.six
7
+ langchain
8
+ langchain_community
9
+ langchain_huggingface
10
+ sentence-transformers
11
+ faiss-cpu
12
+ groq
13
+ google
14
+ google-generativeai
15
+ gradio
16
+ tqdm
17
+ Pillow
18
+ wasabi
uploads/Atttention Is All You Need.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdfaa68d8984f0dc02beaca527b76f207d99b666d31d1da728ee0728182df697
3
+ size 2215244