Upload 9 files
- initate.py +10 -0
- user.py +250 -0
- utils/doi.py +97 -0
- utils/ingest_image.py +51 -0
- utils/ingest_image2.py +50 -0
- utils/ingest_text.py +106 -0
- utils/ingest_video.py +122 -0
- utils/llm_ag.py +60 -0
- video/temp.py +0 -0
initate.py
ADDED
@@ -0,0 +1,10 @@
from utils.ingest_image import extract_and_store_images
from utils.ingest_text import create_vector_database
from utils.ingest_video import intiate_video


def process_pdf(pdf_path):
    image_collection = extract_and_store_images(pdf_path)
    text_collection = create_vector_database(pdf_path)
    video_collection = intiate_video()
    return image_collection, text_collection, video_collection
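A minimal end-to-end sketch of this entry point (the PDF path is a placeholder; it assumes the utils modules below are importable):

from initate import process_pdf

# Hypothetical input file; any local PDF works.
image_collection, text_collection, video_collection = process_pdf("sample.pdf")

# Each return value is a ChromaDB collection that can be queried directly.
print(image_collection.count(), text_collection.count(), video_collection.count())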
user.py
ADDED
@@ -0,0 +1,250 @@
import os
import base64
import io

import chromadb
import streamlit as st
from PIL import Image as PILImage

from initate import process_pdf
from utils.llm_ag import intiate_convo
from utils.doi import process_image_and_get_description

path = "mm_vdb2"
client = chromadb.PersistentClient(path=path)

def display_images(image_collection, query_text, max_distance=None, debug=False):
    """
    Display images in a Streamlit app based on a query.

    Args:
        image_collection: The image collection object for querying.
        query_text (str): The text query for images.
        max_distance (float, optional): Maximum allowable distance for filtering.
        debug (bool, optional): Whether to print debug information.
    """
    results = image_collection.query(
        query_texts=[query_text],
        n_results=10,
        include=['uris', 'distances']
    )

    uris = results['uris'][0]
    distances = results['distances'][0]

    # Combine uris and distances, then sort by URI in ascending order
    sorted_results = sorted(zip(uris, distances), key=lambda x: x[0])

    # Filter and display images
    for uri, distance in sorted_results:
        if max_distance is None or distance <= max_distance:
            if debug:
                st.write(f"URI: {uri} - Distance: {distance}")
            try:
                img = PILImage.open(uri)
                st.image(img, width=300)
            except Exception as e:
                st.error(f"Error loading image {uri}: {e}")
        else:
            if debug:
                st.write(f"URI: {uri} - Distance: {distance} (Filtered out)")


def display_videos_streamlit(video_collection, query_text, max_distance=None, max_results=5, debug=False):
    """
    Display videos in a Streamlit app based on a query.

    Args:
        video_collection: The video collection object for querying.
        query_text (str): The text query for videos.
        max_distance (float, optional): Maximum allowable distance for filtering.
        max_results (int, optional): Maximum number of results to display.
        debug (bool, optional): Whether to print debug information.
    """
    # Deduplication set
    displayed_videos = set()

    # Query the video collection with the specified text
    results = video_collection.query(
        query_texts=[query_text],
        n_results=max_results,  # Adjust the number of results if needed
        include=['uris', 'distances', 'metadatas']
    )

    # Extract URIs, distances, and metadatas from the result
    uris = results['uris'][0]
    distances = results['distances'][0]
    metadatas = results['metadatas'][0]

    # Display the videos that meet the distance criteria
    for uri, distance, metadata in zip(uris, distances, metadatas):
        video_uri = metadata['video_uri']

        # Check if a max_distance filter is applied and the distance is within the allowed range
        if (max_distance is None or distance <= max_distance) and video_uri not in displayed_videos:
            if debug:
                st.write(f"URI: {uri} - Video URI: {video_uri} - Distance: {distance}")
            st.video(video_uri)  # Display video in Streamlit
            displayed_videos.add(video_uri)  # Add to the set to prevent duplication
        else:
            if debug:
                st.write(f"URI: {uri} - Video URI: {video_uri} - Distance: {distance} (Filtered out)")


def image_uris(image_collection, query_text, max_distance=None, max_results=5):
    results = image_collection.query(
        query_texts=[query_text],
        n_results=max_results,
        include=['uris', 'distances']
    )

    filtered_uris = []
    for uri, distance in zip(results['uris'][0], results['distances'][0]):
        if max_distance is None or distance <= max_distance:
            filtered_uris.append(uri)

    return filtered_uris

def text_uris(text_collection, query_text, max_distance=None, max_results=5):
    results = text_collection.query(
        query_texts=[query_text],
        n_results=max_results,
        include=['documents', 'distances']
    )

    filtered_texts = []
    for doc, distance in zip(results['documents'][0], results['distances'][0]):
        if max_distance is None or distance <= max_distance:
            filtered_texts.append(doc)

    return filtered_texts

def frame_uris(video_collection, query_text, max_distance=None, max_results=5):
    results = video_collection.query(
        query_texts=[query_text],
        n_results=max_results,
        include=['uris', 'distances']
    )

    filtered_uris = []
    seen_folders = set()

    for uri, distance in zip(results['uris'][0], results['distances'][0]):
        if max_distance is None or distance <= max_distance:
            folder = os.path.dirname(uri)
            if folder not in seen_folders:
                filtered_uris.append(uri)
                seen_folders.add(folder)

            if len(filtered_uris) == max_results:
                break

    return filtered_uris

def image_uris2(image_collection2, query_text, max_distance=None, max_results=5):
    results = image_collection2.query(
        query_texts=[query_text],
        n_results=max_results,
        include=['uris', 'distances']
    )

    filtered_uris = []
    for uri, distance in zip(results['uris'][0], results['distances'][0]):
        if max_distance is None or distance <= max_distance:
            filtered_uris.append(uri)

    return filtered_uris


def format_prompt_inputs(image_collection, text_collection, video_collection, user_query):
    frame_candidates = frame_uris(video_collection, user_query, max_distance=1.55)
    image_candidates = image_uris(image_collection, user_query, max_distance=1.5)
    texts = text_uris(text_collection, user_query, max_distance=1.3)

    inputs = {"query": user_query, "texts": texts}
    frame = frame_candidates[0] if frame_candidates else ""
    inputs["frame"] = frame

    if image_candidates:
        image = image_candidates[0]
        with PILImage.open(image) as img:
            # Downscale and convert to grayscale to keep the payload small
            img = img.resize((img.width // 6, img.height // 6))
            img = img.convert("L")
            with io.BytesIO() as output:
                img.save(output, format="JPEG", quality=60)
                compressed_image_data = output.getvalue()

        inputs["image_data_1"] = base64.b64encode(compressed_image_data).decode('utf-8')
    else:
        inputs["image_data_1"] = ""

    return inputs

def page_1():
    st.title("Page 1: Upload and Process PDF")

    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file:
        pdf_path = f"/tmp/{uploaded_file.name}"
        with open(pdf_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        try:
            image_collection, text_collection, video_collection = process_pdf(pdf_path)
            st.session_state.image_collection = image_collection
            st.session_state.text_collection = text_collection
            st.session_state.video_collection = video_collection

            st.success("PDF processed successfully! Collections saved to session state.")
        except Exception as e:
            st.error(f"Error processing PDF: {e}")

def page_2():
    st.title("Page 2: Query and Use Processed Collections")

    if "image_collection" in st.session_state and "text_collection" in st.session_state and "video_collection" in st.session_state:
        image_collection = st.session_state.image_collection
        text_collection = st.session_state.text_collection
        video_collection = st.session_state.video_collection
        st.success("Collections loaded successfully.")

        query = st.text_input("Enter your query", value="Example Query")
        if query:
            inputs = format_prompt_inputs(image_collection, text_collection, video_collection, query)
            texts = inputs["texts"]
            image_data_1 = inputs["image_data_1"]

            if image_data_1:
                image_data_1 = process_image_and_get_description(image_data_1)

            response = intiate_convo(query, image_data_1, texts)
            st.write("Response:", response)

            st.markdown("### Images")
            display_images(image_collection, query, max_distance=1.55, debug=True)

            st.markdown("### Videos")
            frame = inputs["frame"]
            if frame:
                # Each frame is stored in a folder named after its source video,
                # so derive the video title from the frame's parent directory.
                video_title = os.path.basename(os.path.dirname(frame))
                video_path = f"StockVideos-CC0/{video_title}.mp4"
                if os.path.exists(video_path):
                    st.video(video_path)
                else:
                    st.write("No related videos found.")
    else:
        st.error("Collections not found in session state. Please process the PDF on Page 1.")

# --- Navigation ---

PAGES = {
    "Upload and Process PDF": page_1,
    "Query and Use Processed Collections": page_2
}

# Select page
selected_page = st.sidebar.selectbox("Choose a page", options=list(PAGES.keys()))

# Render selected page
PAGES[selected_page]()
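The helpers above expect the collection objects returned by process_pdf. Because the client is persistent, a collection can also be reattached outside Streamlit; a minimal sketch, assuming utils/ingest_image.py has already populated the "image" collection in mm_vdb2 (the embedding function and data loader must match the ones used at ingest time):

import chromadb
from chromadb.utils.data_loaders import ImageLoader
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction

client = chromadb.PersistentClient(path="mm_vdb2")
image_collection = client.get_or_create_collection(
    name="image",
    embedding_function=OpenCLIPEmbeddingFunction(),
    data_loader=ImageLoader()
)

# Same query/filter pattern as image_uris(), without the Streamlit rendering.
results = image_collection.query(query_texts=["example query"], n_results=5, include=['uris', 'distances'])
for uri, distance in zip(results['uris'][0], results['distances'][0]):
    print(uri, distance)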
utils/doi.py
ADDED
@@ -0,0 +1,97 @@
import base64
import requests
import os
import logging
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.StreamHandler(),  # Log to console
        logging.FileHandler("api_request_logs.log")  # Log to a file
    ]
)

# Get the API key from the environment
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set in the .env file")

def process_image_and_get_description(image_path, model="llama-3.2-90b-vision-preview", retries=3):
    """
    Process the image using the Groq API and get a description.
    Retries in case of failure.

    Args:
        image_path (str): Base64-encoded image data. The caller in user.py
            already encodes the image, so no file is read here.
        model (str): Model to use for processing.
        retries (int): Number of retries before giving up.

    Returns:
        str: Description of the image or an error message.
    """
    # The argument is already base64-encoded, so it is used directly.
    encoded_image = image_path
    # # Encode the image to base64 (original path-based variant)
    # try:
    #     with open(image_path, "rb") as image_file:
    #         encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
    #     logging.info("Successfully encoded the image to base64.")
    # except Exception as e:
    #     logging.error(f"Error encoding the image: {e}")
    #     return "Error encoding the image."

    # Prepare the message payload
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Analyze the image to identify what is happening, describe the overall context, and perform OCR to extract any visible text. Additionally, specify whether the subject is a human, animal, or object, and provide a detailed description of any object the human is holding or their specific actions."},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}}
            ]
        }
    ]

    for attempt in range(1, retries + 1):
        try:
            logging.info(f"Attempt {attempt} to process the image with Groq API.")

            # Make the API request
            response = requests.post(
                "https://api.groq.com/openai/v1/chat/completions",
                json={
                    "model": model,
                    "messages": messages,
                    "max_tokens": 4096,
                    "stop": None,
                    "stream": False
                },
                headers={
                    "Authorization": f"Bearer {GROQ_API_KEY}",
                    "Content-Type": "application/json"
                },
                timeout=30
            )

            # Process the response
            if response.status_code == 200:
                result = response.json()
                answer = result["choices"][0]["message"]["content"]
                logging.info("Successfully processed the image and received a response.")
                return answer
            else:
                logging.warning(f"Received error response: {response.status_code} - {response.text}")
        except requests.RequestException as e:
            logging.error(f"RequestException on attempt {attempt}: {e}")

    logging.error("All attempts to process the image failed.")
    return "Error: Unable to process the image after multiple attempts."

# # Example usage
# image_path = r"/content/temp.jpeg"
# description = process_image_and_get_description(image_path)
# print(description)
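Note that user.py passes already base64-encoded JPEG data to this function rather than a file path, which is why the path-based encoding stays commented out. A minimal calling sketch under that assumption (temp.jpeg is a placeholder file name):

import base64
from utils.doi import process_image_and_get_description

# Encode a local image first; the function forwards the base64 string to the vision model.
with open("temp.jpeg", "rb") as f:
    encoded = base64.b64encode(f.read()).decode("utf-8")

print(process_image_and_get_description(encoded))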
utils/ingest_image.py
ADDED
@@ -0,0 +1,51 @@
# Ingest images embedded in the PDF
import os
import fitz  # PyMuPDF
import chromadb
from chromadb.utils.data_loaders import ImageLoader
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction  # type: ignore

path = "mm_vdb2"
client = chromadb.PersistentClient(path=path)

def extract_and_store_images(pdf_path, images_dir=r'extracted_images'):
    # Step 1: Extract images from PDF
    pdf_document = fitz.open(pdf_path)
    os.makedirs(images_dir, exist_ok=True)

    for page_num in range(len(pdf_document)):
        page = pdf_document.load_page(page_num)
        image_list = page.get_images(full=True)

        for image_index, img in enumerate(image_list):
            xref = img[0]
            base_image = pdf_document.extract_image(xref)
            image_bytes = base_image["image"]
            image_ext = base_image["ext"]
            image_filename = f"{images_dir}/page_{page_num+1}_img_{image_index+1}.{image_ext}"

            with open(image_filename, "wb") as image_file:
                image_file.write(image_bytes)
            print(f"Saved: {image_filename}")

    print("Image extraction complete.")

    # Step 2: Add extracted images to ChromaDB
    image_loader = ImageLoader()
    CLIP = OpenCLIPEmbeddingFunction()
    image_collection = client.get_or_create_collection(name="image", embedding_function=CLIP, data_loader=image_loader)

    ids = []
    uris = []

    for i, filename in enumerate(sorted(os.listdir(images_dir))):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            file_path = os.path.join(images_dir, filename)
            ids.append(str(i))
            uris.append(file_path)

    image_collection.add(ids=ids, uris=uris)
    print("Images added to the database.")
    return image_collection
utils/ingest_image2.py
ADDED
@@ -0,0 +1,50 @@
# Ingest page screenshots (one image per PDF page)
import os
import chromadb
from pdf2image import convert_from_path
from chromadb.utils.data_loaders import ImageLoader
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction  # type: ignore

path = "mm_vdb2"
client = chromadb.PersistentClient(path=path)

def extract_and_store_images2(pdf_path, images_dir=r'extracted_images2'):
    # Ensure the output directory exists
    os.makedirs(images_dir, exist_ok=True)

    # Convert PDF to a list of images (one per page)
    pages = convert_from_path(pdf_path, 300)  # 300 dpi is a good resolution

    # Save each page as an image (screenshot)
    for i, page in enumerate(pages):
        output_path = os.path.join(images_dir, f'page_{i + 1}.png')
        page.save(output_path, 'PNG')
        print(f"Saved: {output_path}")

    print("Image extraction complete.")

    # Step 2: Add extracted images to ChromaDB
    image_loader = ImageLoader()
    CLIP = OpenCLIPEmbeddingFunction()
    image_collection2 = client.get_or_create_collection(name="image2", embedding_function=CLIP, data_loader=image_loader)

    ids = []
    uris = []

    for i, filename in enumerate(sorted(os.listdir(images_dir))):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            file_path = os.path.join(images_dir, filename)
            ids.append(str(i))
            uris.append(file_path)

    image_collection2.add(ids=ids, uris=uris)
    print("Images added to the database.")
    return image_collection2
utils/ingest_text.py
ADDED
@@ -0,0 +1,106 @@
import os
import pickle

import chromadb
import nest_asyncio

# Ingest text
from llama_parse import LlamaParse
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter

nest_asyncio.apply()

path = "mm_vdb2"
client = chromadb.PersistentClient(path=path)

llamaparse_api_key = "llx-qXMliHH4UOphFaahO8HEqR5wOj1U6T7oxqC4DoLiik7UvKkJ"
groq_api_key = "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"

parsed_data_file = r"parsed_data.pkl"
output_md = r"output.md"
loki = r"data"

# Define a function to load parsed data if available, or parse if not
def load_or_parse_data(loc):
    data_file = parsed_data_file

    if os.path.exists(data_file):
        # Load the parsed data from the file
        with open(data_file, "rb") as f:
            parsed_data = pickle.load(f)
    else:
        # Perform the parsing step and store the result in llama_parse_documents
        parsingInstructiontest10k = """The provided document is a user guide or a manual.
        It contains many images and tables.
        Try to be precise while answering the questions."""
        parser = LlamaParse(api_key=llamaparse_api_key, result_type="markdown", parsing_instruction=parsingInstructiontest10k)  # type: ignore
        llama_parse_documents = parser.load_data(loc)

        # Save the parsed data to a file
        with open(data_file, "wb") as f:
            pickle.dump(llama_parse_documents, f)

        # Set the parsed data to the variable
        parsed_data = llama_parse_documents

    return parsed_data


# Create vector database
def create_vector_database(loc):
    """
    Creates a vector database from the PDF at the given path.

    The PDF is loaded page by page, each page is split into chunks,
    and the chunks are persisted into a Chroma collection.
    """
    # Load the PDF with PyMuPDF
    loader = PyMuPDFLoader(file_path=loc)
    docs = loader.load()  # This returns a list of pages/documents

    print(f"Number of documents: {len(docs)}")
    print("Vector DB started!")

    # Initialize a list for document content and IDs
    document_contents = []
    ids = []

    # Split the content into chunks
    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)

    # Generate unique IDs for each chunk, with the PDF page number first
    for i, doc in enumerate(docs):
        # Print metadata to understand its structure
        print(f"Metadata for document {i+1}: {doc.metadata}")

        # Try to extract the page number from metadata or use a default
        page_num = doc.metadata.get('page', doc.metadata.get('page_number', f'unknown_{i+1}'))

        # Extract text from each page
        page_content = doc.page_content  # Get the content of the page

        # Split the content into chunks based on the text splitter
        doc_chunks = text_splitter.split_text(page_content)

        # Add chunk contents and corresponding page-based IDs
        for chunk_idx, chunk in enumerate(doc_chunks):
            document_contents.append(chunk)  # Add the chunk content
            ids.append(f"page_{page_num}_chunk_{i+1}_{chunk_idx+1}")  # Add a unique chunk ID

    # Ensure the number of ids matches the number of documents (contents)
    assert len(ids) == len(document_contents), "Mismatch between number of ids and document contents"

    # Create or get the text collection
    text_collection = client.get_or_create_collection(name="text_collection")

    # Add documents and their embeddings to the collection
    text_collection.add(
        documents=document_contents,  # All the chunk-level content
        ids=ids  # Matching IDs for each chunk content
    )

    print('Vector DB created successfully!')
    return text_collection
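Because get_or_create_collection is called without an embedding function here, the text collection falls back to ChromaDB's default embedding model. A minimal query sketch after ingestion (the PDF path and query are placeholders):

from utils.ingest_text import create_vector_database

text_collection = create_vector_database("sample.pdf")

# Retrieve the closest chunks with their distances, mirroring text_uris() in user.py.
results = text_collection.query(query_texts=["installation steps"], n_results=3, include=['documents', 'distances'])
for doc, dist in zip(results['documents'][0], results['distances'][0]):
    print(round(dist, 3), doc[:80])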
utils/ingest_video.py
ADDED
@@ -0,0 +1,122 @@
import os
import zipfile

import chromadb
import cv2
import gdown
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
from chromadb.utils.data_loaders import ImageLoader

path = "mm_vdb2"
client = chromadb.PersistentClient(path=path)

image_loader = ImageLoader()
CLIP = OpenCLIPEmbeddingFunction()
video_collection = client.get_or_create_collection(
    name='video_collection',
    embedding_function=CLIP,
    data_loader=image_loader
)

def unzip_file(zip_path, extract_to):
    """
    Unzips a zip file to the specified directory.

    Args:
        zip_path (str): Path to the zip file.
        extract_to (str): Directory where the contents should be extracted.
    """
    try:
        # Ensure the destination directory exists
        os.makedirs(extract_to, exist_ok=True)

        # Open the zip file
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # Extract all the contents
            zip_ref.extractall(extract_to)
            print(f"Successfully extracted {zip_path} to {extract_to}")
    except Exception as e:
        print(f"An error occurred: {e}")


def extract_frames(video_folder, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for video_filename in os.listdir(video_folder):
        if video_filename.endswith('.mp4'):
            video_path = os.path.join(video_folder, video_filename)
            video_capture = cv2.VideoCapture(video_path)
            fps = video_capture.get(cv2.CAP_PROP_FPS)
            frame_count = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
            duration = frame_count / fps

            output_subfolder = os.path.join(output_folder, os.path.splitext(video_filename)[0])
            if not os.path.exists(output_subfolder):
                os.makedirs(output_subfolder)

            success, image = video_capture.read()
            frame_number = 0
            while success:
                if frame_number == 0 or frame_number % int(fps * 5) == 0 or frame_number == frame_count - 1:
                    frame_time = frame_number / fps
                    output_frame_filename = os.path.join(output_subfolder, f'frame_{int(frame_time)}.jpg')
                    cv2.imwrite(output_frame_filename, image)

                success, image = video_capture.read()
                frame_number += 1

            video_capture.release()

def add_frames_to_chromadb(video_dir, frames_dir):
    # Dictionary to hold video titles and their corresponding frames
    video_frames = {}

    # Process each video and associate its frames
    for video_file in os.listdir(video_dir):
        if video_file.endswith('.mp4'):
            video_title = video_file[:-4]
            frame_folder = os.path.join(frames_dir, video_title)
            if os.path.exists(frame_folder):
                # List all jpg files in the folder
                video_frames[video_title] = [f for f in os.listdir(frame_folder) if f.endswith('.jpg')]

    # Prepare ids, uris and metadatas
    ids = []
    uris = []
    metadatas = []

    for video_title, frames in video_frames.items():
        video_path = os.path.join(video_dir, f"{video_title}.mp4")
        for frame in frames:
            frame_id = f"{frame[:-4]}_{video_title}"
            frame_path = os.path.join(frames_dir, video_title, frame)
            ids.append(frame_id)
            uris.append(frame_path)
            metadatas.append({'video_uri': video_path})

    video_collection.add(ids=ids, uris=uris, metadatas=metadatas)

# Running it

def intiate_video():
    file_id = "1Fm8Cge1VM4w8fmE0cZfRKhIQV0UgBXzp"
    output_file = r"video\StockVideos-CC01.zip"
    gdown.download(f"https://drive.google.com/uc?id={file_id}", output_file, quiet=False)

    print(f"File downloaded successfully: {output_file}")
    # Example usage
    zip_file_path = r"video\StockVideos-CC01.zip"
    destination_folder = r"video"
    unzip_file(zip_file_path, destination_folder)
    print("Unzipped")
    video_folder_path = r'video\StockVideos-CC0'
    output_folder_path = r'video\StockVideos-CC0-frames'

    extract_frames(video_folder_path, output_folder_path)

    add_frames_to_chromadb(video_folder_path, output_folder_path)
    return video_collection
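intiate_video always downloads one fixed archive from Google Drive. If the clips are already on disk, a minimal sketch, assuming the folder layout used above, reuses only the frame extraction and indexing steps:

from utils.ingest_video import extract_frames, add_frames_to_chromadb, video_collection

video_folder = r"video\StockVideos-CC0"          # existing .mp4 files
frames_folder = r"video\StockVideos-CC0-frames"  # extracted frames are written here

extract_frames(video_folder, frames_folder)
add_frames_to_chromadb(video_folder, frames_folder)
print(video_collection.count(), "frames indexed")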
utils/llm_ag.py
ADDED
@@ -0,0 +1,60 @@
import requests
import os
from dotenv import load_dotenv

load_dotenv()

# Get the API key from the environment
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set in the .env file")

def intiate_convo(user_query, image_description, additional_text, model="mixtral-8x7b-32768"):
    # Prepare the message payload
    messages = [
        {
            "role": "system",
            "content": """You are an AI assistant for training. Given an image description, additional context, and a user query, respond with a detailed, step-by-step answer, and be polite.
            IMPORTANT: When referring to the image, subtly acknowledge it by saying "as I see here" rather than explicitly mentioning "image" or "photo."
            Your tone should be natural and conversational. Keep it detailed, engaging, and relevant to the query, using both the image description and the additional context as reference points."""
        },
        {
            "role": "user",
            "content": f"Image description: {image_description}. Additional context: {additional_text}. User query: {user_query}. Provide a detailed response like an AI assistant."
        }
    ]

    # Make the API request
    response = requests.post(
        "https://api.groq.com/openai/v1/chat/completions",
        json={
            "model": model,
            "messages": messages,
            "max_tokens": 32768,
            "stop": None,
            "stream": False
        },
        headers={
            "Authorization": f"Bearer {GROQ_API_KEY}",
            "Content-Type": "application/json"
        },
        timeout=60
    )

    # Process the response
    if response.status_code == 200:
        result = response.json()
        answer = result["choices"][0]["message"]["content"]
        return answer
    else:
        return f"Error from LLM API: {response.status_code} - {response.text}"

# # Example usage
# # Define the inputs
# user_query = "Can you tell me more about the person in this description?"
# image_description = """The main subject of the image is a person with dark complexion, short black hair, and white-framed glasses, wearing a dark-colored shirt or jacket. They are looking directly at the camera with a subtle expression."""
# additional_text = """This individual is a software engineer specializing in AI development. They are known for their expertise in computer vision and enjoy photography as a hobby."""

# # Get the LLM response
# response = intiate_convo(user_query, image_description, additional_text)
# print(response)
video/temp.py
ADDED
File without changes