Spaces: Sleeping
HARISH20205 committed
Commit c8c7a9e · 1 Parent(s): 168162d
first
Browse files
- .dockerignore +2 -0
- .gitignore +8 -0
- Dockerfile +39 -0
- firebase.py +15 -0
- main.py +49 -0
- requirements.txt +219 -0
- response.py +104 -0
- source.py +195 -0
- src/audio_processor.py +53 -0
- src/image_processor.py +55 -0
- src/text_processor.py +85 -0
- src/video_processor.py +23 -0
.dockerignore
ADDED
@@ -0,0 +1,2 @@
myenv/
__pycache__/
.gitignore
ADDED
@@ -0,0 +1,8 @@
myenv
__pycache__
services
scrap.txt
.env
code.ipynb
try.py
files
Dockerfile
ADDED
@@ -0,0 +1,39 @@
# Base image with Python
FROM python:3.11-slim

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE 1
ENV PYTHONUNBUFFERED 1

# Set working directory
WORKDIR /app

# Copy the requirements file
COPY requirements.txt /app/

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Install system dependencies individually
RUN apt-get update && apt-get install -y libgl1-mesa-glx
RUN apt-get install -y libglib2.0-0
RUN apt-get install -y libsm6
RUN apt-get install -y libxrender1
RUN apt-get install -y libxext6
RUN apt-get install -y ffmpeg

# Clean up apt cache
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

# Copy the application code
COPY . /app/

# Expose the port
EXPOSE 7860

# Start the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
firebase.py
ADDED
@@ -0,0 +1,15 @@
# import firebase_admin
# from firebase_admin import credentials, firestore
# import os

# credPath = os.path.join("services", "firebase_credentials.json")
# # Path to your Firebase credentials JSON file
# cred = credentials.Certificate(credPath)

# # Initialize the Firebase app with storageBucket
# firebase_admin.initialize_app(cred, {
#     'storageBucket': 'verbisense.appspot.com'  # Replace with your bucket name
# })

# # Initialize Firestore DB
# db = firestore.client()
main.py
ADDED
@@ -0,0 +1,49 @@
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
# from firebase import db
# from firebase_admin import auth, storage
from pydantic import BaseModel
from typing import Dict, List
import os
from source import main

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],  # Allow all origins
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*']
)

class QueryChat(BaseModel):
    userId: str
    files: List
    query: str


# bucket = storage.bucket("verbisense.appspot.com")

@app.get("/")
def read_root():
    return {"message": "Welcome to Verbisense!"}

@app.post("/chat")
async def chat(data: QueryChat):
    try:
        print("userId : ", data.userId)
        print("files : ", data.files)
        print("query : ", data.query)

        response = main(data.files, data.query)

        print("\n" + "=" * 50)
        print(response)
        print("=" * 50)
        if not response:
            return False
        return {"query": data.query, "response": response}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
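A minimal client sketch for exercising the /chat endpoint above (not part of the committed files; the URL, userId, and query values are illustrative, and the port matches the EXPOSE 7860 in the Dockerfile):

import requests

# Assumed local endpoint; the container serves uvicorn on port 7860.
url = "http://localhost:7860/chat"

payload = {
    "userId": "demo-user",   # placeholder id; any string satisfies the QueryChat model
    "files": [],             # empty list: the query is answered without file context
    "query": "Introduce yourself, what are you?",
}

resp = requests.post(url, json=payload)
resp.raise_for_status()
print(resp.json())  # {"query": ..., "response": {...}} on success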
requirements.txt
ADDED
@@ -0,0 +1,219 @@
aiofiles==23.2.1
aiohappyeyeballs==2.4.0
aiohttp==3.10.5
aiosignal==1.3.1
altair==5.4.1
annotated-types==0.7.0
anyio==4.4.0
asttokens==2.4.1
attrs==24.2.0
backcall==0.2.0
beautifulsoup4==4.12.3
bleach==6.1.0
blis==0.7.11
CacheControl==0.14.0
cachetools==5.5.0
catalogue==2.0.10
certifi==2024.8.30
cffi==1.17.1
charset-normalizer==3.3.2
click==8.1.7
cloudpathlib==0.19.0
colorama==0.4.6
comm==0.2.2
confection==0.1.5
contourpy==1.3.0
cryptography==43.0.1
cycler==0.12.1
cymem==2.0.8
debugpy==1.8.5
decorator==5.1.1
defusedxml==0.7.1
diskcache==5.6.3
docopt==0.6.2
easyocr==1.7.1
executing==2.1.0
fastapi==0.114.0
fastjsonschema==2.20.0
ffmpeg-python==0.2.0
ffmpy==0.4.0
filelock==3.16.0
firebase-admin==6.5.0
fonttools==4.53.1
frozenlist==1.4.1
fsspec==2024.9.0
future==1.0.0
gemini-api==0.1.6
google-ai-generativelanguage==0.6.10
google-api-core==2.19.2
google-api-python-client==2.144.0
google-auth==2.34.0
google-auth-httplib2==0.2.0
google-cloud-core==2.4.1
google-cloud-firestore==2.18.0
google-cloud-storage==2.18.2
google-crc32c==1.6.0
google-generativeai==0.8.2
google-resumable-media==2.7.2
googleapis-common-protos==1.65.0
gradio==3.38.0
gradio_client==1.4.2
greenlet==3.1.0
grpcio==1.66.1
grpcio-status==1.66.1
h11==0.14.0
httpcore==1.0.5
httplib2==0.22.0
httpx==0.27.2
huggingface-hub==0.24.6
idna==3.8
imageio==2.35.1
importlib_resources==6.4.5
ipykernel==6.29.5
ipython==8.12.3
ipywidgets==8.1.5
jedi==0.19.1
Jinja2==3.1.4
joblib==1.4.2
jsonpatch==1.33
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter_client==8.6.2
jupyter_core==5.7.2
jupyterlab_pygments==0.3.0
jupyterlab_widgets==3.0.13
kiwisolver==1.4.7
langchain==0.3.0
langchain-core==0.3.1
langchain-text-splitters==0.3.0
langcodes==3.4.0
langsmith==0.1.121
language_data==1.2.0
lazy_loader==0.4
linkify-it-py==2.0.3
llvmlite==0.43.0
lxml==5.3.0
marisa-trie==1.2.0
markdown-it-py==2.2.0
MarkupSafe==2.1.5
matplotlib==3.9.2
matplotlib-inline==0.1.7
mdit-py-plugins==0.3.3
mdurl==0.1.2
mistune==3.0.2
more-itertools==10.5.0
mpmath==1.3.0
msgpack==1.0.8
multidict==6.1.0
murmurhash==1.0.10
narwhals==1.10.0
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
networkx==3.3
ninja==1.11.1.1
numba==0.60.0
numpy==1.26.4
openai-whisper==20231117
opencv-python==4.10.0.84
opencv-python-headless==4.10.0.84
orjson==3.10.7
packaging==24.1
pandas==2.2.2
pandocfilters==1.5.1
parso==0.8.4
pickleshare==0.7.5
pillow==10.4.0
pip-chill==1.0.3
platformdirs==4.3.2
preshed==3.0.9
prompt_toolkit==3.0.47
proto-plus==1.24.0
protobuf==5.28.0
psutil==6.0.0
pure_eval==0.2.3
py-cpuinfo==9.0.0
pyasn1==0.6.0
pyasn1_modules==0.4.0
pyclipper==1.3.0.post5
pycparser==2.22
pydantic==2.9.1
pydantic_core==2.23.3
pydub==0.25.1
Pygments==2.18.0
PyJWT==2.9.0
PyMuPDF==1.24.10
PyMuPDFb==1.24.10
pyparsing==3.1.4
python-bidi==0.6.0
python-dateutil==2.9.0.post0
python-docx==1.1.2
python-dotenv==1.0.1
python-multipart==0.0.9
pytz==2024.1
PyYAML==6.0.2
pyzmq==26.2.0
referencing==0.35.1
regex==2024.7.24
requests==2.32.3
rich==13.8.0
rpds-py==0.20.0
rsa==4.9
ruff==0.6.5
safetensors==0.4.5
scikit-image==0.24.0
scikit-learn==1.5.1
scipy==1.14.1
seaborn==0.13.2
semantic-version==2.10.0
sentence-transformers==3.0.1
shapely==2.0.6
shellingham==1.5.4
six==1.16.0
smart-open==7.0.4
sniffio==1.3.1
soundfile==0.12.1
soupsieve==2.6
spacy==3.7.6
spacy-legacy==3.0.12
spacy-loggers==1.0.5
SQLAlchemy==2.0.35
srsly==2.4.8
stack-data==0.6.3
starlette==0.38.5
sympy==1.13.2
tenacity==8.5.0
thinc==8.2.4
threadpoolctl==3.5.0
tifffile==2024.8.30
tiktoken==0.7.0
tinycss2==1.3.0
tokenizers==0.19.1
tomlkit==0.12.0
torch
torchaudio
torchvision
tornado==6.4.1
tqdm==4.66.5
traitlets==5.14.3
transformers==4.44.2
typer==0.12.5
typing_extensions==4.12.2
tzdata==2024.1
uc-micro-py==1.0.3
ultralytics==8.3.13
ultralytics-thop==2.0.9
uritemplate==4.1.1
urllib3==2.2.2
uvicorn==0.30.6
wasabi==1.1.3
wcwidth==0.2.13
weasel==0.4.1
webencodings==0.5.1
websockets==11.0.3
widgetsnbextension==4.0.13
wrapt==1.16.0
yarg==0.1.9
yarl==1.11.1
response.py
ADDED
@@ -0,0 +1,104 @@
import os
import logging
import google.generativeai as genai
import json
from dotenv import load_dotenv
import re

# Load environment variables
load_dotenv()

# Configure Gemini API
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# Set up logging
logging.basicConfig(level=logging.INFO)


def format_response(json_string):
    # Remove the "```json" fence at the start, the trailing "```", and stray markdown characters
    clean_string = json_string.strip().replace("```json", "").replace("```", "").replace("*", "").replace("`", "").strip()
    # Convert the cleaned string to a Python dictionary
    return json.loads(clean_string)

def generate_response(context: str, query: str) -> dict:
    """Generates a response from the Gemini model based on the provided context and query."""

    model = genai.GenerativeModel(
        "models/gemini-1.5-flash",
        system_instruction="""
        You are a document query system named Verbisense.
        Instructions for handling context and query:
        1. When context is provided: Answer the query by prioritizing the information from the context. If the context is sufficient to address the query, base your response on it.
        2. When no context is provided: Answer the query directly, ensuring clarity and relevance.
        3. When the context is incomplete or insufficient: Supplement the context with relevant details from the query to provide a well-rounded and comprehensive answer.

        The response should be generated with the following structure:
        {{
            "summary": "A clear and concise summary of the answer.",
            "heading1": "Main Heading",
            "heading2": [
                "Subheading 1",
                "Subheading 2"
            ],
            "points": {{
                "Subheading 1": ["point 1", "point 2", ...],
                "Subheading 2": ["point 1", "point 2", ...]
            }},
            "example": [
                "Example for Subheading 1",
                "Example for Subheading 2"
            ],
            "key_takeaways": "Key takeaways or insights from the answer."
        }}

        Guidelines for formatting and content creation:
        1. Provide a summary only if the context is not sufficient to answer the query. The summary should be a concise overview of the response.
        2. Use simple, clear, and user-friendly language. Your responses should be easily understandable by a general audience.
        3. Ensure the JSON structure is properly formatted. Use appropriate nesting and consistent punctuation so the response can be integrated directly into a webpage.
        4. Provide detailed, insightful, and informative answers. Ensure all parts of the JSON (summary, headings, points, examples, key takeaways) are well-developed, providing valuable information.
        5. Organize information logically. Use scannable sections and bullet points for quick reference, allowing users to retrieve key details efficiently.
        6. Provide the key takeaways in the response if it is not a greeting or simple message. This should be a clear and concise statement summarizing the main insights or conclusions from the answer.
        7. Try to provide 5-10 points for each subheading. This helps provide a comprehensive and detailed response to the query.
        8. Don't limit the headings and subheadings to the ones provided in the query. Feel free to add more headings and subheadings as needed to provide a complete response.
        9. Provide as much information as possible in the response. This helps ensure that the user gets a comprehensive answer to their query.
        10. Check multiple times whether the output follows the format mentioned above. This helps ensure that the response can be easily integrated into a webpage.

        Guidelines for greeting handling:
        1. Use a warm and approachable tone. Keep it friendly, but concise and welcoming.
        2. Limit greeting responses to the 'summary' key only. For example, respond with a brief statement like: "Hello! How can I assist you today?"
        3. Avoid unnecessary over-explanation in greetings. Keep the focus on inviting the user to continue the interaction.

        Key considerations for all responses:
        1. Your identity is Verbisense. Ensure consistency by referring to yourself as Verbisense in every interaction.
        2. Prioritize information and engagement. Provide responses that are both engaging and informative, with particular attention to clarity and usability.
        3. Tailor each response to the context and query. Ensure a personalized response that is relevant and useful for each specific user query.
        """, generation_config={"response_mime_type": "application/json"}
    )

    # Define a general prompt template for other queries
    general_prompt_template = f"""
    Given the following context and query, generate a JSON-formatted answer optimized for direct integration into a webpage.

    Context: {context if context else "None"}
    Query: {query}
    """

    try:
        # Generate content from the model
        response = model.generate_content(general_prompt_template)
        print(response.text)
        response_json = format_response(response.text)

        logging.info("Response generated successfully.")

        return response_json

    except Exception as e:
        logging.error(f"Error generating content from Gemini: {e}")
        return {"error": "Failed to generate content from Gemini."}
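A quick sketch of what format_response above is meant to handle: it strips the markdown fences Gemini often wraps around JSON output before parsing. The sample reply string is an assumption, not real model output, and importing response assumes the dependencies are installed and GEMINI_API_KEY is available, since the module configures the Gemini client at import time:

from response import format_response

# Hypothetical model reply wrapped in a ```json fence.
raw_reply = """```json
{"summary": "Hello! How can I assist you today?"}
```"""

parsed = format_response(raw_reply)
print(parsed["summary"])  # -> Hello! How can I assist you today?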
source.py
ADDED
@@ -0,0 +1,195 @@
import os
from typing import List, Dict, Any
import pandas as pd
import numpy as np
import torch
from sentence_transformers import SentenceTransformer, util
from time import perf_counter as timer
from concurrent.futures import ThreadPoolExecutor
from dotenv import load_dotenv
import logging
import google.generativeai as genai
import warnings
import json


# Suppress specific FutureWarning messages
warnings.filterwarnings("ignore", category=FutureWarning)


# Load environment variables
load_dotenv()

# Gemini API key
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# Set up logging
logging.basicConfig(level=logging.INFO)

# Import the processors (custom modules in src/)
from src.text_processor import process_text_file
from src.audio_processor import process_audio_from_url
from src.video_processor import process_video_file
from src.image_processor import process_image_file

from response import generate_response

def process_files(file_paths: List[str]) -> List[Dict[str, Any]]:
    """Processes a list of files in parallel and returns their processed content."""
    if file_paths == []:
        logging.info("No files to process")
        return []
    def process_single_file(file_path):
        _, extension = os.path.splitext(file_path)
        extension = extension.lower()
        file_name = os.path.basename(file_path)

        if "?alt=media&token=" in extension:
            extension = list(extension.split("?"))[0]
        print("\nprocessing file type : ", extension)
        try:
            if extension in ['.txt', '.pdf', '.docx']:
                return process_text_file(file_path)
            elif extension in ['.mp3', '.wav', '.flac']:
                return process_audio_from_url(file_path)
            elif extension in ['.mp4']:
                return process_video_file(file_path)
            elif extension in ['.png', '.jpg', '.jpeg']:
                return process_image_file(file_path)
            else:
                logging.warning(f"Unsupported file type: {extension} for file {file_name}")
                return []
        except Exception as e:
            logging.error(f"Error processing file {file_name}: {e}", exc_info=True)
            return []
    try:
        # Process files in parallel, limiting threads to the number of CPU cores
        with ThreadPoolExecutor(max_workers=min(len(file_paths), os.cpu_count())) as executor:
            results = executor.map(process_single_file, file_paths)
            # Flatten the results
            processed_data = [item for result in results for item in result]

        if not processed_data:
            return []
        return processed_data
    except ValueError:
        logging.error("File list contains invalid file paths")
        return []


def create_embeddings(processed_data: List[Dict[str, Any]], embedding_model: SentenceTransformer) -> pd.DataFrame:
    """Generates embeddings for processed data."""
    try:
        text_chunks = [item["text"] for item in processed_data]
        embeddings_list = []  # Store embeddings in a list
        batch_size = 32

        # Process embeddings in batches to optimize memory usage
        for i in range(0, len(text_chunks), batch_size):
            batch_embeddings = embedding_model.encode(text_chunks[i:i + batch_size], convert_to_tensor=False)  # Avoid torch tensors
            embeddings_list.extend(batch_embeddings)  # Accumulate embeddings
            logging.info(f"Processed batch {i // batch_size + 1}/{(len(text_chunks) + batch_size - 1) // batch_size}")

        # Convert to a numpy array of float32 for compatibility
        embeddings_np = np.array(embeddings_list).astype('float32')

        # Create a DataFrame with the embeddings
        df = pd.DataFrame(processed_data)
        df["embedding"] = embeddings_np.tolist()
        return df
    except Exception as e:
        logging.error(f"Error creating embeddings: {e}", exc_info=True)
        return pd.DataFrame()


def semantic_search(query: str, embeddings_df: pd.DataFrame, embedding_model: SentenceTransformer, num_results: int) -> List[Dict[str, Any]]:
    """Performs semantic search using embeddings and returns the top results."""
    try:
        # Create an embedding for the query
        query_embedding = embedding_model.encode(query, convert_to_tensor=True)

        # Convert embeddings from the DataFrame to a tensor
        embeddings = torch.tensor(np.array(embeddings_df["embedding"].tolist()), dtype=torch.float32).to(embedding_model.device)

        # Measure search time
        start_time = timer()
        dot_scores = util.dot_score(query_embedding, embeddings)[0]
        end_time = timer()
        logging.info(f"Time taken to get scores on {len(embeddings)} embeddings: {end_time - start_time:.5f} seconds.")

        # Get the top results
        top_results = torch.topk(dot_scores, k=num_results)
        results = []

        # Format the results
        for score, idx in zip(top_results.values, top_results.indices):
            idx = idx.item()  # Convert tensor to integer
            result = {
                "score": score.item(),
                "text": embeddings_df.iloc[idx]["text"],
                "file_name": embeddings_df.iloc[idx]["file_name"],
                **{k: v for k, v in embeddings_df.iloc[idx].items() if k not in ["text", "file_name", "embedding"]}
            }
            results.append(result)

        return results
    except Exception as e:
        logging.error(f"Error during semantic search: {e}", exc_info=True)
        return []


def count_tokens(text: str) -> int:
    """Roughly estimate the number of tokens in a text."""
    return len(text.split())

def main(files: list, query: str, min_text_length: int = 1000000, max_gemini_tokens: int = 7300):
    """Main function to process files and either send the text directly to Gemini or perform semantic search first."""

    try:
        # Process the files
        processed_data = process_files(files)
        # Combine all text chunks
        combined_text = " ".join([item["text"] for item in processed_data])

        logging.info(f"Total text length: {len(combined_text)} characters")

        # Count tokens and check whether they exceed the allowed limit for Gemini
        token_count = count_tokens(combined_text)
        print("Token count : ", token_count)
        # If the token count is within limits, send directly to Gemini for response generation
        if token_count < min_text_length:
            logging.info(f"Text is below the threshold ({min_text_length} tokens). Sending directly to Gemini.")
            response = generate_response(combined_text, query)
            return response
        else:
            logging.info(f"Text exceeds the threshold ({min_text_length} tokens). Performing semantic search.")
            # Only initialize the embedding model when needed
            embedding_model = SentenceTransformer("all-mpnet-base-v2", device="cuda" if torch.cuda.is_available() else "cpu")

            # Create embeddings
            embeddings_df = create_embeddings(processed_data, embedding_model)
            if embeddings_df.empty:
                logging.error("No embeddings created. Exiting.")
                return {"error": "Failed to create embeddings from the processed data."}

            # Perform semantic search
            num_results = min(1, len(embeddings_df))  # Adjust the number of results based on available data
            results = semantic_search(query, embeddings_df, embedding_model, num_results)
            print("Semantic search returned the top results with relevance scores and contextual information.\n", results)
            if not results:
                logging.error("No results found. Exiting.")
                return {"error": "Semantic search returned no results."}
            context = " ".join([result['text'] for result in results])  # Build the context from the search results
            response = generate_response(context, query)
            return response
    except Exception as e:
        logging.error(f"Error: {e}")
        return {"error": "An error occurred during the main process."}

if __name__ == "__main__":
    files = [
        # Your file paths go here
    ]
    query = "Introduce yourself, what are you?"
    main(files, query)
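For illustration, a sketch of driving the pipeline in source.py with a Firebase Storage style download URL (the bucket path and token are placeholders, not real files; the "?alt=media&token=" suffix is exactly what process_files strips before choosing a processor):

from source import main

# Placeholder download URL in the format produced by Firebase Storage.
files = [
    "https://firebasestorage.googleapis.com/v0/b/verbisense.appspot.com/o/notes.pdf?alt=media&token=EXAMPLE"
]

result = main(files, "Summarize the uploaded document")
print(result)  # dict returned by generate_response(), or an {"error": ...} dict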
src/audio_processor.py
ADDED
@@ -0,0 +1,53 @@
import whisper
import requests
import ffmpeg
import numpy as np
from typing import List, Dict, Any

def process_audio_from_url(audio_url: str) -> List[Dict[str, Any]]:
    # Download the audio file content
    response = requests.get(audio_url, stream=True)
    response.raise_for_status()

    # Use ffmpeg to decode the audio stream
    try:
        out, _ = (
            ffmpeg
            .input('pipe:0')
            .output('pipe:1', format='f32le', acodec='pcm_f32le', ac=1, ar='16k')
            .run(input=response.raw.read(), capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

    # Convert the audio to the format Whisper expects
    audio = np.frombuffer(out, np.float32).flatten()

    # Load the Whisper model
    model = whisper.load_model("base")

    # Transcribe the audio
    result = model.transcribe(audio)

    segments = []
    for segment in result["segments"]:
        segments.append({
            "file_name": audio_url.split("/")[-1],  # Extract the filename from the URL
            "text": segment["text"]
        })
    return segments

def process_audio_data(audio: np.ndarray, file_name: str) -> List[Dict[str, Any]]:
    # Load the Whisper model
    model = whisper.load_model("base")

    # Transcribe the audio
    result = model.transcribe(audio)

    segments = []
    for segment in result["segments"]:
        segments.append({
            "file_name": file_name,  # Ensure file_name is added
            "text": segment["text"]
        })
    return segments
src/image_processor.py
ADDED
@@ -0,0 +1,55 @@
import easyocr
import requests
import io
from PIL import Image
from typing import List, Dict, Any
import os
import numpy as np
from gradio_client import Client


def process_image_file(image_url: str) -> List[Dict[str, Any]]:
    # Fetch the image content from the URL
    response = requests.get(image_url)

    # Check if the request was successful
    if response.status_code == 200:
        # Load the image from the response content using PIL
        image_stream = io.BytesIO(response.content)
        image = Image.open(image_stream)

        # Convert the image to a NumPy array, which is supported by EasyOCR
        image_np = np.array(image)

        # Use EasyOCR to extract text from the image
        reader = easyocr.Reader(['en'])
        result = reader.readtext(image_np)

        print("*" * 50 + image_url)

        # Combine the extracted text from EasyOCR
        extracted_text = "\n".join([detection[1] for detection in result])

        if len(extracted_text.split()) < 5:
            # Use the BLIP model for image captioning
            client = Client("HARISH20205/blip-image-caption")
            caption_result = client.predict(image_url=image_url, api_name="/predict")
            content = "\nImage Caption:\n" + str(caption_result)
            return [{
                "file_name": os.path.basename(image_url),
                "text": content,
            }]
        # Format the content
        content = "Image Data:\n" + extracted_text

        return [{
            "file_name": os.path.basename(image_url),
            "text": content,
        }]
    else:
        return [{
            "file_name": os.path.basename(image_url),
            "text": "Failed to retrieve image.",
        }]
src/text_processor.py
ADDED
@@ -0,0 +1,85 @@
import os
from typing import List, Dict, Any
import fitz  # PyMuPDF
import docx
import requests
import io

def process_text_file(file_url: str) -> List[Dict[str, Any]]:
    _, extension = os.path.splitext(file_url)
    extension = extension.lower()

    if "?alt=media&token=" in extension:
        extension = list(extension.split("?"))[0]
    if extension == '.txt':
        return process_txt(file_url)
    elif extension == '.pdf':
        return process_pdf(file_url)
    elif extension == '.docx':
        return process_docx(file_url)
    else:
        raise ValueError(f"Unsupported text file type: {extension}")

def process_txt(txt_url: str) -> List[Dict[str, Any]]:
    # Fetch the TXT file content from the URL
    response = requests.get(txt_url)

    # Check if the request was successful
    if response.status_code == 200:
        content = response.text
        return [{
            "file_name": os.path.basename(txt_url),
            "text": content,
            "page_number": 1
        }]
    else:
        print(f"Failed to fetch the TXT file. Status code: {response.status_code}")
        return []

def process_pdf(pdf_url: str) -> List[Dict[str, Any]]:
    # Fetch the PDF file content from the URL
    response = requests.get(pdf_url)

    # Check if the request was successful
    if response.status_code == 200:
        # Load the PDF file from the response content
        pdf_stream = io.BytesIO(response.content)

        # Open the PDF file with PyMuPDF
        pdf_document = fitz.open(stream=pdf_stream, filetype="pdf")

        # Extract text from all pages
        pdf_text = ""
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)  # Load the page
            pdf_text += page.get_text("text")  # Extract text from the page

        return [{
            "file_name": os.path.basename(pdf_url),
            "text": pdf_text
        }]
    else:
        print(f"Failed to fetch the PDF file. Status code: {response.status_code}")
        return []

def process_docx(docx_url: str) -> List[Dict[str, Any]]:
    # Fetch the DOCX file content from the URL
    response = requests.get(docx_url)

    # Check if the request was successful
    if response.status_code == 200:
        # Load the DOCX file from the response content
        docx_stream = io.BytesIO(response.content)

        # Open the DOCX file with python-docx
        doc = docx.Document(docx_stream)

        # Extract text from the DOCX file
        content = "\n".join([para.text for para in doc.paragraphs])
        return [{
            "file_name": os.path.basename(docx_url),
            "text": content,
            "page_number": 1  # DOCX doesn't have pages, so just use 1
        }]
    else:
        print(f"Failed to fetch the DOCX file. Status code: {response.status_code}")
        return []
src/video_processor.py
ADDED
@@ -0,0 +1,23 @@
import ffmpeg
import numpy as np
from src.audio_processor import process_audio_data
import os

def process_video_file(file_path: str):
    # Use ffmpeg to extract audio from the video file
    try:
        out, _ = (
            ffmpeg
            .input(file_path)
            .output('pipe:1', format='f32le', acodec='pcm_f32le', ac=1, ar='16k')
            .run(capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to extract audio from video: {e.stderr.decode()}") from e

    # Convert the audio to the format Whisper expects
    audio = np.frombuffer(out, np.float32).flatten()

    # Pass the file name to the audio processor
    file_name = os.path.basename(file_path)
    result = process_audio_data(audio, file_name)
    return result