anirudh-248
committed on
Commit
·
c401d26
1
Parent(s):
0f2fc01
add application file
Browse files- .gitignore +2 -0
- app.py +85 -0
- requirements.txt +115 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
env
|
2 |
+
.env
|
app.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import google.generativeai as genai
|
5 |
+
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
6 |
+
from sentence_transformers import SentenceTransformer
|
7 |
+
from qdrant_client import QdrantClient
|
8 |
+
|
9 |
+
load_dotenv()
|
10 |
+
|
11 |
+
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
|
12 |
+
gemini_model = genai.GenerativeModel('gemini-2.0-flash')
|
13 |
+
|
14 |
+
model = HuggingFaceEmbedding(
|
15 |
+
model_name="llamaindex/vdr-2b-multi-v1",
|
16 |
+
device="cpu",
|
17 |
+
trust_remote_code=True,
|
18 |
+
)
|
19 |
+
|
20 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
21 |
+
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
|
22 |
+
|
23 |
+
client = QdrantClient(
|
24 |
+
url=os.getenv("QDRANT_URL"),
|
25 |
+
api_key=os.getenv("QDRANT_API_KEY")
|
26 |
+
)
|
27 |
+
|
28 |
+
TXT_COLLECTION_NAME = "llama-summaries"
|
29 |
+
AUD_COLLECTION_NAME = "sbert-audio"
|
30 |
+
|
31 |
+
def retrieve_and_generate(query, history):
|
32 |
+
|
33 |
+
find = model.get_query_embedding(query)
|
34 |
+
audio_find = sbert_model.encode(query)
|
35 |
+
|
36 |
+
text_results = client.query_points(
|
37 |
+
collection_name=TXT_COLLECTION_NAME,
|
38 |
+
query=find,
|
39 |
+
using="text",
|
40 |
+
with_payload=["text"],
|
41 |
+
limit=5
|
42 |
+
)
|
43 |
+
|
44 |
+
audio_results = client.query_points(
|
45 |
+
collection_name=AUD_COLLECTION_NAME,
|
46 |
+
query=audio_find,
|
47 |
+
using="text",
|
48 |
+
with_payload=["text"],
|
49 |
+
limit=5
|
50 |
+
)
|
51 |
+
|
52 |
+
context = []
|
53 |
+
|
54 |
+
for idx, point in enumerate(audio_results.points):
|
55 |
+
if text := point.payload.get('text'):
|
56 |
+
context.append(f"[Audio Excerpt {idx+1}]: {text}")
|
57 |
+
|
58 |
+
for idx, point in enumerate(text_results.points):
|
59 |
+
if text := point.payload.get('text'):
|
60 |
+
context.append(f"[Slide Content {idx+1}]: {text}")
|
61 |
+
|
62 |
+
context_text = "\n\n".join(context)
|
63 |
+
|
64 |
+
prompt = f"""
|
65 |
+
You are a financial expert assistant. Synthesize a comprehensive answer using the
|
66 |
+
provided context from multiple sources. If information is insufficient, state that clearly.
|
67 |
+
|
68 |
+
Question: {query}
|
69 |
+
|
70 |
+
Context from various sources:
|
71 |
+
{context_text}
|
72 |
+
|
73 |
+
Provide a structured, professional response with clear sections when appropriate:
|
74 |
+
"""
|
75 |
+
|
76 |
+
response = gemini_model.generate_content([prompt], generation_config={"temperature": 0.5})
|
77 |
+
return response.text
|
78 |
+
|
79 |
+
demo = gr.ChatInterface(
|
80 |
+
fn=retrieve_and_generate,
|
81 |
+
title="Financial AI Assistant",
|
82 |
+
description="Ask financial questions and receive AI-powered responses based on multimodal data.",
|
83 |
+
)
|
84 |
+
|
85 |
+
demo.launch(share=True)
|
requirements.txt
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==23.2.1
|
2 |
+
aiohappyeyeballs==2.4.6
|
3 |
+
aiohttp==3.11.12
|
4 |
+
aiosignal==1.3.2
|
5 |
+
annotated-types==0.7.0
|
6 |
+
anyio==4.8.0
|
7 |
+
attrs==25.1.0
|
8 |
+
cachetools==5.5.1
|
9 |
+
certifi==2025.1.31
|
10 |
+
charset-normalizer==3.4.1
|
11 |
+
click==8.1.8
|
12 |
+
colorama==0.4.6
|
13 |
+
dataclasses-json==0.6.7
|
14 |
+
Deprecated==1.2.18
|
15 |
+
dirtyjson==1.0.8
|
16 |
+
fastapi==0.115.8
|
17 |
+
ffmpy==0.5.0
|
18 |
+
filelock==3.17.0
|
19 |
+
filetype==1.2.0
|
20 |
+
frozenlist==1.5.0
|
21 |
+
fsspec==2025.2.0
|
22 |
+
google-ai-generativelanguage==0.6.15
|
23 |
+
google-api-core==2.24.1
|
24 |
+
google-api-python-client==2.161.0
|
25 |
+
google-auth==2.38.0
|
26 |
+
google-auth-httplib2==0.2.0
|
27 |
+
google-generativeai==0.8.4
|
28 |
+
googleapis-common-protos==1.67.0
|
29 |
+
gradio==5.16.1
|
30 |
+
gradio_client==1.7.0
|
31 |
+
greenlet==3.1.1
|
32 |
+
grpcio==1.70.0
|
33 |
+
grpcio-status==1.70.0
|
34 |
+
grpcio-tools==1.70.0
|
35 |
+
h11==0.14.0
|
36 |
+
h2==4.2.0
|
37 |
+
hpack==4.1.0
|
38 |
+
httpcore==1.0.7
|
39 |
+
httplib2==0.22.0
|
40 |
+
httpx==0.28.1
|
41 |
+
huggingface-hub==0.29.0
|
42 |
+
hyperframe==6.1.0
|
43 |
+
idna==3.10
|
44 |
+
Jinja2==3.1.5
|
45 |
+
joblib==1.4.2
|
46 |
+
llama-index-core==0.12.19
|
47 |
+
llama-index-embeddings-huggingface==0.5.1
|
48 |
+
markdown-it-py==3.0.0
|
49 |
+
MarkupSafe==2.1.5
|
50 |
+
marshmallow==3.26.1
|
51 |
+
mdurl==0.1.2
|
52 |
+
mpmath==1.3.0
|
53 |
+
multidict==6.1.0
|
54 |
+
mypy-extensions==1.0.0
|
55 |
+
nest-asyncio==1.6.0
|
56 |
+
networkx==3.4.2
|
57 |
+
nltk==3.9.1
|
58 |
+
numpy==2.2.3
|
59 |
+
orjson==3.10.15
|
60 |
+
packaging==24.2
|
61 |
+
pandas==2.2.3
|
62 |
+
pillow==11.1.0
|
63 |
+
portalocker==2.10.1
|
64 |
+
propcache==0.2.1
|
65 |
+
proto-plus==1.26.0
|
66 |
+
protobuf==5.29.3
|
67 |
+
pyasn1==0.6.1
|
68 |
+
pyasn1_modules==0.4.1
|
69 |
+
pydantic==2.10.6
|
70 |
+
pydantic_core==2.27.2
|
71 |
+
pydub==0.25.1
|
72 |
+
Pygments==2.19.1
|
73 |
+
pyparsing==3.2.1
|
74 |
+
python-dateutil==2.9.0.post0
|
75 |
+
python-dotenv==1.0.1
|
76 |
+
python-multipart==0.0.20
|
77 |
+
pytz==2025.1
|
78 |
+
PyYAML==6.0.2
|
79 |
+
qdrant-client==1.13.2
|
80 |
+
regex==2024.11.6
|
81 |
+
requests==2.32.3
|
82 |
+
rich==13.9.4
|
83 |
+
rsa==4.9
|
84 |
+
ruff==0.9.6
|
85 |
+
safehttpx==0.1.6
|
86 |
+
safetensors==0.5.2
|
87 |
+
scikit-learn==1.6.1
|
88 |
+
scipy==1.15.2
|
89 |
+
semantic-version==2.10.0
|
90 |
+
sentence-transformers==3.4.1
|
91 |
+
setuptools==75.8.0
|
92 |
+
shellingham==1.5.4
|
93 |
+
six==1.17.0
|
94 |
+
sniffio==1.3.1
|
95 |
+
SQLAlchemy==2.0.38
|
96 |
+
starlette==0.45.3
|
97 |
+
sympy==1.13.1
|
98 |
+
tenacity==9.0.0
|
99 |
+
threadpoolctl==3.5.0
|
100 |
+
tiktoken==0.9.0
|
101 |
+
tokenizers==0.21.0
|
102 |
+
tomlkit==0.13.2
|
103 |
+
torch==2.6.0
|
104 |
+
tqdm==4.67.1
|
105 |
+
transformers==4.49.0
|
106 |
+
typer==0.15.1
|
107 |
+
typing-inspect==0.9.0
|
108 |
+
typing_extensions==4.12.2
|
109 |
+
tzdata==2025.1
|
110 |
+
uritemplate==4.1.1
|
111 |
+
urllib3==2.3.0
|
112 |
+
uvicorn==0.34.0
|
113 |
+
websockets==14.2
|
114 |
+
wrapt==1.17.2
|
115 |
+
yarl==1.18.3
|