anirudh-248 committed on
Commit c401d26 · 1 Parent(s): 0f2fc01

add application file

Files changed (3)
  1. .gitignore +2 -0
  2. app.py +85 -0
  3. requirements.txt +115 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ env
+ .env
app.py ADDED
@@ -0,0 +1,85 @@
+ import gradio as gr
+ import os
+ from dotenv import load_dotenv
+ import google.generativeai as genai
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+ from sentence_transformers import SentenceTransformer
+ from qdrant_client import QdrantClient
+
+ load_dotenv()
+
+ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
+ gemini_model = genai.GenerativeModel('gemini-2.0-flash')
+
+ model = HuggingFaceEmbedding(
+     model_name="llamaindex/vdr-2b-multi-v1",
+     device="cpu",
+     trust_remote_code=True,
+ )
+
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
+
+ client = QdrantClient(
+     url=os.getenv("QDRANT_URL"),
+     api_key=os.getenv("QDRANT_API_KEY")
+ )
+
+ TXT_COLLECTION_NAME = "llama-summaries"
+ AUD_COLLECTION_NAME = "sbert-audio"
+
+ def retrieve_and_generate(query, history):
+
+     find = model.get_query_embedding(query)
+     audio_find = sbert_model.encode(query)
+
+     text_results = client.query_points(
+         collection_name=TXT_COLLECTION_NAME,
+         query=find,
+         using="text",
+         with_payload=["text"],
+         limit=5
+     )
+
+     audio_results = client.query_points(
+         collection_name=AUD_COLLECTION_NAME,
+         query=audio_find,
+         using="text",
+         with_payload=["text"],
+         limit=5
+     )
+
+     context = []
+
+     for idx, point in enumerate(audio_results.points):
+         if text := point.payload.get('text'):
+             context.append(f"[Audio Excerpt {idx+1}]: {text}")
+
+     for idx, point in enumerate(text_results.points):
+         if text := point.payload.get('text'):
+             context.append(f"[Slide Content {idx+1}]: {text}")
+
+     context_text = "\n\n".join(context)
+
+     prompt = f"""
+     You are a financial expert assistant. Synthesize a comprehensive answer using the
+     provided context from multiple sources. If information is insufficient, state that clearly.
+
+     Question: {query}
+
+     Context from various sources:
+     {context_text}
+
+     Provide a structured, professional response with clear sections when appropriate:
+     """
+
+     response = gemini_model.generate_content([prompt], generation_config={"temperature": 0.5})
+     return response.text
+
+ demo = gr.ChatInterface(
+     fn=retrieve_and_generate,
+     title="Financial AI Assistant",
+     description="Ask financial questions and receive AI-powered responses based on multimodal data.",
+ )
+
+ demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,115 @@
+ aiofiles==23.2.1
+ aiohappyeyeballs==2.4.6
+ aiohttp==3.11.12
+ aiosignal==1.3.2
+ annotated-types==0.7.0
+ anyio==4.8.0
+ attrs==25.1.0
+ cachetools==5.5.1
+ certifi==2025.1.31
+ charset-normalizer==3.4.1
+ click==8.1.8
+ colorama==0.4.6
+ dataclasses-json==0.6.7
+ Deprecated==1.2.18
+ dirtyjson==1.0.8
+ fastapi==0.115.8
+ ffmpy==0.5.0
+ filelock==3.17.0
+ filetype==1.2.0
+ frozenlist==1.5.0
+ fsspec==2025.2.0
+ google-ai-generativelanguage==0.6.15
+ google-api-core==2.24.1
+ google-api-python-client==2.161.0
+ google-auth==2.38.0
+ google-auth-httplib2==0.2.0
+ google-generativeai==0.8.4
+ googleapis-common-protos==1.67.0
+ gradio==5.16.1
+ gradio_client==1.7.0
+ greenlet==3.1.1
+ grpcio==1.70.0
+ grpcio-status==1.70.0
+ grpcio-tools==1.70.0
+ h11==0.14.0
+ h2==4.2.0
+ hpack==4.1.0
+ httpcore==1.0.7
+ httplib2==0.22.0
+ httpx==0.28.1
+ huggingface-hub==0.29.0
+ hyperframe==6.1.0
+ idna==3.10
+ Jinja2==3.1.5
+ joblib==1.4.2
+ llama-index-core==0.12.19
+ llama-index-embeddings-huggingface==0.5.1
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ marshmallow==3.26.1
+ mdurl==0.1.2
+ mpmath==1.3.0
+ multidict==6.1.0
+ mypy-extensions==1.0.0
+ nest-asyncio==1.6.0
+ networkx==3.4.2
+ nltk==3.9.1
+ numpy==2.2.3
+ orjson==3.10.15
+ packaging==24.2
+ pandas==2.2.3
+ pillow==11.1.0
+ portalocker==2.10.1
+ propcache==0.2.1
+ proto-plus==1.26.0
+ protobuf==5.29.3
+ pyasn1==0.6.1
+ pyasn1_modules==0.4.1
+ pydantic==2.10.6
+ pydantic_core==2.27.2
+ pydub==0.25.1
+ Pygments==2.19.1
+ pyparsing==3.2.1
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ python-multipart==0.0.20
+ pytz==2025.1
+ PyYAML==6.0.2
+ qdrant-client==1.13.2
+ regex==2024.11.6
+ requests==2.32.3
+ rich==13.9.4
+ rsa==4.9
+ ruff==0.9.6
+ safehttpx==0.1.6
+ safetensors==0.5.2
+ scikit-learn==1.6.1
+ scipy==1.15.2
+ semantic-version==2.10.0
+ sentence-transformers==3.4.1
+ setuptools==75.8.0
+ shellingham==1.5.4
+ six==1.17.0
+ sniffio==1.3.1
+ SQLAlchemy==2.0.38
+ starlette==0.45.3
+ sympy==1.13.1
+ tenacity==9.0.0
+ threadpoolctl==3.5.0
+ tiktoken==0.9.0
+ tokenizers==0.21.0
+ tomlkit==0.13.2
+ torch==2.6.0
+ tqdm==4.67.1
+ transformers==4.49.0
+ typer==0.15.1
+ typing-inspect==0.9.0
+ typing_extensions==4.12.2
+ tzdata==2025.1
+ uritemplate==4.1.1
+ urllib3==2.3.0
+ uvicorn==0.34.0
+ websockets==14.2
+ wrapt==1.17.2
+ yarl==1.18.3