Spaces:
Running
Running
initial commit
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .github/workflows/dockerhub.yaml +39 -0
- .gitignore +148 -0
- Dockerfile +32 -0
- README.md +2 -2
- app.py +55 -0
- config.ini +39 -0
- examples/easy_ocr_example.py +12 -0
- examples/url_text_extraction_example.py +10 -0
- logging_config.yaml +27 -0
- requirements.txt +99 -0
- src/__init__.py +19 -0
- src/api/__init__.py +4 -0
- src/api/analytics_api.py +235 -0
- src/api/conversai_api.py +645 -0
- src/api/jewel_mirror.py +21 -0
- src/api/jwt_bearer.py +29 -0
- src/api/speech_api.py +85 -0
- src/api/user_management_api.py +157 -0
- src/jewel_mirror/__init__.py +0 -0
- src/jewel_mirror/jewel_mirror.py +69 -0
- src/llms/__init__.py +0 -0
- src/models/__init__.py +4 -0
- src/models/apis_models.py +207 -0
- src/models/response_handling_models.py +27 -0
- src/models/utls.py +11 -0
- src/pipeline/__init__.py +4 -0
- src/pipeline/conversai_analytic_pipeline.py +34 -0
- src/pipeline/conversai_pipeline.py +73 -0
- src/pipeline/speech_transcription_pipeline.py +20 -0
- src/pipeline/user_management_pipeline.py +80 -0
- src/prompts/__init__.py +0 -0
- src/prompts/custom_prompts.py +70 -0
- src/services/__init__.py +4 -0
- src/services/answer_query/__init__.py +4 -0
- src/services/answer_query/answerquery.py +98 -0
- src/services/document/__init__.py +4 -0
- src/services/document/add_document.py +30 -0
- src/services/embeddings/BGE-M3_vector_embedding.py +13 -0
- src/services/embeddings/Qdrant_BM25_embedding.py +11 -0
- src/services/embeddings/__init__.py +4 -0
- src/services/embeddings/jina_embeddings.py +8 -0
- src/services/embeddings/sentence_transformers_all_MiniLM_L6_v2_vector_embedding.py +20 -0
- src/services/file_analyzer/__init__.py +4 -0
- src/services/file_analyzer/data_analyzer.py +28 -0
- src/services/get_links/__init__.py +4 -0
- src/services/get_links/web_scraper.py +49 -0
- src/services/ocr/__init__.py +4 -0
- src/services/ocr/easy_ocr/__init__.py +4 -0
- src/services/ocr/easy_ocr/easy_ocr_.py +19 -0
- src/services/ocr/replicate_ocr/__init__.py +0 -0
.github/workflows/dockerhub.yaml
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Publish ConversAI Docker image
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches: [ dev ]
|
6 |
+
|
7 |
+
jobs:
|
8 |
+
push_to_registry:
|
9 |
+
name: Push Docker image to Docker Hub
|
10 |
+
runs-on: ubuntu-latest
|
11 |
+
permissions:
|
12 |
+
packages: write
|
13 |
+
contents: read
|
14 |
+
attestations: write
|
15 |
+
steps:
|
16 |
+
- name: Check out the repo
|
17 |
+
uses: actions/checkout@v4
|
18 |
+
|
19 |
+
- name: Log in to Docker Hub
|
20 |
+
uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
|
21 |
+
with:
|
22 |
+
username: ${{ secrets.DOCKER_USERNAME }}
|
23 |
+
password: ${{ secrets.DOCKER_PASSWORD }}
|
24 |
+
|
25 |
+
- name: Extract metadata (tags, labels) for Docker
|
26 |
+
id: meta
|
27 |
+
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
|
28 |
+
with:
|
29 |
+
images: techconsp/tcp_frwjeiqhpyl9ty53cyfg2jtpbhwwzl_co
|
30 |
+
|
31 |
+
- name: Build and push Docker image
|
32 |
+
id: push
|
33 |
+
uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
|
34 |
+
with:
|
35 |
+
context: .
|
36 |
+
file: ./Dockerfile
|
37 |
+
push: true
|
38 |
+
tags: ${{ steps.meta.outputs.tags }}
|
39 |
+
labels: ${{ steps.meta.outputs.labels }}
|
.gitignore
ADDED
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
pip-wheel-metadata/
|
24 |
+
share/python-wheels/
|
25 |
+
*.egg-info/
|
26 |
+
.installed.cfg
|
27 |
+
*.egg
|
28 |
+
MANIFEST
|
29 |
+
|
30 |
+
# PyInstaller
|
31 |
+
# Usually these files are written by a python script from a template
|
32 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
33 |
+
*.manifest
|
34 |
+
*.spec
|
35 |
+
|
36 |
+
# Installer logs
|
37 |
+
pip-log.txt
|
38 |
+
pip-delete-this-directory.txt
|
39 |
+
|
40 |
+
# Unit test / coverage reports
|
41 |
+
htmlcov/
|
42 |
+
.tox/
|
43 |
+
.nox/
|
44 |
+
.coverage
|
45 |
+
.coverage.*
|
46 |
+
.cache
|
47 |
+
nosetests.xml
|
48 |
+
coverage.xml
|
49 |
+
*.cover
|
50 |
+
*.py,cover
|
51 |
+
.hypothesis/
|
52 |
+
.pytest_cache/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
target/
|
76 |
+
|
77 |
+
# Jupyter Notebook
|
78 |
+
.ipynb_checkpoints
|
79 |
+
|
80 |
+
# IPython
|
81 |
+
profile_default/
|
82 |
+
ipython_config.py
|
83 |
+
|
84 |
+
# pyenv
|
85 |
+
.python-version
|
86 |
+
|
87 |
+
# pipenv
|
88 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
89 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
90 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
91 |
+
# install all needed dependencies.
|
92 |
+
#Pipfile.lock
|
93 |
+
|
94 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
95 |
+
__pypackages__/
|
96 |
+
|
97 |
+
# Celery stuff
|
98 |
+
celerybeat-schedule
|
99 |
+
celerybeat.pid
|
100 |
+
|
101 |
+
# SageMath parsed files
|
102 |
+
*.sage.py
|
103 |
+
|
104 |
+
# Environments
|
105 |
+
.env
|
106 |
+
.venv
|
107 |
+
env/
|
108 |
+
venv/
|
109 |
+
ENV/
|
110 |
+
env.bak/
|
111 |
+
venv.bak/
|
112 |
+
|
113 |
+
# Spyder project settings
|
114 |
+
.spyderproject
|
115 |
+
.spyproject
|
116 |
+
|
117 |
+
# Rope project settings
|
118 |
+
.ropeproject
|
119 |
+
|
120 |
+
# mkdocs documentation
|
121 |
+
/site
|
122 |
+
|
123 |
+
# mypy
|
124 |
+
.mypy_cache/
|
125 |
+
.dmypy.json
|
126 |
+
dmypy.json
|
127 |
+
|
128 |
+
# Pyre type checker
|
129 |
+
.pyre/
|
130 |
+
|
131 |
+
# Machine Learning and Speech Libraries
|
132 |
+
# TensorFlow
|
133 |
+
*.ckpt*
|
134 |
+
*.pbtxt
|
135 |
+
*.tfevents*
|
136 |
+
# PyTorch
|
137 |
+
*.pt
|
138 |
+
# Keras
|
139 |
+
*.h5
|
140 |
+
# Scikit-learn
|
141 |
+
*.pkl
|
142 |
+
# Speech Recognition
|
143 |
+
*.wav
|
144 |
+
*.mp3
|
145 |
+
.idea/
|
146 |
+
logs
|
147 |
+
images
|
148 |
+
resources
|
Dockerfile
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10-slim
|
2 |
+
|
3 |
+
WORKDIR /app
|
4 |
+
|
5 |
+
COPY . /app
|
6 |
+
|
7 |
+
RUN chmod -R 777 /app
|
8 |
+
|
9 |
+
RUN apt-get update && \
|
10 |
+
apt-get upgrade -y && \
|
11 |
+
apt-get install -y \
|
12 |
+
build-essential \
|
13 |
+
git \
|
14 |
+
cmake \
|
15 |
+
poppler-utils \
|
16 |
+
ffmpeg \
|
17 |
+
libsm6 \
|
18 |
+
libxext6 && \
|
19 |
+
apt-get clean && \
|
20 |
+
rm -rf /var/lib/apt/lists/*
|
21 |
+
|
22 |
+
RUN pip install --no-cache-dir nltk && \
|
23 |
+
mkdir -p /app/nltk_data && \
|
24 |
+
chmod -R 777 /app/nltk_data && \
|
25 |
+
python -m nltk.downloader -d /app/nltk_data all
|
26 |
+
|
27 |
+
RUN pip install --no-cache-dir --upgrade pip && \
|
28 |
+
pip install --no-cache-dir -r requirements.txt
|
29 |
+
|
30 |
+
EXPOSE 8000
|
31 |
+
|
32 |
+
CMD ["python", "app.py"]
|
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
title: J7RSYILECL
|
3 |
emoji: 🚀
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
---
|
|
|
1 |
---
|
2 |
title: J7RSYILECL
|
3 |
emoji: 🚀
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: purple
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
---
|
app.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-07-31
|
4 |
+
"""
|
5 |
+
import uvicorn
|
6 |
+
from fastapi import FastAPI, Depends
|
7 |
+
from fastapi.middleware.cors import CORSMiddleware
|
8 |
+
from src import logging as logger
|
9 |
+
from src.api.conversai_api import conversai_api_router
|
10 |
+
from src.api.speech_api import speech_translator_router
|
11 |
+
from src.api.user_management_api import user_management_api_router
|
12 |
+
from src.utils.utils import load_ini_config
|
13 |
+
from src.api.analytics_api import analytic_endpoints_router
|
14 |
+
from src.api.jwt_bearer import access_check_bearer
|
15 |
+
from src.api.jewel_mirror import goldprice_apirouter
|
16 |
+
|
17 |
+
|
18 |
+
logger.info("---------------------------> Starting FastAPI Server <---------------------------")
|
19 |
+
|
20 |
+
config = load_ini_config("config.ini")
|
21 |
+
app = FastAPI(docs_url=config.get('fastapi_config', 'docs_url'), redoc_url=config.get('fastapi_config', 'redoc_url'),
|
22 |
+
openapi_url=config.get('fastapi_config', 'openapi_url')
|
23 |
+
)
|
24 |
+
|
25 |
+
PROTECTED = [Depends(access_check_bearer)]
|
26 |
+
|
27 |
+
logger.info("---------------------------> FastAPI Server Started <---------------------------")
|
28 |
+
app.add_middleware(
|
29 |
+
CORSMiddleware,
|
30 |
+
allow_origins=["*"], # Allow access from all sources
|
31 |
+
allow_credentials=True,
|
32 |
+
allow_methods=["*"], # Allow all HTTP methods
|
33 |
+
allow_headers=["*"], # Allow all request headers
|
34 |
+
)
|
35 |
+
app.include_router(user_management_api_router, prefix="/conversai")
|
36 |
+
logger.info("---------------------------> User Management API Started <---------------------------")
|
37 |
+
|
38 |
+
app.include_router(conversai_api_router, prefix="/conversai")
|
39 |
+
logger.info("---------------------------> ConversAI API Started <---------------------------")
|
40 |
+
|
41 |
+
app.include_router(speech_translator_router, prefix="/conversai")
|
42 |
+
logger.info("---------------------------> Speech Translator API Started <---------------------------")
|
43 |
+
|
44 |
+
app.include_router(analytic_endpoints_router, prefix="/conversai",dependencies=PROTECTED)
|
45 |
+
logger.info("---------------------------> Analytics API Started <---------------------------")
|
46 |
+
|
47 |
+
#for goldpricing
|
48 |
+
app.include_router(goldprice_apirouter,prefix="/conversai",dependencies=PROTECTED)
|
49 |
+
logger.info("---------------------------> Goldpricing API Started <---------------------------")
|
50 |
+
|
51 |
+
|
52 |
+
|
53 |
+
if __name__ == '__main__':
|
54 |
+
uvicorn.run(app, port=int(config.get('fastapi_config', 'port')), host=config.get('fastapi_config', 'host'),
|
55 |
+
timeout_keep_alive=300, timeout_graceful_shutdown=600)
|
config.ini
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[default]
|
2 |
+
chatbot_name = ConversAI
|
3 |
+
chatbot_prefix = convai
|
4 |
+
|
5 |
+
[data_analyzer]
|
6 |
+
groq_llm_name = llama-3.1-8b-instant
|
7 |
+
additional_query = .In case, you are to plot a chart, make sure the x-axis labels are 90 degree rotated.
|
8 |
+
verbose = False
|
9 |
+
|
10 |
+
; [easy_ocr]
|
11 |
+
; model_path = resources/easyocr_model
|
12 |
+
; language = en
|
13 |
+
; gpu = True
|
14 |
+
|
15 |
+
[speech_to_text]
|
16 |
+
model_id = openai/whisper-large-v3
|
17 |
+
max_new_tokens = 128
|
18 |
+
chunks_length_s = 30
|
19 |
+
batch_size = 16
|
20 |
+
|
21 |
+
[supabase_chatbot_management]
|
22 |
+
user_config_table = ConversAI_UserConfig
|
23 |
+
chat_bot_table = ConversAI_ChatbotInfo
|
24 |
+
|
25 |
+
[all_mini_l6_v2_vector_embedding]
|
26 |
+
device = cuda
|
27 |
+
normalize_embeddings = True
|
28 |
+
|
29 |
+
|
30 |
+
[fastapi_config]
|
31 |
+
host = 0.0.0.0
|
32 |
+
port = 8000
|
33 |
+
docs_url = /docs
|
34 |
+
redoc_url = /redoc
|
35 |
+
openapi_url = /openapi.json
|
36 |
+
|
37 |
+
|
38 |
+
[oauth]
|
39 |
+
redirect_to : https://convers-ai-test.vercel.app/home/
|
examples/easy_ocr_example.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
5 |
+
import cv2
|
6 |
+
|
7 |
+
from src.services.ocr.easy_ocr.easy_ocr_ import EasyOCR_
|
8 |
+
|
9 |
+
if __name__ == '__main__':
|
10 |
+
image = cv2.imread("images/img.png")
|
11 |
+
ocr = EasyOCR_()
|
12 |
+
print(ocr.read_text(image))
|
examples/url_text_extraction_example.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
5 |
+
from src.services.website_url.text_extraction_urls import extract_text_from_url
|
6 |
+
|
7 |
+
if __name__ == '__main__':
|
8 |
+
website = "https://huggingface.co/BAAI/bge-m3"
|
9 |
+
extracted_text = extract_text_from_url(website)
|
10 |
+
print(extracted_text)
|
logging_config.yaml
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: 1
|
2 |
+
disable_existing_loggers: False
|
3 |
+
formatters:
|
4 |
+
standard:
|
5 |
+
format: '[%(asctime)s: %(levelname)s: %(module)s: %(message)s]'
|
6 |
+
handlers:
|
7 |
+
console:
|
8 |
+
class: logging.StreamHandler
|
9 |
+
level: DEBUG
|
10 |
+
formatter: standard
|
11 |
+
stream: ext://sys.stdout
|
12 |
+
file:
|
13 |
+
class: logging.handlers.TimedRotatingFileHandler
|
14 |
+
level: INFO
|
15 |
+
formatter: standard
|
16 |
+
filename: logs/application.log
|
17 |
+
when: midnight
|
18 |
+
interval: 1
|
19 |
+
backupCount: 30
|
20 |
+
loggers:
|
21 |
+
__main__:
|
22 |
+
level: DEBUG
|
23 |
+
handlers: [console, file]
|
24 |
+
propagate: no
|
25 |
+
root:
|
26 |
+
level: DEBUG
|
27 |
+
handlers: [console, file]
|
requirements.txt
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accelerate==0.33.0
|
2 |
+
annotated-types==0.7.0
|
3 |
+
anyio==4.4.0
|
4 |
+
certifi==2024.7.4
|
5 |
+
charset-normalizer==3.3.2
|
6 |
+
click==8.1.7
|
7 |
+
dnspython==2.6.1
|
8 |
+
email_validator==2.2.0
|
9 |
+
exceptiongroup==1.2.2
|
10 |
+
fastapi==0.111.1
|
11 |
+
fastapi-cli==0.0.4
|
12 |
+
filelock==3.15.4
|
13 |
+
fsspec==2024.6.1
|
14 |
+
gTTS==2.5.2
|
15 |
+
h11==0.14.0
|
16 |
+
httpcore==1.0.5
|
17 |
+
httptools==0.6.1
|
18 |
+
httpx==0.27.0
|
19 |
+
huggingface-hub==0.24.5
|
20 |
+
idna==3.7
|
21 |
+
Jinja2==3.1.4
|
22 |
+
markdown-it-py==3.0.0
|
23 |
+
MarkupSafe==2.1.5
|
24 |
+
mdurl==0.1.2
|
25 |
+
mpmath==1.3.0
|
26 |
+
networkx==3.3
|
27 |
+
numpy==1.26.4
|
28 |
+
nvidia-cublas-cu12==12.1.3.1
|
29 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
30 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
31 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
32 |
+
nvidia-cudnn-cu12==9.1.0.70
|
33 |
+
nvidia-cufft-cu12==11.0.2.54
|
34 |
+
nvidia-curand-cu12==10.3.2.106
|
35 |
+
nvidia-cusolver-cu12==11.4.5.107
|
36 |
+
nvidia-cusparse-cu12==12.1.0.106
|
37 |
+
nvidia-nccl-cu12==2.20.5
|
38 |
+
nvidia-nvjitlink-cu12==12.6.20
|
39 |
+
nvidia-nvtx-cu12==12.1.105
|
40 |
+
packaging==24.1
|
41 |
+
psutil==6.0.0
|
42 |
+
pydantic==2.8.2
|
43 |
+
pydantic_core==2.20.1
|
44 |
+
Pygments==2.18.0
|
45 |
+
python-dotenv==1.0.1
|
46 |
+
python-multipart==0.0.9
|
47 |
+
PyYAML==6.0.1
|
48 |
+
regex==2024.7.24
|
49 |
+
requests==2.32.3
|
50 |
+
rich==13.7.1
|
51 |
+
safetensors==0.4.3
|
52 |
+
scikit-build==0.18.0
|
53 |
+
shellingham==1.5.4
|
54 |
+
sniffio==1.3.1
|
55 |
+
starlette==0.37.2
|
56 |
+
sympy==1.13.1
|
57 |
+
tokenizers==0.19.1
|
58 |
+
tomli==2.0.1
|
59 |
+
torch==2.4.0
|
60 |
+
tqdm==4.66.4
|
61 |
+
transformers @ git+https://github.com/huggingface/transformers.git@85a1269e19af022e04bc2aad82572cd5a9e8cdd9
|
62 |
+
triton==3.0.0
|
63 |
+
typer==0.12.3
|
64 |
+
typing_extensions==4.12.2
|
65 |
+
urllib3==2.2.2
|
66 |
+
uvicorn==0.30.4
|
67 |
+
uvloop==0.19.0
|
68 |
+
watchfiles==0.22.0
|
69 |
+
websockets==12.0
|
70 |
+
bs4
|
71 |
+
huggingface-hub
|
72 |
+
fastembed
|
73 |
+
nest_asyncio
|
74 |
+
beautifulsoup4
|
75 |
+
flashrank
|
76 |
+
flashrank[listwise]
|
77 |
+
PyMuPDF
|
78 |
+
langchain
|
79 |
+
langchain-community
|
80 |
+
langchain-cohere
|
81 |
+
langchain-huggingface
|
82 |
+
langchain-qdrant
|
83 |
+
langchain-groq
|
84 |
+
lxml
|
85 |
+
python-dotenv
|
86 |
+
pillow
|
87 |
+
pandas
|
88 |
+
sentence-transformers
|
89 |
+
supabase
|
90 |
+
unstructured
|
91 |
+
urllib3
|
92 |
+
langsmith
|
93 |
+
pandasai
|
94 |
+
easyocr
|
95 |
+
youtube-transcript-api
|
96 |
+
pdf2image
|
97 |
+
PyPDF2
|
98 |
+
PyJWT
|
99 |
+
|
src/__init__.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-07-31
|
4 |
+
"""
|
5 |
+
|
6 |
+
import logging.config
|
7 |
+
import yaml
|
8 |
+
import os
|
9 |
+
|
10 |
+
if os.path.exists("logs"):
|
11 |
+
pass
|
12 |
+
else:
|
13 |
+
os.makedirs("logs")
|
14 |
+
|
15 |
+
log_config_path = os.path.join(os.getcwd(), "logging_config.yaml")
|
16 |
+
with open(log_config_path, 'r') as file:
|
17 |
+
config = yaml.safe_load(file.read())
|
18 |
+
|
19 |
+
logging.config.dictConfig(config)
|
src/api/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-07-31
|
4 |
+
"""
|
src/api/analytics_api.py
ADDED
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-28
|
4 |
+
"""
|
5 |
+
from collections import Counter, defaultdict
|
6 |
+
from datetime import datetime, timedelta
|
7 |
+
from dateutil.parser import isoparse
|
8 |
+
from fastapi.routing import APIRouter
|
9 |
+
from src.pipeline.conversai_analytic_pipeline import ConversAIAnalyticPipeline
|
10 |
+
from fastapi import Request
|
11 |
+
from src.utils.error_handling import create_success_response, raise_http_exception, \
|
12 |
+
success_response_user_management
|
13 |
+
from src.models.apis_models import FeedbackRequest, DailyActiveEndUserRequest, AverageSessionInteractionRequest, \
|
14 |
+
TokenUsageRequest, UserSatisfactionRateRequest
|
15 |
+
from src import logging as logger
|
16 |
+
|
17 |
+
analytic_endpoints_router = APIRouter(tags=["Analytics Endpoints"])
|
18 |
+
|
19 |
+
conversai_analytic_pipeline = ConversAIAnalyticPipeline()
|
20 |
+
|
21 |
+
|
22 |
+
@analytic_endpoints_router.post("/daily_chat_count")
|
23 |
+
async def daily_chat_count(
|
24 |
+
request: DailyActiveEndUserRequest):
|
25 |
+
start_date, end_date, vectorstore = request.start_date, request.end_date, request.vectorstore
|
26 |
+
logger.info(f">>> daily_chat_count API Triggered by {vectorstore} <<<")
|
27 |
+
try:
|
28 |
+
if not start_date or not end_date:
|
29 |
+
end_date = datetime.now().astimezone().date()
|
30 |
+
start_date = end_date - timedelta(days=7)
|
31 |
+
else:
|
32 |
+
start_date = isoparse(start_date).date()
|
33 |
+
end_date = isoparse(end_date).date()
|
34 |
+
|
35 |
+
response = conversai_analytic_pipeline.chat_history_table_(vectorstore=vectorstore)
|
36 |
+
|
37 |
+
dates = [
|
38 |
+
isoparse(i["timestamp"]).date()
|
39 |
+
for i in response
|
40 |
+
if start_date <= isoparse(i["timestamp"]).date() <= end_date
|
41 |
+
]
|
42 |
+
|
43 |
+
date_count = Counter(dates)
|
44 |
+
|
45 |
+
data = [{"date": date.isoformat(), "count": count} for date, count in date_count.items()]
|
46 |
+
|
47 |
+
response = create_success_response(code=200, data=dict(output=data))
|
48 |
+
logger.info(f">>> daily_chat_count API Response Success for {vectorstore} <<<")
|
49 |
+
|
50 |
+
return response
|
51 |
+
|
52 |
+
except Exception as e:
|
53 |
+
logger.error(f">>> daily_chat_count API Response Failed for {vectorstore} {e}<<<")
|
54 |
+
|
55 |
+
raise_http_exception(500, "Internal Server Error")
|
56 |
+
|
57 |
+
|
58 |
+
@analytic_endpoints_router.post("/daily_active_end_user")
|
59 |
+
async def daily_active_end_user(
|
60 |
+
request: DailyActiveEndUserRequest
|
61 |
+
):
|
62 |
+
start_date, end_date, vectorstore = request.start_date, request.end_date, request.vectorstore
|
63 |
+
logger.info(f">>> daily_active_end_user API Triggered by {vectorstore} <<<")
|
64 |
+
try:
|
65 |
+
if not start_date or not end_date:
|
66 |
+
end_date = datetime.now().astimezone().date()
|
67 |
+
start_date = end_date - timedelta(days=7)
|
68 |
+
else:
|
69 |
+
start_date = isoparse(start_date).date()
|
70 |
+
end_date = isoparse(end_date).date()
|
71 |
+
|
72 |
+
response = conversai_analytic_pipeline.chat_history_table_(vectorstore=vectorstore)
|
73 |
+
|
74 |
+
ip_by_date = defaultdict(set)
|
75 |
+
|
76 |
+
for i in response:
|
77 |
+
timestamp = isoparse(i["timestamp"])
|
78 |
+
ip_address = i["IpAddress"]
|
79 |
+
if start_date <= timestamp.date() <= end_date:
|
80 |
+
date = timestamp.date()
|
81 |
+
ip_by_date[date].add(ip_address)
|
82 |
+
|
83 |
+
data = [{"date": date.isoformat(), "terminal": len(ips)} for date, ips in ip_by_date.items() if len(ips) > 1]
|
84 |
+
|
85 |
+
response = create_success_response(code=200, data=dict(output=data))
|
86 |
+
logger.info(f">>> daily_active_end_user API Response Success for {vectorstore} <<<")
|
87 |
+
|
88 |
+
return response
|
89 |
+
except Exception as e:
|
90 |
+
logger.error(f">>> daily_active_end_user API Response Failed for {vectorstore} {e}<<<")
|
91 |
+
|
92 |
+
raise_http_exception(500, "Internal Server Error")
|
93 |
+
|
94 |
+
|
95 |
+
@analytic_endpoints_router.post("/average_session_interaction")
|
96 |
+
async def average_session_interaction(
|
97 |
+
request: AverageSessionInteractionRequest
|
98 |
+
):
|
99 |
+
start_date, end_date, vectorstore = request.start_date, request.end_date, request.vectorstore
|
100 |
+
logger.info(f">>> average_session_interaction API Triggered by {vectorstore} <<<")
|
101 |
+
try:
|
102 |
+
if not start_date or not end_date:
|
103 |
+
end_date = datetime.now().astimezone().date()
|
104 |
+
start_date = end_date - timedelta(days=7)
|
105 |
+
else:
|
106 |
+
start_date = isoparse(start_date).date()
|
107 |
+
end_date = isoparse(end_date).date()
|
108 |
+
|
109 |
+
response = conversai_analytic_pipeline.chat_history_table_(vectorstore=vectorstore)
|
110 |
+
|
111 |
+
total_messages_by_date = defaultdict(int)
|
112 |
+
unique_ips_by_date = defaultdict(set)
|
113 |
+
|
114 |
+
for i in response:
|
115 |
+
timestamp = isoparse(i["timestamp"])
|
116 |
+
ip_address = i["IpAddress"]
|
117 |
+
if start_date <= timestamp.date() <= end_date:
|
118 |
+
date = timestamp.date()
|
119 |
+
total_messages_by_date[date] += 1
|
120 |
+
unique_ips_by_date[date].add(ip_address)
|
121 |
+
|
122 |
+
data = []
|
123 |
+
for date in sorted(total_messages_by_date.keys()):
|
124 |
+
total_messages = total_messages_by_date[date]
|
125 |
+
unique_ips = len(unique_ips_by_date[date])
|
126 |
+
average_interactions = total_messages / unique_ips if unique_ips > 0 else 0
|
127 |
+
data.append({"date": date.isoformat(), "interactions": average_interactions})
|
128 |
+
|
129 |
+
response = create_success_response(code=200, data=dict(data=data))
|
130 |
+
logger.info(f">>> average_session_interaction API Response Success for {vectorstore} <<<")
|
131 |
+
|
132 |
+
return response
|
133 |
+
except Exception as e:
|
134 |
+
logger.error(f">>> average_session_interaction API Response Failed for {vectorstore} {e}<<<")
|
135 |
+
raise_http_exception(500, "Internal Server Error")
|
136 |
+
|
137 |
+
|
138 |
+
@analytic_endpoints_router.post("/token_usages")
|
139 |
+
async def token_usages(request: TokenUsageRequest):
|
140 |
+
start_date, end_date, vectorstore = request.start_date, request.end_date, request.vectorstore
|
141 |
+
logger.info(f">>> token_usages API Triggered by {vectorstore} <<<")
|
142 |
+
try:
|
143 |
+
if not start_date or not end_date:
|
144 |
+
end_date = datetime.now().astimezone().date()
|
145 |
+
start_date = end_date - timedelta(days=7)
|
146 |
+
else:
|
147 |
+
start_date = isoparse(start_date).date()
|
148 |
+
end_date = isoparse(end_date).date()
|
149 |
+
|
150 |
+
response = conversai_analytic_pipeline.chat_history_table_(vectorstore=vectorstore)
|
151 |
+
|
152 |
+
token_usage_by_date = defaultdict(int)
|
153 |
+
|
154 |
+
for i in response:
|
155 |
+
timestamp = isoparse(i["timestamp"])
|
156 |
+
if start_date <= timestamp.date() <= end_date:
|
157 |
+
date = timestamp.date()
|
158 |
+
response_token_count = i.get("ResponseTokenCount")
|
159 |
+
if response_token_count is not None:
|
160 |
+
token_usage_by_date[date] += response_token_count
|
161 |
+
|
162 |
+
data = [{"date": date.isoformat(), "total_tokens": total_tokens} for date, total_tokens in
|
163 |
+
token_usage_by_date.items()]
|
164 |
+
|
165 |
+
response = create_success_response(code=200, data=dict(output=data))
|
166 |
+
logger.info(f">>> token_usages API Response Success for {vectorstore} <<<")
|
167 |
+
|
168 |
+
return response
|
169 |
+
except Exception as e:
|
170 |
+
logger.error(f">>> token_usages API Response Failed for {vectorstore} {e}<<<")
|
171 |
+
raise_http_exception(500, "Internal Server Error")
|
172 |
+
|
173 |
+
|
174 |
+
@analytic_endpoints_router.post("/add_feedback")
|
175 |
+
async def add_feedback(req: Request, request: FeedbackRequest):
|
176 |
+
feedback, user_id, vectorstore = request.feedback, request.user_id, request.vectorstore
|
177 |
+
try:
|
178 |
+
logger.info(f">>> add_feedback API Triggered by {request.vectorstore} <<<")
|
179 |
+
|
180 |
+
client_ip = req.client.host
|
181 |
+
city = conversai_analytic_pipeline.get_ip_info(client_ip)
|
182 |
+
|
183 |
+
conversai_analytic_pipeline.add_feedback_(feedback, user_id, city, client_ip, vectorstore)
|
184 |
+
|
185 |
+
response = success_response_user_management(code=200, message="Add Feedback Sucess")
|
186 |
+
logger.info(f">>> add_feedback API Response Success for {vectorstore} <<<")
|
187 |
+
|
188 |
+
return response
|
189 |
+
|
190 |
+
except Exception as e:
|
191 |
+
logger.error(f">>> add_feedback API Response Failed for {vectorstore} {e}<<<")
|
192 |
+
raise_http_exception(500, "Internal Server Error")
|
193 |
+
|
194 |
+
|
195 |
+
@analytic_endpoints_router.post("/user_satisfaction_rate")
|
196 |
+
async def user_satisfaction_rate(
|
197 |
+
request: UserSatisfactionRateRequest
|
198 |
+
):
|
199 |
+
start_date, end_date, vectorstore = request.start_date, request.end_date, request.vectorstore
|
200 |
+
logger.info(f">>> user_satisfaction_rate API Triggered by {vectorstore} <<<")
|
201 |
+
try:
|
202 |
+
if not start_date or not end_date:
|
203 |
+
end_date = datetime.now().astimezone().date()
|
204 |
+
start_date = end_date - timedelta(days=7)
|
205 |
+
else:
|
206 |
+
start_date = isoparse(start_date).date()
|
207 |
+
end_date = isoparse(end_date).date()
|
208 |
+
|
209 |
+
feedback_counts = defaultdict(lambda: {"like": 0, "dislike": 0})
|
210 |
+
response = conversai_analytic_pipeline.feedback_table_(vectorstore)
|
211 |
+
for i in response:
|
212 |
+
timestamp = isoparse(i["timestamp"])
|
213 |
+
if start_date <= timestamp.date() <= end_date:
|
214 |
+
date = timestamp.date()
|
215 |
+
feedback = i.get("feedback")
|
216 |
+
if feedback == "like":
|
217 |
+
feedback_counts[date]["like"] += 1
|
218 |
+
elif feedback == "dislike":
|
219 |
+
feedback_counts[date]["dislike"] += 1
|
220 |
+
|
221 |
+
data = []
|
222 |
+
for date in sorted(feedback_counts.keys()):
|
223 |
+
like_count = feedback_counts[date]["like"]
|
224 |
+
dislike_count = feedback_counts[date]["dislike"]
|
225 |
+
total_feedback = like_count + dislike_count
|
226 |
+
satisfaction_rate = (like_count / total_feedback * 100) if total_feedback > 0 else 0
|
227 |
+
data.append({"date": date.isoformat(), "rate": satisfaction_rate})
|
228 |
+
|
229 |
+
response = create_success_response(code=200, data=dict(output=data))
|
230 |
+
logger.info(f">>> user_satisfaction_rate API Response Success for {vectorstore} <<<")
|
231 |
+
|
232 |
+
return response
|
233 |
+
except Exception as e:
|
234 |
+
logger.info(f">>> user_satisfaction_rate API Response Failed for {vectorstore} {e}<<<")
|
235 |
+
raise_http_exception(500, "Internal Server Error")
|
src/api/conversai_api.py
ADDED
@@ -0,0 +1,645 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-02
|
4 |
+
"""
|
5 |
+
import io
|
6 |
+
import json
|
7 |
+
import os
|
8 |
+
import string
|
9 |
+
import tempfile
|
10 |
+
import requests
|
11 |
+
import pandas as pd
|
12 |
+
from src import logging as logger
|
13 |
+
from supabase import create_client
|
14 |
+
from urllib.parse import urlparse
|
15 |
+
|
16 |
+
from src.api.jwt_bearer import access_check_bearer
|
17 |
+
from src.models.apis_models import *
|
18 |
+
from fastapi.requests import Request
|
19 |
+
from fastapi.routing import APIRouter
|
20 |
+
from fastapi import UploadFile, File, HTTPException, Form, Depends
|
21 |
+
from src.pipeline.conversai_pipeline import ConversAIPipeline
|
22 |
+
from src.api.user_management_api import user_management
|
23 |
+
from src.services.supabase.analytics.analytic_tables import track_usage
|
24 |
+
from src.services.supabase.user_management.token_limit import token_limit_check
|
25 |
+
from src.utils.error_handling import create_error_response, create_success_response, raise_http_exception
|
26 |
+
from src.api.user_management_api import user_management as user_management_pipeline
|
27 |
+
from src.utils.utils import get_ip_info, encode_to_base64, clean_text, decode_base64
|
28 |
+
|
29 |
+
# Router exposing every ConversAI endpoint defined in this module.
conversai_api_router = APIRouter(tags=["ConversAI"])

# Shared Supabase client used by all endpoints for storage and table access.
# NOTE(review): os.getenv returns None when a variable is unset, which would make
# create_client fail at import time — confirm deployments always set both vars.
supabase_client = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_KEY"))
# Alias kept for backward compatibility with older code paths.
supabase_client_ = supabase_client
# Single pipeline instance reused across all requests.
conversai_pipeline = ConversAIPipeline()
|
34 |
+
|
35 |
+
|
36 |
+
@conversai_api_router.post("/add_text")
async def add_text(request: AddTextRequest):
    """Store a raw-text data source for a chatbot.

    Checks the user's token limit, cleans the text, uploads the JSON payload to
    Supabase storage and records it in ConversAI_ChatbotDataSources.

    Returns a success response, or a 400 error response when the token limit
    would be exceeded; raises HTTP 500 on any unexpected failure.
    """
    logger.info(f">>>AddText API Triggered By {request.vectorstore}<<<")
    try:
        vectorstore, text = request.vectorstore, request.text
        track_usage(vectorstore=vectorstore, endpoint="/add_text", supabase_client=supabase_client)
        # Vectorstore names look like "<prefix>$<username>$<chatbot>"; split once
        # instead of calling split("$") twice as before.
        _, username, chat_bot_name = vectorstore.split("$")[:3]
        lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chat_bot_name,
                                text=text)
        text = clean_text(text)
        if not lim:
            return create_error_response(
                400,
                "Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")

        dct = {
            "output": {"text": text},
            "source": "Text",
        }
        # Rough token count: punctuation-stripped whitespace word count.
        num_token = len(text.translate(str.maketrans('', '', string.punctuation)).split(" "))
        payload = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
        file_name = user_management_pipeline.create_data_source_name(source_name="text", username=username)
        supabase_client.storage.from_("ConversAI").upload(file=payload, path=f"{file_name}_data.json")

        # The insert result was previously bound to an unused variable; execute
        # for the side effect only.
        supabase_client.table("ConversAI_ChatbotDataSources").insert(
            {"username": username, "chatbotName": chat_bot_name, "dataSourceName": file_name,
             "numTokens": num_token, "sourceEndpoint": "/add_text",
             "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                              f"{file_name}_data.json")}).execute()

        logger.info(f">>>Text added successfully for {request.vectorstore}.<<<")
        return create_success_response(200, {"message": "Successfully added the text."})

    except Exception as e:
        logger.error(f">>>Error in add_text: {e} for {request.vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
74 |
+
|
75 |
+
|
76 |
+
@conversai_api_router.post("/answer_query")
async def answer_query(request: AnswerQueryRequest, req: Request):
    """Answer a user query against a chatbot's vectorstore.

    Runs the RAG pipeline, logs the exchange (with caller IP and resolved city)
    to ConversAI_ChatHistory, and returns the answer, follow-up questions and
    the source documents.
    """
    logger.info(f">>>answer_query API Triggered By {request.vectorstore}<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=request.vectorstore, endpoint="/answer_query")
        # Vectorstore format: "<prefix>$<username>$<chatbot>"
        _, username, chatbot_name = request.vectorstore.split("$")[:3]
        ip_address = req.client.host
        city = get_ip_info(ip_address)
        output, followup_questions, source = conversai_pipeline.answer_query_(query=request.query,
                                                                              vectorstore=request.vectorstore,
                                                                              llm_model=request.llm_model)
        supabase_client.table("ConversAI_ChatHistory").insert(
            {"username": username, "chatbotName": chatbot_name, "llmModel": request.llm_model,
             "question": request.query, "response": output, "IpAddress": ip_address,
             # NOTE(review): len(output) counts characters, not tokens — confirm intent.
             "ResponseTokenCount": len(output),
             "vectorstore": request.vectorstore, "City": city}).execute()

        response = create_success_response(200, data={"output": output, "follow_up_questions": followup_questions,
                                                      "source": source})
        logger.info(f">>>Query answered successfully for {request.vectorstore}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in answer_query: {e} for {request.vectorstore}.<<<")
        # Return a sanitized 500 (as the commented-out original intended) instead
        # of re-raising the raw exception, which leaked internals to clients.
        raise_http_exception(500, "Internal Server Error")
|
101 |
+
|
102 |
+
@conversai_api_router.post("/data_analyzer")
async def data_analyzer(query: str = Form(...), file: UploadFile = File(...)):
    """Answer *query* over an uploaded spreadsheet (Excel family or CSV)."""
    logger.info(f">>>data_analyzer API Triggered By {query}<<<")
    try:
        extension = file.filename.split(".")[-1]
        if extension in {"xls", "xlsx", "xlsm", "xlsb"}:
            df = pd.read_excel(io.BytesIO(await file.read()))
        elif extension == "csv":
            df = pd.read_csv(io.BytesIO(await file.read()))
        else:
            # Unsupported extension: reply with the legacy plain-dict payload.
            return {"output": "INVALID FILE TYPE"}

        answer = conversai_pipeline.data_analyzer(query=query, dataframe=df)
        result = create_success_response(200, {"output": answer})
        logger.info(f">>>Data analyzed successfully for {query}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in data_analyzer: {e} for {query}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
122 |
+
|
123 |
+
|
124 |
+
@conversai_api_router.post("/get_links")
async def get_links(request: GetLinksRequest):
    """Crawl *request.url* and return the discovered links plus the source domain."""
    logger.info(f">>>get_links API Triggered By {request.url}<<<")
    try:
        urls = conversai_pipeline.get_links_(url=request.url, timeout=30)
        result = create_success_response(200, {"urls": urls, "source": urlparse(request.url).netloc})
        logger.info(f">>>Links fetched successfully for {request.url}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in get_links: {e} for {request.url}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
136 |
+
|
137 |
+
|
138 |
+
@conversai_api_router.post("/image_pdf_text_extraction" )
async def image_pdf_text_extraction(vectorstore: str = Form(...)
                                    , pdf: UploadFile = File(...)):
    """OCR an image-based PDF and register the extracted text as a data source.

    Uploads the extracted content to Supabase storage and records the source in
    ConversAI_ChatbotDataSources; returns 402 when the token limit is exceeded.
    """
    logger.info(f">>>image_pdf_text_extraction API Triggered By {pdf.filename}<<<")
    try:
        track_usage(vectorstore=vectorstore, endpoint="/image_pdf_text_extraction", supabase_client=supabase_client)
        # Vectorstore format: "<prefix>$<username>$<chatbot>"
        username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
        source = pdf.filename
        pdf_bytes = await pdf.read()
        # Pipeline returns a mapping of section -> extracted text (it is
        # iterated as a dict below).
        response = conversai_pipeline.image_pdf_text_extraction_(image_pdf=pdf_bytes)
        lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chatbot_name,
                                text=response)
        logger.info(f"this is the {lim}")
        if lim:
            dct = {
                "output": response,
                "source": source
            }
            dct = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
            file_name = user_management_pipeline.create_data_source_name(source_name=source, username=username)
            # Rough token count: punctuation-stripped word count over all sections.
            num_tokens = len(
                " ".join([response[x] for x in response]).translate(str.maketrans('', '', string.punctuation)).split(
                    " "))
            response = supabase_client.storage.from_("ConversAI").upload(file=dct, path=f"{file_name}_data.json")
            supa = supabase_client.table("ConversAI_ChatbotDataSources").insert(
                {"username": username,
                 "chatbotName": chatbot_name,
                 "dataSourceName": file_name,
                 "numTokens": num_tokens,
                 "sourceEndpoint": "/image_pdf_text_extraction",
                 "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                                  f"{file_name}_data.json")}).execute()

            response = create_success_response(200,
                                               {"source": pdf.filename, "message": "Successfully extracted the text."})
            logger.info(f">>>Text extracted successfully for {pdf.filename}.<<<")
            return response
        else:
            response = create_error_response(402,
                                             "Exceeding limits, please try with a smaller chunks of PDF or subscribe to our premium plan.")
            return response

    except Exception as e:
        logger.error(f">>>Error in image_pdf_text_extraction: {e} for {pdf.filename}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
183 |
+
|
184 |
+
|
185 |
+
@conversai_api_router.post("/text_pdf_extraction")
async def text_pdf_extraction(vectorstore: str = Form(...)
                              , pdf: UploadFile = File(...)):
    """Extract selectable text from a PDF and register it as a data source.

    Writes the upload to a temporary file for the extraction pipeline, uploads
    the result to Supabase storage, and records it in
    ConversAI_ChatbotDataSources. Returns 402 when the token limit is exceeded.
    """
    logger.info(f">>>text_pdf_extraction API Triggered By {pdf.filename}<<<")
    try:
        track_usage(vectorstore=vectorstore, endpoint="/text_pdf_extraction", supabase_client=supabase_client)
        # Vectorstore format: "<prefix>$<username>$<chatbot>"
        _, username, chatbot_name = vectorstore.split("$")[:3]
        source = pdf.filename
        # Keep the UploadFile parameter intact; the original rebound `pdf` to bytes.
        pdf_bytes = await pdf.read()

        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
            temp_file.write(pdf_bytes)
            temp_file_path = temp_file.name

        # Ensure the temp file is removed even if extraction or the limit check
        # raises — the original leaked it on the exception path.
        try:
            extracted = conversai_pipeline.text_pdf_extraction_(pdf=temp_file_path)
            lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chatbot_name,
                                    text=extracted)
        finally:
            os.remove(temp_file_path)

        if not lim:
            return create_error_response(
                402,
                "Exceeding limits, please try with a smaller chunks of PDF or subscribe to our premium plan.")

        dct = {
            "output": extracted,
            "source": source
        }
        # Rough token count: punctuation-stripped word count over all sections.
        numTokens = len(
            " ".join([extracted[x] for x in extracted]).translate(str.maketrans('', '', string.punctuation)).split(
                " "))
        payload = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
        file_name = user_management_pipeline.create_data_source_name(source_name=source, username=username)
        supabase_client.storage.from_("ConversAI").upload(file=payload, path=f"{file_name}_data.json")
        (
            supabase_client.table("ConversAI_ChatbotDataSources")
            .insert({"username": username,
                     "chatbotName": chatbot_name,
                     "dataSourceName": file_name,
                     "numTokens": numTokens,
                     "sourceEndpoint": "/text_pdf_extraction",
                     "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                                      f"{file_name}_data.json")})
            .execute()
        )
        logger.info(f">>>Text extracted successfully for {source}.<<<")
        return create_success_response(200, {"source": source, "message": "Successfully extracted the text."})

    except Exception as e:
        logger.error(f">>>Error in text_pdf_extraction: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
237 |
+
|
238 |
+
|
239 |
+
@conversai_api_router.post("/youtube_transcript" )
async def youtube_transcript(request: YoutubeTranscriptRequest):
    """Fetch transcripts for the given YouTube URLs and store them as a data source.

    Returns 402 when adding the transcript would exceed the user's token limit.
    """
    vectorstore, urls = request.vectorstore, request.urls
    logger.info(f">>>youtube_transcript API Triggered By {urls}<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/youtube_transcript")
        # Vectorstore format: "<prefix>$<username>$<chatbot>"
        username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]

        # Pipeline returns a mapping keyed per video (iterated as a dict below).
        response = conversai_pipeline.youtube_transcript_(url=urls)
        lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chatbot_name,
                                text=response)
        if lim:

            dct = {
                "output": response,
                "source": "www.youtube.com"
            }
            # Rough token count: punctuation-stripped word count across transcripts.
            num_tokens = len(
                " ".join([response[x] for x in response]).translate(str.maketrans('', '', string.punctuation)).split(
                    " "))
            dct = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
            file_name = user_management_pipeline.create_data_source_name(source_name="youtube", username=username)
            response = supabase_client.storage.from_("ConversAI").upload(file=dct, path=f"{file_name}_data.json")
            response = (
                supabase_client.table("ConversAI_ChatbotDataSources")
                .insert({"username": username,
                         "chatbotName": chatbot_name,
                         "dataSourceName": file_name,
                         "numTokens": num_tokens,
                         "sourceEndpoint": "/youtube_transcript",
                         "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                                          f"{file_name}_data.json")})
                .execute()
            )
            response = create_success_response(200, {"message": "Successfully fetched the youtube transcript."})
            logger.info(f">>>Youtube transcript fetched successfully for {urls}.<<<")
            return response
        else:
            response = create_error_response(402,
                                             "Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")
            return response

    except Exception as e:
        logger.error(f">>>Error in youtube_transcript: {e} for {urls}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
284 |
+
|
285 |
+
|
286 |
+
@conversai_api_router.post("/website_url_text_extraction")
async def add_website(request: AddWebsiteRequest):
    """Scrape the given website URLs and register the extracted text as a data source.

    Returns 402 when the combined text would exceed the user's token limit;
    raises HTTP 500 on any unexpected failure.
    """
    vectorstore, website_urls, source = request.vectorstore, request.website_urls, request.source

    logger.info(f">>>website_url_text_extraction API Triggered By {request.website_urls}<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/fetch_text/urls")
        # Vectorstore format: "<prefix>$<username>$<chatbot>"
        _, username, chatbot_name = vectorstore.split("$")[:3]

        # Mapping of url -> extracted page text.
        text = conversai_pipeline.website_url_text_extraction_list_(urls=website_urls)
        text_corpus = "\n".join(text.values())

        logger.info(f">>>website_url_text_extraction len {type(text)}<<<")

        lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chatbot_name,
                                text=text_corpus)
        if not lim:
            return create_error_response(
                402,
                "Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")

        dct = {
            "output": text,
            "source": source
        }
        # Rough token count: punctuation-stripped word count over all pages.
        num_tokens = len(
            " ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
        payload = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
        file_name = user_management_pipeline.create_data_source_name(source_name=urlparse(source).netloc,
                                                                     username=username)
        supabase_client.storage.from_("ConversAI").upload(file=payload, path=f"{file_name}_data.json")
        (
            supabase_client.table("ConversAI_ChatbotDataSources")
            .insert({"username": username,
                     "chatbotName": chatbot_name,
                     "dataSourceName": file_name,
                     "numTokens": num_tokens,
                     "sourceEndpoint": "/fetch_text/urls",
                     "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                                      f"{file_name}_data.json")})
            .execute()
        )
        logger.info(f">>>Website text extracted successfully for {request.website_urls}.<<<")
        return create_success_response(200, {"message": "Successfully fetched the website text."})
    except Exception as e:
        logger.error(f">>>Error in website_url_text_extraction: {e} for {request.website_urls}.<<<")
        # Return a sanitized 500 (as the commented-out original intended) instead
        # of re-raising the raw exception, which leaked internals to clients.
        raise_http_exception(500, "Internal Server Error")
|
336 |
+
|
337 |
+
|
338 |
+
|
339 |
+
|
340 |
+
|
341 |
+
@conversai_api_router.get("/get_current_count")
async def get_count(vectorstore: str):
    """Return the current data-source count for the user behind *vectorstore*."""
    logger.info(f">>>get_current_count API Triggered By {vectorstore}<<<")
    try:
        parts = vectorstore.split("$")
        username, chatbot_name = parts[1], parts[2]
        current_count = user_management_pipeline.get_current_count_(username)

        result = create_success_response(200, {"current_count": current_count})
        logger.info(f">>>Current count fetched successfully for {vectorstore}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in get_current_count: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
355 |
+
|
356 |
+
|
357 |
+
@conversai_api_router.post("/list_chatbots")
async def list_chatbots(request: ListChatbotsRequest):
    """Return the names of every chatbot owned by *request.username*."""
    logger.info(f">>>list_chatbots API Triggered By {request.username}<<<")
    try:
        owned = user_management.list_tables(username=request.username)
        result = create_success_response(200, {"chatbots": owned})
        logger.info(f">>>Chatbots listed successfully for {request.username}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in list_chatbots: {e} for {request.username}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
369 |
+
|
370 |
+
|
371 |
+
@conversai_api_router.post("/get_chat_history")
async def chat_history(request: GetChatHistoryRequest):
    """Return the stored question/response history for a chatbot.

    Responds 404 when the vectorstore identifier is malformed, 500 on any
    other failure.
    """
    logger.info(f">>>get_chat_history API Triggered By {request.vectorstore}<<<")
    try:
        # Vectorstore format: "<prefix>$<username>$<chatbot>".
        _, username, chatbotName = request.vectorstore.split("$", 2)

        history = supabase_client.table("ConversAI_ChatHistory").select(
            "timestamp", "question", "response"
        ).eq("username", username).eq("chatbotName", chatbotName).execute().data

        response = create_success_response(200, {"history": history})
        logger.info(f">>>Chat history fetched successfully for {request.vectorstore}.<<<")
        return response

    except (IndexError, ValueError):
        # A malformed vectorstore makes the 3-way unpack raise ValueError; the
        # original caught only IndexError, which that unpack never raises, so
        # bad input fell through to the generic 500 instead of this 404.
        logger.warning(f"Chat history not found for {request.vectorstore}")
        return create_error_response(404, "Chat history not found for the given chatbot.")

    except Exception as e:
        logger.error(f">>>Error in get_chat_history: {e} for {request.vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
393 |
+
|
394 |
+
|
395 |
+
@conversai_api_router.post("/delete_chatbot")
async def delete_chatbot(request: DeleteChatbotRequest):
    """Delete a chatbot: its info row, data-source rows, stored files, logs and vectorstore."""
    logger.info(f">>>delete_chatbot API Triggered By {request.vectorstore}<<<")
    try:
        # Vectorstore format: "<prefix>$<username>$<chatbot>"
        _, username, chatbot_name = request.vectorstore.split("$")[:3]
        supabase_client.table('ConversAI_ChatbotInfo').delete().eq('user_id', username).eq('chatbotname',
                                                                                          chatbot_name).execute()
        # Collect the storage file names before deleting the source rows.
        all_sources = supabase_client.table("ConversAI_ChatbotDataSources").select("*").eq("username", username).eq(
            "chatbotName", chatbot_name).execute().data
        all_sources = [x["sourceContentURL"].split("/")[-1] for x in all_sources]
        supabase_client.table("ConversAI_ChatbotDataSources").delete().eq("username", username).eq(
            "chatbotName", chatbot_name).execute()
        # Soft-delete the activity log and chat history rather than removing rows.
        supabase_client.table("ConversAI_ActivityLog").update({"isActive": False}).eq("username", username).eq(
            "chatbotName", chatbot_name).execute()
        supabase_client.table("ConversAI_ChatHistory").update({"isActive": False}).eq("username", username).eq(
            "chatbotName", chatbot_name).execute()
        for source in all_sources:
            # Removed a stray no-op statement here: the original built
            # `supabase_client.table("ConversAI_Chatbot")` each iteration and
            # never executed it.
            # supabase-py's StorageBucket.remove takes a list of paths.
            supabase_client.storage.from_("ConversAI").remove([source])
        user_management.delete_table(table_name=chatbot_name)
        user_management.delete_qdrant_cluster(vectorstorename=request.vectorstore)
        response = create_success_response(200, {"message": "Chatbot deleted successfully"})
        logger.info(f">>>Chatbot deleted successfully for {request.vectorstore}.<<<")
        return response
    except Exception as e:
        logger.error(f">>>Error in delete_chatbot: {e} for {request.vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
424 |
+
|
425 |
+
|
426 |
+
@conversai_api_router.post("/add_qa_pair" )
async def add_qa_pair(request: AddQAPairRequest):
    """Store a question/answer pair as a chatbot data source.

    Formats the pair as plain text, checks the token limit, uploads the JSON
    payload to Supabase storage and records it in ConversAI_ChatbotDataSources.
    Returns 400 when the token limit would be exceeded.
    """
    logger.info(f">>>add_qa_pair API Triggered By {request.vectorstore}<<<")
    try:
        vectorstore, question, answer = request.vectorstore, request.question, request.answer
        track_usage(vectorstore=vectorstore, endpoint="/add_qa_pair", supabase_client=supabase_client)
        # Vectorstore format: "<prefix>$<username>$<chatbot>"
        username, chat_bot_name = request.vectorstore.split("$")[1], request.vectorstore.split("$")[2]
        normal_text = f"\nQUESTION: {question}\nANSWER: {answer}\n"
        lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chat_bot_name,
                                text=normal_text)
        if lim:
            dct = {
                "output": {"text": normal_text},
                "source": "QA Pair",
            }
            # Rough token count: punctuation-stripped whitespace word count.
            num_token = len(normal_text.translate(str.maketrans('', '', string.punctuation)).split(" "))
            dct = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
            file_name = user_management_pipeline.create_data_source_name(source_name="qa_pair", username=username)
            supabase_client.storage.from_("ConversAI").upload(file=dct, path=f"{file_name}_data.json")
            (
                supabase_client.table("ConversAI_ChatbotDataSources")
                .insert({"username": username,
                         "chatbotName": chat_bot_name,
                         "dataSourceName": file_name,
                         "numTokens": num_token,
                         "sourceEndpoint": "/add_qa_pair",
                         "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                                          f"{file_name}_data.json")})
                .execute()
            )

            response = create_success_response(200, {"message": "Successfully added the qa pair."})
            logger.info(f">>>QA Pair added successfully for {request.vectorstore}.<<<")

            return response
        else:
            response = create_error_response(400,
                                             "Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")
            return response

    except Exception as e:
        logger.error(f">>>Error in add_qa_pair: {e} for {request.vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
469 |
+
|
470 |
+
|
471 |
+
@conversai_api_router.post("/load_edited_json" )
async def load_edited_json(request: LoadEditedJson):
    """Upload a user-edited (base64-encoded) JSON data source and register it.

    The decoded payload is stored in Supabase storage and a row is inserted
    into ConversAI_ChatbotDataSources preserving the original source endpoint.
    """
    vectorstore, data_source_name, source_endpoint, json_data = request.vectorstore, request.data_source_name, request.source_endpoint, request.json_data
    # Vectorstore format: "<prefix>$<username>$<chatbot>"
    username, chatbot_name = request.vectorstore.split("$")[1], request.vectorstore.split("$")[2]

    logger.info(f">>>loadEditedJson API Triggered By {request.vectorstore}<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=request.vectorstore,
                    endpoint="/load_edited_json")
        # Decode the client-supplied base64 payload, then re-serialize it
        # for storage.
        json_data = decode_base64(request.json_data)
        json_data = json.dumps(json_data, indent=1).encode("utf-8", errors="replace")
        file_name = user_management_pipeline.create_data_source_name(source_name=data_source_name,
                                                                     username=username)
        response = supabase_client.storage.from_("ConversAI").upload(file=json_data, path=f"{file_name}_data.json")
        response = (
            supabase_client.table("ConversAI_ChatbotDataSources")
            .insert({"username": username,
                     "chatbotName": chatbot_name,
                     "dataSourceName": file_name,
                     "sourceEndpoint": source_endpoint,
                     "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                                      f"{file_name}_data.json")})
            .execute()
        )
        response = create_success_response(200, {"output": "Successfully loaded the edited json."})
        logger.info(f">>>Edited json loaded successfully for {vectorstore}.<<<")

        return response

    except Exception as e:
        logger.error(f">>>Error in loadEditedJson: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
503 |
+
|
504 |
+
|
505 |
+
@conversai_api_router.get("/list_chatbot_sources")
async def list_chatbot_sources(vectorstore: str):
    """List every data-source row registered for the given chatbot."""
    try:
        logger.info(f">>>list_chatbot_sources API Triggered By {vectorstore}<<<")

        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/list_chatbot_sources")
        parts = vectorstore.split("$")
        username, chatbot_name = parts[1], parts[2]
        rows = (
            supabase_client.table("ConversAI_ChatbotDataSources")
            .select("*")
            .eq("username", username)
            .eq("chatbotName", chatbot_name)
            .execute()
            .data
        )

        result = create_success_response(200, {"output": rows})
        logger.info(f">>>Chatbot listed successfully for {vectorstore}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in list_chatbot_sources: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
523 |
+
|
524 |
+
|
525 |
+
@conversai_api_router.get("/get_data_source")
async def get_data_source(vectorstore: str, source_url: str):
    """Fetch a stored data-source JSON file and return it base64-encoded."""
    try:
        logger.info(f">>>get_data_source API Triggered By {vectorstore}<<<")

        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/get_data_source")
        r = requests.get(source_url)
        # SECURITY: parse the fetched content as JSON instead of eval()-ing it —
        # eval on remote content allows arbitrary code execution. The stored
        # files are produced with json.dumps, so json.loads is the correct parser.
        res = encode_to_base64(json.loads(r.content.decode("utf-8", errors="replace")))

        response = create_success_response(200, {"output": res})

        return response

    except Exception as e:
        logger.error(f">>>Error in get_data_source: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
541 |
+
|
542 |
+
|
543 |
+
@conversai_api_router.post("/delete_chatbot_source")
async def delete_chatbot_source(request: DeleteChatbotSourceRequest):
    """Remove a single data source: its table row and its stored JSON file."""
    vectorstore, data_source_name = request.vectorstore, request.data_source_name
    try:
        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/delete_chatbot_source")
        supabase_client.table("ConversAI_ChatbotDataSources").delete().eq(
            "dataSourceName", data_source_name
        ).execute()
        supabase_client.storage.from_('ConversAI').remove(f"{data_source_name}_data.json")

        result = create_success_response(200, {"output": f"Successfully deleted the {data_source_name} data source."})
        logger.info(f">>>Data source deleted successfully for {vectorstore}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in delete_chatbot_source: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
562 |
+
|
563 |
+
|
564 |
+
def _fetch_source_payload(source_url: str) -> dict:
    """Download one stored data-source JSON file and parse it.

    SECURITY: uses json.loads instead of the original eval(), which executed
    arbitrary code from the fetched content. Stored payloads are written with
    json.dumps, so json.loads is the correct parser.
    """
    r = requests.get(source_url)
    return json.loads(r.content.decode("utf-8", errors="replace"))


@conversai_api_router.post("/train_chatbot")
async def train_chatbot(request: TrainChatbotRequest):
    """(Re)train a chatbot from its selected data-source URLs.

    Downloads each stored JSON payload, flattens it into (text, source) pairs
    according to the endpoint that produced it, and feeds the pairs to the
    vector-store ingestion pipeline.
    """
    vectorstore, url_sources = request.vectorstore, request.urls
    logger.info(f">>>train_chatbot API Triggered By {vectorstore}<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/train_chatbot")
        texts = []
        sources = []
        # Look up which endpoint produced each source so we know its payload layout.
        fileTypes = [
            supabase_client.table("ConversAI_ChatbotDataSources").select("sourceEndpoint").eq("sourceContentURL",
                                                                                              x).execute().data[0][
                "sourceEndpoint"] for x in url_sources]
        for source, fileType in zip(url_sources, fileTypes):
            if fileType in ("/text_pdf_extraction", "/image_pdf_text_extraction"):
                logger.info(f"Source is {source}")
                file = _fetch_source_payload(source)
                # PDF payloads: {"output": {section: text, ...}, "source": name}
                content = file["output"]
                logger.info(f"content is {content}")
                texts.append(".".join([content[key] for key in content.keys()]).replace("\n", " "))
                sources.append(file["source"])
            elif fileType in ("/add_text", "/add_qa_pair"):
                file = _fetch_source_payload(source)
                # Text/QA payloads: {"output": {"text": ...}, "source": name}
                texts.append(file["output"]["text"].replace("\n", " "))
                sources.append(file["source"])
            elif fileType in ("/fetch_text/urls", "/youtube_transcript"):
                file = _fetch_source_payload(source)
                # Website/YouTube payloads: {"output": {key: text, ...}, "source": name}
                content = file["output"]
                texts.append(".".join([content[key] for key in content.keys()]).replace("\n", " "))
                sources.append(file["source"])
            else:
                # Unknown endpoint: skip, as the original did.
                pass
        texts = [(text, source) for text, source in zip(texts, sources)]
        conversai_pipeline.add_document_(texts, vectorstore)
        response = create_success_response(200, {"message": "Chatbot trained successfully."})
        logger.info(f">>>Chatbot trained successfully for {vectorstore}.<<<")

        return response

    except Exception as e:
        logger.error(f">>>Error in train_chatbot: {e} for {vectorstore}.<<<")
        # Return a sanitized 500 (as the commented-out original intended) instead
        # of re-raising the raw exception, which leaked internals to clients.
        raise_http_exception(500, "Internal Server Error")
|
619 |
+
|
620 |
+
|
621 |
+
@conversai_api_router.get("/activity_log")
async def activity_log(username: str):
    """Return every activity-log row recorded for *username*."""
    logger.info(f">>>activityLog API Triggered By {username}<<<")
    try:
        rows = (
            supabase_client.table("ConversAI_ActivityLog")
            .select("*")
            .eq("username", username)
            .execute()
            .data
        )

        logger.info(f">>>Activity log fetched successfully for {username}.<<<")

        return rows
    except Exception as e:
        logger.error(f">>>Error in activityLog: {e} for {username}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
633 |
+
|
634 |
+
|
635 |
+
@conversai_api_router.post("/new_chatbot")
async def new_chatbot(request: NewChatbotRequest):
    """Create a new chatbot for the requesting user via the user-management pipeline."""
    logger.info(f">>> new_chatbot API Triggered <<<")
    try:
        result = user_management.new_chatbot_(chatbot_name=request.chatbot_name, username=request.username)
        logger.info(f">>> Chatbot created successfully for {request.username}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in new_chatbot: {e} for {request.username}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
src/api/jewel_mirror.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi.routing import APIRouter
|
2 |
+
from fastapi.requests import Request
|
3 |
+
|
4 |
+
from src.jewel_mirror.jewel_mirror import GoldPricingJewelMirror
|
5 |
+
from src.models.apis_models import GoldPriceRequest
|
6 |
+
from src import logging as logger
|
7 |
+
|
8 |
+
goldprice_apirouter=APIRouter(tags=["GoldPrice"])
|
9 |
+
|
10 |
+
goldpricing=GoldPricingJewelMirror()
|
11 |
+
|
12 |
+
@goldprice_apirouter.post("/gold_price")
async def answer_goldprice(request: GoldPriceRequest, req: Request):
    """Answer a gold-price question using the GoldPricingJewelMirror service."""
    logger.info(f">>>Gold Price API Triggered <<<")
    try:
        answer = goldpricing.response_query(query=request.query, llmModel=request.llm_model)
        logger.info(f">>>Gold Price API Success<<<")
        return answer
    except Exception as e:
        # Re-raise so FastAPI's default error handling reports the failure.
        logger.error(f">>>Error in Gold Price API: {e} <<<")
        raise e
|
src/api/jwt_bearer.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-09-03
|
4 |
+
"""
|
5 |
+
import os
|
6 |
+
from fastapi import Depends
|
7 |
+
from supabase import create_client
|
8 |
+
from src import logging as logger
|
9 |
+
from src.utils.error_handling import create_error_response, raise_http_exception
|
10 |
+
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
11 |
+
|
12 |
+
security = HTTPBearer()
|
13 |
+
|
14 |
+
supabase_client = create_client(
|
15 |
+
os.getenv("SUPABASE_URL"),
|
16 |
+
os.getenv("SUPABASE_KEY")
|
17 |
+
)
|
18 |
+
|
19 |
+
|
20 |
+
async def access_check_bearer(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency: reject the request (401) unless the bearer token is a valid Supabase session."""
    access_token = credentials.credentials
    try:
        # Supabase raises when the token is invalid or expired; a clean
        # return means the caller may proceed.
        supabase_client.auth.get_user(access_token)
    except Exception as e:
        logger.info(f">>> Invalid access token {e}<<<")
        raise_http_exception(
            code=401,
            message="Invalid Access Token",
            details=[{"info": "Invalid access token or access token expired please login again"}],
        )
|
src/api/speech_api.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-07-31
|
4 |
+
"""
|
5 |
+
import os
|
6 |
+
import tempfile
|
7 |
+
from fastapi import Form
|
8 |
+
from fastapi import UploadFile, HTTPException, status
|
9 |
+
from src.models.apis_models import TextToSpeechRequest
|
10 |
+
from fastapi.routing import APIRouter
|
11 |
+
from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline
|
12 |
+
from src import logging as logger
|
13 |
+
from src.utils.error_handling import create_success_response, raise_http_exception
|
14 |
+
|
15 |
+
speech_translator_router = APIRouter(tags=["SpeechTranscription"])
|
16 |
+
pipeline = SpeechTranscriptionPipeline()
|
17 |
+
|
18 |
+
|
19 |
+
@speech_translator_router.post(
    "/text_to_speech",
)
async def text_to_speech(request: TextToSpeechRequest):
    """Synthesize ``request.text`` to audio via the transcription pipeline.

    Returns a success envelope carrying the audio payload; 400 when the
    generator produces nothing, 500 on unexpected failure.
    """
    logger.info(f">>>text_to_speech API Triggered <<<")
    try:
        audio_bytes = pipeline.text_to_speech(request.text, request.lang, request.tld)
        if not audio_bytes:
            raise ValueError("Audio generation failed.")
        result = create_success_response(code=200, data={"audio": audio_bytes})
        logger.info(f">>>text_to_speech API success <<<")
        return result
    except ValueError as ve:
        logger.info(f">>>text_to_speech API failed {ve}<<<")
        raise_http_exception(code=400, message="Text to speech failed")

    except Exception as e:
        logger.error(f">>> Error processing text-to-speech {e}<<<")
        raise_http_exception(code=500, message="Internal server error")
|
38 |
+
|
39 |
+
|
40 |
+
@speech_translator_router.post(
    "/speech_to_text",

)
async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
    """Transcribe an uploaded audio file to text.

    The upload is validated, spooled to a temporary ``.wav`` file (the
    transcription backend expects a path), transcribed, and the temporary
    file is removed afterwards.
    """
    logger.info(f">>>speech_to_text API Triggered <<<")
    try:
        audio_bytes = await audio.read()
        if not audio_bytes:
            logger.error(f">>> Empty audio file <<<")
            raise ValueError("Empty audio file")
    except Exception as e:
        logger.error(f">>> Invalid audio file {e}<<<")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid audio file"
        )

    # Fix: initialise up front so the finally-block below can never hit an
    # unbound name if temporary-file creation fails.
    temp_audio_file_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
            temp_audio_file_path = temp_audio_file.name
    except Exception as e:
        logger.error(f">>> Error creating temporary file{e} <<<")
        raise_http_exception(code=500, message="Internal server error")

    try:
        transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
        response = create_success_response(code=200, data={"transcript": transcript})
        logger.info(f">>>speech_to_text API success <<<")

        return response

    except FileNotFoundError:
        logger.error(f">>> Temporary file not found <<<")
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Temporary file not found"
        )
    except Exception as e:
        logger.error(f">>> Error processing speech-to-text {e}<<<")
        raise_http_exception(code=500, message="Error processing speech-to-text")

    finally:
        # Best-effort cleanup; path may be None when spooling failed.
        if temp_audio_file_path and os.path.exists(temp_audio_file_path):
            os.remove(temp_audio_file_path)
|
src/api/user_management_api.py
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
5 |
+
from fastapi import Depends
|
6 |
+
from src import logging as logger
|
7 |
+
from src.models.apis_models import *
|
8 |
+
from fastapi.routing import APIRouter
|
9 |
+
from src.api.jwt_bearer import access_check_bearer, supabase_client
|
10 |
+
from src.pipeline.user_management_pipeline import SupabaseUserManagementPipeline
|
11 |
+
from src.services.supabase.analytics.analytic_tables import track_usage
|
12 |
+
from src.utils.error_handling import raise_http_exception, create_success_response, create_error_response
|
13 |
+
|
14 |
+
user_management_api_router = APIRouter(tags=["User Management"])
|
15 |
+
|
16 |
+
user_management = SupabaseUserManagementPipeline()
|
17 |
+
|
18 |
+
|
19 |
+
@user_management_api_router.post("/user_signup")
async def user_signup(request: UserSignupRequest):
    """Register a new account from username/email/password."""
    logger.info(f">>>user_signup API Triggered <<<")
    result = user_management.user_signup_(
        username=request.username,
        email=request.email,
        password=request.password,
    )
    logger.info(f">>>user_signup API Success<<<")

    return result
|
26 |
+
|
27 |
+
|
28 |
+
@user_management_api_router.post("/user_signin")
async def user_signin(request: UserSigninRequest):
    """Sign a user in with email/password.

    Returns the pipeline response on success, or a 400 error envelope when
    the credentials are rejected (the pipeline returns None in that case).
    """
    logger.info(f">>>user_signin API Triggered <<<")

    response = user_management.user_signin_(email=request.email, password=request.password)
    # Idiom fix: `is not None` instead of `!= None` — identity check is the
    # correct comparison against the None singleton.
    if response is not None:
        logger.info(f">>>user_signin API Success.<<<")
        return response
    else:
        logger.info(f">>> Email or password is incorrect please try again.<<<")
        response = create_error_response(400, "Email or password is incorrect please try again.")
        return response
|
40 |
+
|
41 |
+
|
42 |
+
@user_management_api_router.post("/get_user_data")
async def get_user_data(request: GetUserDataRequest):
    """Resolve the user profile behind an access token."""
    logger.info(f">>>get_user_data API Triggered <<<")
    return user_management.get_user_data_(access_token=request.access_token)
|
47 |
+
|
48 |
+
|
49 |
+
@user_management_api_router.post("/login_with_access_token")
async def login_with_access_token(request: LoginWithAccessTokenRequest):
    """Restore a login session from a stored access/refresh token pair."""
    logger.info(f">>>login_with_access_token API Triggered <<<")

    result = user_management.login_with_access_token_(
        access_token=request.access_token,
        refresh_token=request.refresh_token,
    )
    logger.info(f">>>login_with_access_token API Success<<<")
    return result
|
57 |
+
|
58 |
+
|
59 |
+
@user_management_api_router.post("/set_session_data")
async def set_session_data(request: SetSessionDataRequest):
    """Persist access/refresh tokens for a user's session."""
    logger.info(f">>> set_session_data API Triggered <<<")

    return user_management.set_session_data_(
        access_token=request.access_token,
        refresh_token=request.refresh_token,
        user_id=request.user_id,
    )
|
66 |
+
|
67 |
+
|
68 |
+
@user_management_api_router.post("/sign_out")
async def sign_out():
    """Terminate the current session via the user-management pipeline."""
    logger.info(f">>> sign_out API Triggered <<<")

    result = user_management.sign_out_()
    logger.info(f">>>sign_out API Success<<<")
    return result
|
75 |
+
|
76 |
+
|
77 |
+
@user_management_api_router.post("/oauth_signin")
async def oauth_signin():
    """Start an OAuth sign-in flow via the user-management pipeline."""
    logger.info(f">>> oauth_signin API Triggered <<<")
    result = user_management.oauth_signin_()
    logger.info(f">>>oauth_signin API Success<<<")
    return result
|
83 |
+
|
84 |
+
|
85 |
+
@user_management_api_router.post("/check_session")
async def check_session():
    """Report whether a session is currently active."""
    logger.info(f">>>check_session API Triggered <<<")

    return user_management.check_session_()
|
91 |
+
|
92 |
+
|
93 |
+
@user_management_api_router.get("/get_public_chatbot")
async def get_public_chatbots():
    """List every chatbot whose isPrivate flag is False."""
    logger.info(f">>>get_public_chatbot API Triggered<<<")
    try:
        rows = (
            supabase_client.table("ConversAI_ChatbotInfo")
            .select("*")
            .eq("isPrivate", False)
            .execute()
            .data
        )
        logger.info(f">>>Public chatbots fetched successfully.<<<")
        return rows
    except Exception as e:
        logger.error(f">>>Error in get_public_chatbot: {e}<<<")
        raise_http_exception(500, "Internal Server Error")
|
103 |
+
|
104 |
+
|
105 |
+
@user_management_api_router.post("/public_private_check")
async def public_or_private(request: PublicPrivateCheckRequest):
    """Read or update a chatbot's ``isPrivate`` flag.

    When ``mode`` is empty/None the current flag is returned; otherwise the
    flag is updated to ``mode``.
    """
    vectorstore, mode = request.vectorstore, request.mode
    logger.info(f">>>public_private_check API Triggered for {vectorstore}.<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/public_private_check")
        # vectorstore has the shape "<prefix>$<username>$<chatbot_name>";
        # split once instead of twice.
        parts = vectorstore.split("$")
        username, chatbot_name = parts[1], parts[2]
        # Bug fix: the request model declares `mode: str | None = None`, so the
        # original `len(mode) == 0` raised TypeError whenever mode was omitted.
        if not mode:
            value = (
                supabase_client.table("ConversAI_ChatbotInfo")
                .select("isPrivate")
                .eq("user_id", username)
                .eq("chatbotname", chatbot_name)
                .execute()
            )
            value = value.data[0]["isPrivate"]
            response = create_success_response(200, {"output": value})
        else:
            response = (
                supabase_client.table("ConversAI_ChatbotInfo")
                .update({"isPrivate": mode})
                .eq("user_id", username)
                .eq("chatbotname", chatbot_name)
                .execute()
            )
            response = create_success_response(200, {"output": response})
        logger.info(f">>>Public/Private check successful for {vectorstore}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in public_private_check: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
|
138 |
+
|
139 |
+
|
140 |
+
@user_management_api_router.post("/refresh_session", dependencies=[Depends(access_check_bearer)])
async def refresh_session(request: RefreshSessionRequest):
    """Exchange a refresh token for a new session (bearer-protected)."""
    logger.info(f">>>refresh_session API Triggered <<<")
    result = user_management.refresh_session__(refresh_token=request.refresh_token)
    logger.info(f">>>refresh token fetched successfully.<<<")

    return result
|
147 |
+
|
148 |
+
|
149 |
+
@user_management_api_router.post("/username_creation_oauth", dependencies=[Depends(access_check_bearer)])
async def username_creation_oauth(request: UsernameCreationOauthRequest):
    """Attach a username to an OAuth-created account (bearer-protected)."""
    logger.info(f">>> username_creation_oauth API Triggered <<<")

    result = user_management.username_creation_oauth_(
        username=request.username,
        user_id=request.user_id,
        email=request.email,
    )

    logger.info(f">>>username creation successful.<<<")
    return result
|
src/jewel_mirror/__init__.py
ADDED
File without changes
|
src/jewel_mirror/jewel_mirror.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Todo: class and object (gold_price) ,
|
2 |
+
from langchain_community.document_loaders import WebBaseLoader
|
3 |
+
from langchain.schema import Document
|
4 |
+
from langchain.prompts import PromptTemplate
|
5 |
+
from langchain_groq import ChatGroq
|
6 |
+
from langchain.chains.question_answering import load_qa_chain
|
7 |
+
|
8 |
+
|
9 |
+
class GoldPricingJewelMirror:
    """Answers gold-price questions by scraping live rates and querying a Groq LLM."""

    def __init__(self):
        # Single source of truth for the QA prompt. The original built this
        # template here but never used it, duplicating the identical text
        # inline in response_query; response_query now reuses it.
        self.template = (
            "You are an elite financial analyst with extensive expertise in finance and investment strategies. "
            "Provide a **direct and concise** answer to the question below **without** using any introductory phrases such as "
            "'based on the context provided,' 'according to the information given,' or similar expressions. "
            "Your response must rely solely on the information from the provided context and should **not** include any additional commentary, explanations, or opinions.\n\n"
            "📊 **Context:**\n{context}\n\n"
            "❓ **Question:**\n{question}\n\n"
            "**✅ Answer:**"
        )

    def fetch_gold_price(self):
        """Scrape the gold-rates page and return its text content as one string."""
        loader = WebBaseLoader("https://groww.in/gold-rates")
        documents = loader.load()
        content = " ".join([doc.page_content for doc in documents])
        return content

    def response_query(self, query: str, llmModel: str = "llama-3.1-70b-versatile"):
        """Answer *query* against freshly scraped gold-price context.

        Returns the LLM's answer text.
        """
        gold_price_context = self.fetch_gold_price()
        gold_document = Document(page_content=gold_price_context, metadata={"source": "GoldReturns"})
        custom_prompt = PromptTemplate(
            input_variables=["context", "question"],
            template=self.template,
        )
        llm = ChatGroq(temperature=0, model_name=llmModel)
        chain = custom_prompt | llm
        result = chain.invoke({
            "context": [gold_document],
            "question": query
        })
        return result.content
|
48 |
+
|
49 |
+
if __name__ == "__main__":
    # Ad-hoc smoke test: scrape live rates and answer one sample question.
    mirror = GoldPricingJewelMirror()
    sample_query = "What is the current gold price of bangalore?"
    answer = mirror.response_query(sample_query)
    print(answer)
|
54 |
+
|
55 |
+
# future_use: For grading
|
56 |
+
|
57 |
+
# class gradeIntent(BaseModel):
|
58 |
+
#binary_score: str = Field(description="The intent related to gold pricing 'yes' or 'no'")
|
59 |
+
#
|
60 |
+
# self.intent_prompt = ChatPromptTemplate.from_template(
|
61 |
+
# template=(
|
62 |
+
# "You are an intelligent assistant responsible for evaluating whether a user's query pertains to pricing or price-related topics, "
|
63 |
+
# "specifically about commodities such as gold, silver, or other market prices.\n\n"
|
64 |
+
# "User query: {query}\n\n"
|
65 |
+
# "Does this query relate to gold pricing or similar price-related topics? Respond with '1' for yes and '0' for no. "
|
66 |
+
# "Your response must be exactly one character long, either '0' or '1'. Do not include any other text."
|
67 |
+
# )
|
68 |
+
# )
|
69 |
+
# self.intent_grading_chain = self.intent_prompt | self.llm
|
src/llms/__init__.py
ADDED
File without changes
|
src/models/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-07-31
|
4 |
+
"""
|
src/models/apis_models.py
ADDED
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-07-31
|
4 |
+
"""
|
5 |
+
from pydantic import BaseModel, EmailStr
|
6 |
+
from typing import List, Optional
|
7 |
+
|
8 |
+
|
9 |
+
## ---------------------------------- SpeechTranscription API Models ----------------------------------
|
10 |
+
class TextToSpeechRequest(BaseModel):
    """Payload for /text_to_speech: text plus voice-selection options."""
    text: str
    lang: str  # language code, e.g. "en"
    tld: str  # accent top-level domain (gTTS-style, e.g. "com") — confirm against the TTS backend


class SpeechToTextRequest(BaseModel):
    """Language selection for /speech_to_text (the audio itself arrives as a file upload)."""
    lang: str
|
18 |
+
|
19 |
+
|
20 |
+
## ---------------------------------- Chatbot API Models ----------------------------------
|
21 |
+
|
22 |
+
class AddTextRequest(BaseModel):
    """Raw text to index into a chatbot's vector store."""
    vectorstore: str
    text: str


class AddWebsiteRequest(BaseModel):
    """Website URLs to scrape and index, plus a display source label."""
    website_urls: List[str]
    vectorstore: str
    source: str


class AnswerQueryRequest(BaseModel):
    """A question to answer against a chatbot's vector store."""
    query: str
    vectorstore: str
    llm_model: str = "llama3-70b-8192"


class DataAnalyzerRequest(BaseModel):
    """Free-text analytical query over an uploaded dataset."""
    query: str


class GetLinksRequest(BaseModel):
    """Root URL whose outgoing links should be collected."""
    url: str


class YoutubeTranscriptRequest(BaseModel):
    """YouTube video URLs whose transcripts should be indexed."""
    vectorstore: str
    urls: List[str]


class WebsiteUrlTextExtractionRequest(BaseModel):
    """Single URL for plain-text extraction."""
    url: str


class WebsiteUrlTextExtractionListRequest(BaseModel):
    """Batch of URLs for plain-text extraction."""
    urls: List[str]


class GetCurrentCountRequest(BaseModel):
    """Vector store whose current usage count is requested."""
    vectorstore: str


class ListChatbotsRequest(BaseModel):
    """Owner whose chatbots should be listed."""
    username: str


class GetChatHistoryRequest(BaseModel):
    """Vector store whose chat history is requested."""
    vectorstore: str


class ChatHistoryItem(BaseModel):
    """One question/response exchange with its timestamp."""
    timestamp: str
    question: str
    response: str


class DeleteChatbotRequest(BaseModel):
    """Vector store of the chatbot to delete."""
    vectorstore: str


class AddQAPairRequest(BaseModel):
    """Question/answer pair to index into a chatbot."""
    vectorstore: str
    question: str
    answer: str


class TrainChatbotRequest(BaseModel):
    """Source-data URLs used to (re)train a chatbot."""
    vectorstore: str
    urls: list[str]


class LoadPDFRequest(BaseModel):
    """Vector store into which an uploaded PDF will be loaded."""
    vectorstore: str


class LoadEditedJson(BaseModel):
    """User-edited JSON payload to load back into a chatbot."""
    vectorstore: str
    data_source_name: str
    source_endpoint: str
    json_data: dict


class PublicPrivateCheckRequest(BaseModel):
    """Read (mode omitted) or set (mode given) a chatbot's privacy flag."""
    vectorstore: str
    mode: str | None = None


class DeleteChatbotSourceRequest(BaseModel):
    """Named data source to remove from a chatbot."""
    vectorstore: str
    data_source_name: str
|
112 |
+
|
113 |
+
|
114 |
+
## ---------------------------------- User Management API Models ----------------------------------
|
115 |
+
|
116 |
+
class UserSignupRequest(BaseModel):
    """Credentials for creating a new account."""
    username: str
    email: EmailStr
    password: str


class UserSigninRequest(BaseModel):
    """Email/password login credentials."""
    email: EmailStr
    password: str


class CheckSessionRequest(BaseModel):
    """User whose session validity should be checked."""
    user_id: str


class GetUserDataRequest(BaseModel):
    """Access token identifying the user whose profile is requested."""
    access_token: str


class RefreshSessionRequest(BaseModel):
    """Refresh token used to mint a new session."""
    refresh_token: str


class LoginWithAccessTokenRequest(BaseModel):
    """Stored token pair used to restore a previous session."""
    access_token: str
    refresh_token: str


class UsernameCreationOauthRequest(BaseModel):
    """Username to attach to an OAuth-created account."""
    username: str
    user_id: str
    email: str


class SetSessionDataRequest(BaseModel):
    """Session tokens to persist for a user."""
    access_token: str
    refresh_token: str
    user_id: str


class SignOutRequest(BaseModel):
    """User to sign out."""
    user_id: str


class NewChatbotRequest(BaseModel):
    """Name and owner of a chatbot to create."""
    chatbot_name: str
    username: str
|
163 |
+
|
164 |
+
|
165 |
+
## ---------------------------------- Analytics API Models ----------------------------------
|
166 |
+
|
167 |
+
|
168 |
+
class FeedbackRequest(BaseModel):
    """End-user feedback text, optionally scoped to one chatbot."""
    feedback: str
    user_id: str
    # NOTE(review): in Pydantic v2, Optional[...] WITHOUT a default is still a
    # required field — confirm whether these optional-typed fields (here and in
    # the models below) were meant to default to None.
    vectorstore: Optional[str]


class UserSatisfactionRateRequest(BaseModel):
    """Date-range / chatbot filter for the satisfaction-rate metric."""
    start_date: Optional[str]
    end_date: Optional[str]
    vectorstore: Optional[str]


class TokenUsageRequest(BaseModel):
    """Date-range / chatbot filter for token-usage aggregation."""
    start_date: Optional[str]
    end_date: Optional[str]
    vectorstore: Optional[str]


class AverageSessionInteractionRequest(BaseModel):
    """Date-range / chatbot filter for average interactions per session."""
    start_date: Optional[str]
    end_date: Optional[str]
    vectorstore: Optional[str]


class DailyActiveEndUserRequest(BaseModel):
    """Date-range / chatbot filter for daily active end-user counts."""
    start_date: Optional[str]
    end_date: Optional[str]
    vectorstore: Optional[str]


class DailyChatCountRequest(BaseModel):
    """Date-range / chatbot filter for daily chat counts."""
    start_date: Optional[str]
    end_date: Optional[str]
    vectorstore: Optional[str]
|
202 |
+
|
203 |
+
## ---------------------------------- Gold Price API Models ----------------------------------
|
204 |
+
|
205 |
+
class GoldPriceRequest(BaseModel):
    """Payload for /gold_price: free-text question plus the Groq model to use."""
    query: str
    # NOTE(review): this default differs from GoldPricingJewelMirror.response_query's
    # "llama-3.1-70b-versatile" — confirm which model is intended.
    llm_model: str = "llama3-70b-8192"
|
src/models/response_handling_models.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-28
|
4 |
+
"""
|
5 |
+
from typing import Any, Dict
|
6 |
+
|
7 |
+
from pydantic import BaseModel
|
8 |
+
|
9 |
+
|
10 |
+
## ---------------------------------- Response Handling API Models ----------------------------------
|
11 |
+
class SuccessResponse(BaseModel):
    """Standard success envelope: status string, HTTP-style code, payload dict."""
    status: str
    code: int
    data: Dict[str, Any]


class ErrorResponse(BaseModel):
    """Standard error envelope: status string, HTTP-style code, error-details dict."""
    status: str
    code: int
    error: Dict[str, Any]


class SuccessResponseUsermanagement(BaseModel):
    """Success envelope for user-management endpoints; adds a human-readable message."""
    status: str
    code: int
    message: str
    data: Dict[str, Any]
|
src/models/utls.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-29
|
4 |
+
"""
|
5 |
+
from pydantic import BaseModel, Field
|
6 |
+
|
7 |
+
|
8 |
+
class FollowUps(BaseModel):
    """Structured-output schema: exactly three suggested follow-up questions."""
    q1: str = Field(description="First Follow-up Question")
    q2: str = Field(description="Second Follow-up Question")
    q3: str = Field(description="Third Follow-up Question")
|
src/pipeline/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-07-31
|
4 |
+
"""
|
src/pipeline/conversai_analytic_pipeline.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-28
|
4 |
+
"""
|
5 |
+
import requests
|
6 |
+
|
7 |
+
from src.api.conversai_api import supabase_client
|
8 |
+
from src.services.supabase.analytics.analytic_tables import feedback_table, chat_history_table, add_feedback, \
|
9 |
+
track_usage
|
10 |
+
|
11 |
+
|
12 |
+
class ConversAIAnalyticPipeline:
    """Thin facade over the Supabase analytics helpers, bound to the shared client."""

    def __init__(self):
        self.supabase_client = supabase_client

    def feedback_table_(self, vectorstore):
        """Return the feedback-table rows for *vectorstore*."""
        return feedback_table(self.supabase_client, vectorstore)

    def chat_history_table_(self, vectorstore):
        """Return the chat-history-table rows for *vectorstore*."""
        return chat_history_table(self.supabase_client, vectorstore)

    def add_feedback_(self, feedback, user_id, city, client_ip, vectorstore):
        """Persist one feedback entry together with the caller's location info."""
        return add_feedback(self.supabase_client, feedback, user_id, city, client_ip, vectorstore)

    def track_usage_(self, vectorstore: str, endpoint: str):
        """Record one usage event of *endpoint* for *vectorstore*."""
        return track_usage(supabase_client=self.supabase_client, vectorstore=vectorstore, endpoint=endpoint)

    def get_ip_info(self, ip: str):
        """Best-effort reverse lookup of *ip* to a city name via ipinfo.io.

        Returns "Unknown" on any failure (network error, bad JSON, missing key).
        """
        try:
            # Fix: added a timeout — the original request could hang the
            # worker indefinitely if ipinfo.io stalled.
            response = requests.get(f"https://ipinfo.io/{ip}/json", timeout=5)
            data = response.json()
            return data.get("city", "Unknown")
        except Exception:
            # Deliberate best-effort: location enrichment is non-critical.
            return "Unknown"
|
src/pipeline/conversai_pipeline.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-02
|
4 |
+
"""
|
5 |
+
from langchain_core.prompts import ChatPromptTemplate
|
6 |
+
from langchain_core.prompts import PromptTemplate
|
7 |
+
from src.services.embeddings.Qdrant_BM25_embedding import qdrant_bm25_embedding
|
8 |
+
from src.services.document.add_document import AddDocument
|
9 |
+
from src.services.answer_query.answerquery import AnswerQuery
|
10 |
+
from src.services.file_analyzer.data_analyzer import DataAnalyzer
|
11 |
+
from src.services.get_links.web_scraper import WebScraper
|
12 |
+
# from src.services.ocr.easy_ocr.easy_ocr_ import EasyOCR_ as OCRService
|
13 |
+
from src.services.pdf_extraction.image_pdf.image_pdf_text_extraction import get_text_from_image_pdf
|
14 |
+
from src.services.pdf_extraction.text_pdf.text_pdf_extraction import extract_text_from_pdf
|
15 |
+
from src.services.video_transcription.youtube_video_transcription.youtube_video_transcript import \
|
16 |
+
youtube_video_transcription
|
17 |
+
from src.services.website_url.text_extraction_urls import extract_text_from_url_list, extract_text_from_url
|
18 |
+
from src.utils.utils import json_parser
|
19 |
+
from src.prompts.custom_prompts import _custom_prompts
|
20 |
+
from src.services.ocr.replicate_ocr.replicate_ocr import ReplicateOCR as OCRService
|
21 |
+
from src.services.embeddings.jina_embeddings import jina_embedding
|
22 |
+
|
23 |
+
class ConversAIPipeline:
|
24 |
+
def __init__(self):
|
25 |
+
prompt_template=_custom_prompts["RAG_ANSWER_PROMPT"]
|
26 |
+
follow_up_prompt_template = _custom_prompts["FOLLOW_UP_PROMPT"]
|
27 |
+
prompt = ChatPromptTemplate.from_template(prompt_template)
|
28 |
+
json_parser_ = json_parser()
|
29 |
+
follow_up_prompt = PromptTemplate(
|
30 |
+
template=follow_up_prompt_template,
|
31 |
+
input_variables=["context"],
|
32 |
+
partial_variables={"format_instructions": json_parser_.get_format_instructions()},
|
33 |
+
)
|
34 |
+
self.vector_embedding = jina_embedding()
|
35 |
+
self.sparse_embedding = qdrant_bm25_embedding()
|
36 |
+
self.add_document_service = AddDocument(self.vector_embedding, self.sparse_embedding)
|
37 |
+
|
38 |
+
self.answer_query_service = AnswerQuery(vector_embedding=self.vector_embedding,
|
39 |
+
sparse_embedding=self.sparse_embedding, prompt=prompt,
|
40 |
+
follow_up_prompt=follow_up_prompt, json_parser=json_parser_)
|
41 |
+
self.data_analyzer = DataAnalyzer()
|
42 |
+
self.get_website_links = WebScraper()
|
43 |
+
self.ocr_service = OCRService()
|
44 |
+
|
45 |
+
def add_document_(self, texts: list[tuple[str]], vectorstore: str):
|
46 |
+
return self.add_document_service.add_documents(texts=texts, vectorstore=vectorstore)
|
47 |
+
|
48 |
+
def answer_query_(self, query: str, vectorstore: str, llm_model: str = "llama-3.1-70b-versatile"):
|
49 |
+
output, follow_up_questions, source = self.answer_query_service.answer_query(query=query,
|
50 |
+
vectorstore=vectorstore,
|
51 |
+
llmModel=llm_model)
|
52 |
+
return output, follow_up_questions, source
|
53 |
+
|
54 |
+
def data_analyzer_(self, query: str, dataframe):
|
55 |
+
return self.data_analyzer.analyze_data(query=query, dataframe=dataframe)
|
56 |
+
|
57 |
+
def get_links_(self, url: str, timeout: int):
|
58 |
+
return self.get_website_links.get_links(url=url, timeout=timeout)
|
59 |
+
|
60 |
+
def image_pdf_text_extraction_(self, image_pdf: bytes):
|
61 |
+
return get_text_from_image_pdf(pdf_bytes=image_pdf)
|
62 |
+
|
63 |
+
def text_pdf_extraction_(self, pdf: str):
|
64 |
+
return extract_text_from_pdf(pdf_path=pdf)
|
65 |
+
|
66 |
+
    def youtube_transcript_(self, url: list):
        """Fetch transcription(s) for the given YouTube video URL(s).

        NOTE(review): annotated ``list`` but passed as a single keyword —
        confirm whether the helper expects one URL or many.
        """
        return youtube_video_transcription(youtube_video_url=url)
|
68 |
+
|
69 |
+
    def website_url_text_extraction_(self, url: str):
        """Extract page text from a single website URL."""
        return extract_text_from_url(url=url)
|
71 |
+
|
72 |
+
    def website_url_text_extraction_list_(self, urls: list):
        """Extract page text from each URL in ``urls``."""
        return extract_text_from_url_list(urls=urls)
|
src/pipeline/speech_transcription_pipeline.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-07-31
|
4 |
+
"""
|
5 |
+
from src.services.speech_to_text.speech_to_text_replicate import SpeechToTextReplicate
|
6 |
+
from src.services.text_to_speech.text_to_speech_gtts import TextToSpeech
|
7 |
+
|
8 |
+
|
9 |
+
class SpeechTranscriptionPipeline:
    """Thin facade pairing speech-to-text (Replicate) with text-to-speech (gTTS)."""

    def __init__(self):
        self.speech_to_text_ = SpeechToTextReplicate()
        self.text_to_speech_ = TextToSpeech()

    def text_to_speech(self, text: str, lang: str, tld: str) -> str:
        """Synthesize ``text`` into speech for the given language/accent domain."""
        return self.text_to_speech_.conversion(text, lang, tld)

    def speech_to_text(self, audio, lang: str) -> str:
        """Transcribe ``audio``; the timestamped variant is computed but discarded."""
        _with_timestamps, plain_transcript = self.speech_to_text_.transcribe_audio(audio=audio, language=lang)
        return plain_transcript
|
src/pipeline/user_management_pipeline.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
5 |
+
from src.services.supabase.user_management.user_service import UserManagement
|
6 |
+
from src.services.supabase.user_management.chatbot_management import SupabaseChatoBotManagement
|
7 |
+
from src.services.supabase.conversai_setup.conversai_user_db_setup import ConversAIUserDBSetup
|
8 |
+
from src.services.supabase.user_management.chat_history import get_chat_history
|
9 |
+
from supabase.client import create_client
|
10 |
+
from qdrant_client import QdrantClient
|
11 |
+
import os
|
12 |
+
|
13 |
+
# Service credentials come from the environment. os.getenv returns None for a
# missing variable, so client construction in the pipeline below fails fast
# when the environment is not configured.
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
18 |
+
|
19 |
+
|
20 |
+
class SupabaseUserManagementPipeline:
    """Facade over Supabase auth/user services and Qdrant collection admin.

    Builds one Supabase client and one Qdrant client, wires the user,
    chatbot, and DB-setup services around them, and exposes thin delegating
    wrappers for the API layer.
    """

    def __init__(self):
        # SUPABASE_*/QDRANT_* are read from the environment at module load;
        # construction fails here if they are unset.
        self.supabase_client = create_client(SUPABASE_URL, SUPABASE_KEY)
        self.qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
        self.user_management = UserManagement(supabase_client=self.supabase_client)
        self.chatbot_management = SupabaseChatoBotManagement(supabase_client=self.supabase_client,
                                                            qdrant_client=self.qdrant_client)
        self.user_db_setup = ConversAIUserDBSetup(supabase_client=self.supabase_client)

    def user_signup_(self, username: str, email: str, password: str) -> dict:
        """Register a new user account."""
        return self.user_management.user_signup(username=username, email=email, password=password)

    def user_signin_(self, email: str, password: str) -> dict:
        """Sign a user in with email/password credentials."""
        return self.user_management.user_signin(email=email, password=password)

    def check_session_(self) -> dict:
        """Return the state of the current auth session."""
        return self.user_management.check_session()

    def get_user_data_(self, access_token: str) -> dict:
        """Fetch data for the user owning ``access_token``."""
        return self.user_management.get_user_data(access_token=access_token)

    def refresh_session__(self, refresh_token: str) -> dict:
        """Exchange ``refresh_token`` for a refreshed session."""
        return self.user_management.refresh_session_(refresh_token=refresh_token)

    def login_with_access_token_(self, access_token: str, refresh_token: str) -> dict:
        """Log in using an existing access/refresh token pair."""
        return self.user_management.login_with_access_token(access_token=access_token, refresh_token=refresh_token)

    def username_creation_oauth_(self, username: str, user_id: str, email: str):
        """Create a username record for an OAuth-authenticated user."""
        return self.user_management.user_name_creation_oauth(user_id=user_id, username=username, email=email)

    def set_session_data_(self, access_token: str, refresh_token: str, user_id: str):
        """Persist session tokens for ``user_id``."""
        return self.user_management.set_session_data(access_token=access_token, refresh_token=refresh_token,
                                                     user_id=user_id)

    def sign_out_(self):
        """Terminate the current session."""
        return self.user_management.sign_out_()

    def oauth_signin_(self) -> dict:
        """Start an OAuth sign-in flow."""
        return self.user_management.oauth()

    def new_chatbot_(self, chatbot_name: str, username: str):
        """Provision a new chatbot for ``username``."""
        return self.chatbot_management.new_chatbot(chatbot_name=chatbot_name, username=username)

    def get_chat_history_(self, vectorstore: str):
        """Read chat history for the given vector-store/session name."""
        return get_chat_history(vectorstore=vectorstore, supabase_client=self.supabase_client)

    def delete_table(self, table_name: str):
        """Delete the Supabase table ``table_name`` via chatbot management."""
        return self.chatbot_management.delete_table(table_name=table_name)

    def list_tables(self, username: str):
        """List tables owned by ``username``."""
        return self.chatbot_management.list_tables(username=username)

    def create_data_source_name(self, source_name: str, username: str):
        """Register a data-source name for ``username``."""
        return self.chatbot_management.create_data_source_name(source_name=source_name, username=username)

    def delete_qdrant_cluster(self, vectorstorename):
        """Delete the Qdrant collection ``vectorstorename`` (returns None)."""
        self.qdrant_client.delete_collection(collection_name=vectorstorename)
|
77 |
+
|
78 |
+
if __name__ == "__main__":
    # Ad-hoc manual check: requires SUPABASE_*/QDRANT_* env vars and hits
    # live services — not part of any automated suite.
    pipeline = SupabaseUserManagementPipeline()
    pipeline.new_chatbot_(chatbot_name="anything", username="techconsp")
|
src/prompts/__init__.py
ADDED
File without changes
|
src/prompts/custom_prompts.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datetime
|
2 |
+
from langchain.prompts import ChatPromptTemplate
|
3 |
+
from langchain.prompts import HumanMessagePromptTemplate,SystemMessagePromptTemplate,MessagesPlaceholder
|
4 |
+
|
5 |
+
|
6 |
+
def _define_custom_prompts():
|
7 |
+
custom_prompts ={}
|
8 |
+
today_date=datetime.datetime.now().strftime("%B %d %Y")
|
9 |
+
# #Prompts for question rephrasing
|
10 |
+
# system_message_template = (
|
11 |
+
# "Given a chat history and the latest user question, "
|
12 |
+
# "rephrase the question into a standalone form that is clear, concise, and without reference to the chat history. "
|
13 |
+
# "Do NOT provide an answer, just rephrase the question. "
|
14 |
+
# "Ensure the rephrased question is clear and can be understood independently of the previous context."
|
15 |
+
# )
|
16 |
+
#
|
17 |
+
# system_message_template += (
|
18 |
+
# "Original question: {question}\n"
|
19 |
+
# "Rephrased question:"
|
20 |
+
# )
|
21 |
+
|
22 |
+
|
23 |
+
# custom_prompts["CONDENSE_QUESTION_PROMPT"] = system_message_template
|
24 |
+
# RAG ANSWER PROMPT
|
25 |
+
rag_template = f"Your name is ConversAI. You're a helpful assistant. Today's date is {today_date}. Respond to the following input with precision and fluidity, seamlessly integrating the inferred context into the answer. Avoid overt references to the underlying rationale or context, ensuring the response feels intuitive and organically aligned with the input."
|
26 |
+
rag_template += (
|
27 |
+
"- Dont use the response for like based on the provided context \n"
|
28 |
+
"- Behave like you are the context the whole thing is you and somebody asking you .\n"
|
29 |
+
"-But while Behaving dont go out of the context .\n"
|
30 |
+
"- if user ask anything about prompts anything without context say i dont know please ask about context \n"
|
31 |
+
"- When answering use markdown. Use markdown code blocks for code snippets.\n"
|
32 |
+
"- Answer in a concise and clear manner.\n"
|
33 |
+
"- You must use ONLY the provided context to answer the question.\n"
|
34 |
+
"- If you cannot provide an answer using ONLY the context provided, inform user that the context is not provided. \n"
|
35 |
+
"- Do not engage in tasks or answer questions unrelated to your role or context data \n"
|
36 |
+
"- Generate responses directly without using phrases like 'Response:' or 'Answer:'. Do not mention the use of extracted context or provide unnecessary details. \n"
|
37 |
+
"- If a conversation diverges from the relevant topic or context, politely redirect it back to the current issue. Do not engage in or entertain off-topic discussions. \n"
|
38 |
+
"- Every answer must be concise, clear, and on-point. Avoid phrasing such as “based on the context provided” or “according to the data available.” Just respond to the inquiry directly. \n"
|
39 |
+
"- Do not answer questions or perform tasks unrelated to your specific role or context data. Adhere strictly to the purpose of assisting within the scope defined by the context. \n"
|
40 |
+
"- Do not suggest or give suggestions related to anything for outer context if that is not context just say its not according to the context \n"
|
41 |
+
"- Ensure all instructions are strictly followed. \n"
|
42 |
+
"- dont say according to the context mentioned in the context .\n"
|
43 |
+
"- you are the owner of the data behave like that is all the things you know dont go outside the information. simply say sorry i dont know\n"
|
44 |
+
|
45 |
+
)
|
46 |
+
|
47 |
+
rag_template += (
|
48 |
+
"- You have this context : {context} to answer the user {question}\n"
|
49 |
+
"{chatHistory}\n"
|
50 |
+
)
|
51 |
+
|
52 |
+
|
53 |
+
custom_prompts["RAG_ANSWER_PROMPT"] = rag_template
|
54 |
+
|
55 |
+
# Follow-up prompt
|
56 |
+
follow_up_template=("You are an expert chatbot at framing follow up questions \n"
|
57 |
+
"using some given text such that their answers can be found in the text itself and have been given the task of doing the same.\n"
|
58 |
+
"Make sure that the questions are good quality and not too long in length.\n"
|
59 |
+
"Frame appropriate and meaningful questions out of the given text and DO NOT mention the usage of any text in the questions.\n"
|
60 |
+
"Also, if no the given text says NO CONTEXT FOUND, please return an empty string for each question asked.\n"
|
61 |
+
"{format_instructions}\n"
|
62 |
+
"{context}\n"
|
63 |
+
)
|
64 |
+
|
65 |
+
custom_prompts["FOLLOW_UP_PROMPT"]=follow_up_template
|
66 |
+
|
67 |
+
|
68 |
+
return custom_prompts
|
69 |
+
|
70 |
+
_custom_prompts =_define_custom_prompts()
|
src/services/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-07-31
|
4 |
+
"""
|
src/services/answer_query/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
src/services/answer_query/answerquery.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
|
3 |
+
from langchain_core.output_parsers import StrOutputParser
|
4 |
+
from langchain_core.runnables.history import RunnableWithMessageHistory
|
5 |
+
from langchain.memory import ChatMessageHistory
|
6 |
+
from langchain.retrievers import ContextualCompressionRetriever
|
7 |
+
from langchain_community.document_compressors import JinaRerank
|
8 |
+
from langchain_core.chat_history import BaseChatMessageHistory
|
9 |
+
from src.services.vector_db.qdrent.upload_document import upload_document_existing_collection, \
|
10 |
+
answer_query_from_existing_collection
|
11 |
+
from langchain_groq import ChatGroq
|
12 |
+
import os

# Propagate the Jina key into the env var the SDK reads. os.getenv returns
# None when JINA_API is unset, and assigning None into os.environ raises
# TypeError at import time — so only set the key when it is present.
_jina_api = os.getenv("JINA_API")
if _jina_api is not None:
    os.environ["JINA_API_KEY"] = _jina_api
|
14 |
+
|
15 |
+
|
16 |
+
class AnswerQuery:
    """Answers queries with hybrid retrieval, Jina reranking, per-session
    chat history, and generated follow-up questions.

    Bug fix vs. previous version: retrieval sources and the built context
    were stored in module-level globals (``global sources`` /
    ``global temp_context``), leaking state across instances and threads.
    They are now instance attributes set by ``format_docs`` and read back by
    ``answer_query`` after the chain run.
    """

    def __init__(self, prompt, vector_embedding, sparse_embedding, follow_up_prompt, json_parser):
        # Per-session ChatMessageHistory objects, keyed by vectorstore name.
        self.chat_history_store = {}
        self.compressor = JinaRerank(model="jina-reranker-v2-base-multilingual")
        self.vector_embed = vector_embedding
        self.sparse_embed = sparse_embedding
        self.prompt = prompt
        self.follow_up_prompt = follow_up_prompt
        self.json_parser = json_parser
        # Populated by format_docs() during a chain invoke.
        self._last_sources = []
        self._last_context = ""

    def format_docs(self, docs):
        """Concatenate retrieved documents into a single context string.

        Side effects: records the de-duplicated source list and the built
        context on the instance for answer_query() to pick up.
        (Previous annotation ``docs: str`` was wrong — this receives an
        iterable of Document objects.)
        """
        context = ""
        sources = []
        for doc in docs:
            context += f"{doc.page_content}\n\n\n"
            sources.append(doc.metadata["source"])
        if not context:
            context = "No context found"
        self._last_sources = list(set(sources))
        self._last_context = context
        return context

    def answer_query(self, query: str, vectorstore: str, llmModel: str = "llama-3.1-70b-versatile"):
        """Answer ``query`` against the named Qdrant collection.

        Returns (answer_text, follow_up_questions, sources). The vectorstore
        name doubles as the chat-history session id.
        """
        session_id = vectorstore
        vector_store = answer_query_from_existing_collection(vector_embed=self.vector_embed,
                                                             sparse_embed=self.sparse_embed,
                                                             vectorstore=vectorstore)

        retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10, "fetch_k": 20})
        compression_retriever = ContextualCompressionRetriever(
            base_compressor=self.compressor, base_retriever=retriever
        )
        brain_chain = (
            {"context": RunnableLambda(lambda x: x["question"]) | compression_retriever | RunnableLambda(self.format_docs),
             "question": RunnableLambda(lambda x: x["question"]),
             "chatHistory": RunnableLambda(lambda x: x["chatHistory"])}
            | self.prompt
            | ChatGroq(model=llmModel, temperature=0.75, max_tokens=512)
            | StrOutputParser()
        )
        message_chain = RunnableWithMessageHistory(
            brain_chain,
            self.get_session_history,
            input_messages_key="question",
            history_messages_key="chatHistory",
        )
        chain = RunnablePassthrough.assign(messages_trimmed=self.trim_messages) | message_chain
        follow_up_chain = self.follow_up_prompt | ChatGroq(model_name="llama-3.1-70b-versatile",
                                                           temperature=0) | self.json_parser

        output = chain.invoke(
            {"question": query},
            {"configurable": {"session_id": session_id}},
        )
        # format_docs ran inside the chain; use the context it recorded.
        follow_up_questions = follow_up_chain.invoke({"context": self._last_context})

        return output, follow_up_questions, self._last_sources

    def trim_messages(self, chain_input):
        """Keep only the most recent message in every session history
        (bounds prompt size). Always returns True for use in assign()."""
        for store_name in self.chat_history_store:
            messages = self.chat_history_store[store_name].messages
            if len(messages) > 1:
                self.chat_history_store[store_name].clear()
                for message in messages[-1:]:
                    self.chat_history_store[store_name].add_message(message)
        return True

    def get_session_history(self, session_id: str) -> BaseChatMessageHistory:
        """Return (creating on first use) the history for ``session_id``."""
        if session_id not in self.chat_history_store:
            self.chat_history_store[session_id] = ChatMessageHistory()
        return self.chat_history_store[session_id]
|
src/services/document/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
src/services/document/add_document.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
5 |
+
import string
|
6 |
+
from uuid import uuid4
|
7 |
+
from langchain.docstore.document import Document
|
8 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
9 |
+
from src.services.vector_db.qdrent.upload_document import upload_document_existing_collection
|
10 |
+
|
11 |
+
|
12 |
+
class AddDocument:
    """Cleans, chunks, and uploads (text, source) pairs into a Qdrant collection."""

    def __init__(self, vector_embedding, sparse_embedding):
        self.vector_embed = vector_embedding
        self.sparse_embed = sparse_embedding

    def add_documents(self, texts: list[tuple[str]], vectorstore: str):
        """Normalize each (text, source) tuple, split into chunks, and upsert.

        Cleaning: newlines become spaces and all punctuation except periods
        is stripped before chunking.
        """
        chunker = RecursiveCharacterTextSplitter(
            chunk_size=400,
            chunk_overlap=100,
            add_start_index=True,
        )
        # Translation table removing every punctuation character but ".".
        strip_punct = str.maketrans('', '', string.punctuation.replace(".", ""))

        docs = []
        for text_tuple in texts:
            cleaned = text_tuple[0].replace("\n", " ").translate(strip_punct)
            docs.append(Document(page_content=cleaned, metadata={"source": text_tuple[1]}))

        chunks = chunker.split_documents(docs)
        upload_document_existing_collection(vector_embed=self.vector_embed,
                                            sparse_embed=self.sparse_embed,
                                            vectorstore=vectorstore, documents=chunks)
|
src/services/embeddings/BGE-M3_vector_embedding.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
5 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
6 |
+
|
7 |
+
|
8 |
+
def bge_m3_vector_embedding():
    """Dense embedder backed by BAAI/bge-m3 on CUDA with normalized outputs."""
    embedder = HuggingFaceEmbeddings(
        model_name="BAAI/bge-m3",
        model_kwargs={"device": "cuda"},
        encode_kwargs={"normalize_embeddings": True},
    )
    return embedder
|
src/services/embeddings/Qdrant_BM25_embedding.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
5 |
+
from langchain_qdrant import FastEmbedSparse
|
6 |
+
|
7 |
+
|
8 |
+
def qdrant_bm25_embedding():
    """Sparse BM25 embedder (Qdrant/BM25) via FastEmbed, 20 worker threads."""
    return FastEmbedSparse(model="Qdrant/BM25", threads=20, parallel=0)
|
src/services/embeddings/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
src/services/embeddings/jina_embeddings.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.embeddings import JinaEmbeddings
|
2 |
+
import os
|
3 |
+
|
4 |
+
def jina_embedding():
    """Jina v3 text-embedding client, keyed from the JINA_API env var."""
    return JinaEmbeddings(
        jina_api_key=os.getenv('JINA_API'),
        model_name="jina-embeddings-v3",
    )
|
src/services/embeddings/sentence_transformers_all_MiniLM_L6_v2_vector_embedding.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
5 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
6 |
+
from src.utils.utils import load_ini_config
|
7 |
+
|
8 |
+
|
9 |
+
def all_minilm_l6_v2_vector_embedding():
    """Build the HuggingFace dense embedder configured via ``config.ini``.

    NOTE(review): despite the function name, the configured checkpoint is
    "BAAI/bge-base-en-v1.5", not sentence-transformers/all-MiniLM-L6-v2 —
    confirm which model is intended before renaming either side.
    """
    config = load_ini_config("config.ini")

    model_kwargs = {"device": config.get('all_mini_l6_v2_vector_embedding', 'device')}
    # Bug fix: config.get() returns a *string*; any non-empty string
    # (including "False") is truthy. getboolean() parses the ini value.
    encode_kwargs = {
        "normalize_embeddings": config.getboolean('all_mini_l6_v2_vector_embedding', 'normalize_embeddings')
    }

    return HuggingFaceEmbeddings(
        model_name="BAAI/bge-base-en-v1.5",
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )
|
src/services/file_analyzer/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
src/services/file_analyzer/data_analyzer.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
5 |
+
import os
|
6 |
+
import base64
|
7 |
+
from langchain_groq import ChatGroq
|
8 |
+
from pandasai import SmartDataframe
|
9 |
+
from src.utils.utils import load_ini_config
|
10 |
+
|
11 |
+
|
12 |
+
class DataAnalyzer:
    """Natural-language analysis over pandas DataFrames via pandasai + Groq."""

    def __init__(self):
        self.config = load_ini_config("config.ini")

        self.llm_config = ChatGroq(name=self.config.get('data_analyzer', 'groq_llm_name'))
        # Extra instruction text appended to every user query.
        self.additional_query = self.config.get('data_analyzer', 'additional_query')

    def analyze_data(self, query, dataframe):
        """Run ``query`` against ``dataframe``.

        Returns the chat response, or — when pandasai saved a chart to
        disk — a base64 ``data:image/png`` URI of that file.
        """
        query += self.additional_query
        # Bug fix: bool(config.get(...)) was always True for any non-empty
        # string (e.g. "False"); getboolean() parses the ini value properly.
        df = SmartDataframe(
            dataframe,
            config={"llm": self.llm_config,
                    "verbose": self.config.getboolean("data_analyzer", "verbose")},
        )
        response = df.chat(query)
        # Bug fix: pandasai can return non-string results (numbers, frames);
        # os.path.isfile() raises TypeError on those — probe strings only.
        if isinstance(response, str) and os.path.isfile(response):
            with open(response, "rb") as file:
                b64string = base64.b64encode(file.read()).decode("utf-8")
            return f"data:image/png;base64,{b64string}"
        return response
|
src/services/get_links/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
src/services/get_links/web_scraper.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
import requests
|
3 |
+
from bs4 import BeautifulSoup
|
4 |
+
from urllib.parse import urlparse, urljoin
|
5 |
+
from concurrent.futures import ThreadPoolExecutor
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
class WebScraper:
    """Collects same-site links from a page and one level of its children."""

    def get_links(self, url: str, timeout=4):
        """Return de-duplicated same-domain links reachable from ``url``.

        ``timeout`` bounds the total crawl time in seconds (it is NOT a
        per-request network timeout). Bug fix: the previous version had a
        stray ``else: continue`` after the anchor loop (invalid syntax) and
        indexed ``x[-1]`` which raises on an empty string.
        """
        start = time.time()

        def links_on_page(page_url: str) -> list:
            response = requests.get(page_url)
            soup = BeautifulSoup(response.content, "lxml")
            found = []
            for anchor in soup.find_all("a"):
                href = anchor.attrs.get("href")
                if href is None:
                    continue
                if urlparse(href).netloc == urlparse(page_url).netloc:
                    # Absolute link on the same host.
                    found.append(href)
                elif not href.startswith(("//", "file", "javascript", "tel", "mailto", "http")):
                    # Relative link: resolve against the page URL.
                    found.append(urljoin(page_url + "/", href))
            # Drop fragment-only links and duplicates.
            return list({link for link in found if "#" not in link})

        top_links = links_on_page(url)
        unique_links = set()
        for link in top_links:
            if time.time() - start > timeout:
                break
            unique_links |= set(links_on_page(link))
        # Normalize away trailing slashes before final de-duplication.
        return list({x[:-1] if x.endswith("/") else x for x in unique_links})
|
44 |
+
|
45 |
+
if __name__ == "__main__":
    # Ad-hoc smoke test: crawls a live site — performs real network
    # requests and is not part of any automated suite.
    scraper = WebScraper()
    links = scraper.get_links("https://www.additudemag.com/")
    print(len(links))
    print(type(links))
|
src/services/ocr/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
src/services/ocr/easy_ocr/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Created By: ishwor subedi
|
3 |
+
Date: 2024-08-23
|
4 |
+
"""
|
src/services/ocr/easy_ocr/easy_ocr_.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# """
|
2 |
+
# Created By: ishwor subedi
|
3 |
+
# Date: 2024-08-23
|
4 |
+
# """
|
5 |
+
# import cv2
|
6 |
+
# import easyocr
|
7 |
+
# import numpy as np
|
8 |
+
# from src.utils.utils import load_ini_config
|
9 |
+
#
|
10 |
+
#
|
11 |
+
# class EasyOCR_:
|
12 |
+
# def __init__(self):
|
13 |
+
# self.config = load_ini_config("config.ini")
|
14 |
+
# self.reader = easyocr.Reader([self.config.get('easy_ocr', 'language')],
|
15 |
+
# gpu=bool(self.config.get('easy_ocr', 'gpu')),
|
16 |
+
# model_storage_directory=self.config.get('easy_ocr', 'model_path'))
|
17 |
+
#
|
18 |
+
# def read_text(self, image: np.array):
|
19 |
+
# return self.reader.readtext(image, paragraph=True)
|
src/services/ocr/replicate_ocr/__init__.py
ADDED
File without changes
|