|
import pdfplumber |
|
import streamlit as st |
|
import requests |
|
import json |
|
import redis |
|
import redis.commands.search |
|
from redis.commands.search.field import TagField, VectorField, TextField |
|
from redis.commands.search.indexDefinition import IndexDefinition, IndexType |
|
import logging |
|
from redis.commands.search.query import Query |
|
import numpy as np |
|
from typing import List, Dict, Any |
|
from utlis.constant import * |
|
from PIL import Image |
|
import google.generativeai as genai |
|
# SECURITY(review): the Gemini API key was hard-coded in source. Prefer the
# GOOGLE_API_KEY environment variable; the literal fallback keeps the app
# working unchanged, but this exposed key should be revoked and rotated.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", "AIzaSyAhz9UBzkEIYI886zZRm40qqB1Kd_9Y4-0"))
|
import base64 |
|
import sqlite3 |
|
def initialize_session_state():
    """Seed st.session_state with the defaults the app expects on first run.

    Each key is written only if it is not already present, so reruns do not
    clobber state the user has accumulated.
    """
    defaults = {
        "doc_ortext": None,
        "token": "abcd",
        "service": None,
        "use_document": False,
        "flag": False,
        # Key spelling ("embdding") kept as-is: other modules may read it.
        "embdding_model": None,
        "indexing_method": None,
        "uploaded_files": None,
        "messages": [{"role": "assistant", "content": "How can I help you?"}],
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
|
|
|
|
|
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF into a single string.

    Args:
        pdf_path: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        The concatenated text of all pages. Pages from which pdfplumber
        cannot extract text contribute nothing (``extract_text`` returns
        None for image-only pages; the original code crashed with a
        TypeError on ``text += None`` in that case).
    """
    text = ""
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text(
                x_tolerance=2, y_tolerance=4, layout=True, x_density=5, y_density=10
            )
            if page_text:
                text += page_text
    return text
|
|
|
def delete_service(token, service_slected_to_delete):
    """Ask the backend to remove each selected service and report the outcome.

    Args:
        token: API authentication token.
        service_slected_to_delete: iterable of service names to remove.

    Side effects: one DELETE request per service to REMOVE_SERVICE_API and a
    st.success / st.error message per result.
    """
    # Headers are loop-invariant; build them once.
    headers = {'Content-Type': 'application/json'}
    for service_name in service_slected_to_delete:
        payload = {
            "token": token,
            "servicename": service_name,
        }
        response = requests.delete(
            REMOVE_SERVICE_API, data=json.dumps(payload), headers=headers
        )
        # The API signals success with a JSON boolean "success" field.
        if response.json().get("success") is True:
            st.success(f"{service_name} deleted successfully")
        else:
            st.error(f"{service_name} not deleted successfully")
|
|
|
def delete_document(token, service, document_slected_to_delete):
    """Ask the backend to remove documents from a service and report the outcome.

    Args:
        token: API authentication token.
        service: name of the service the documents belong to.
        document_slected_to_delete: list of document names to remove.

    Side effects: one DELETE request to REMOVE_DOCUMENTS_API and a single
    st.success / st.error message. (Leftover debug print() calls removed.)
    """
    payload = {
        "token": token,
        "service_name": service,
        "document_names": document_slected_to_delete,
    }
    headers = {'Content-Type': 'application/json'}
    response = requests.delete(
        REMOVE_DOCUMENTS_API, data=json.dumps(payload), headers=headers
    )
    # The API signals success with a "status" field equal to "success".
    if response.json().get("status") == "success":
        st.success("document(s) deleted successfully")
    else:
        st.error("document(s) not deleted successfully")
|
def gemini_vision(file):
    """OCR an uploaded image by asking a Gemini vision model for its text.

    Args:
        file: path or file-like object accepted by ``PIL.Image.open``.

    Returns:
        The model's text response (the extracted text).
    """
    image = Image.open(file)
    # Fixed typo in the prompt ("fromt" -> "from").
    prompt = "please extract all text from this image"
    # NOTE(review): 'gemini-pro-vision' has been deprecated by Google;
    # consider migrating to a current multimodal model (e.g. gemini-1.5-flash).
    model = genai.GenerativeModel('gemini-pro-vision')
    response = model.generate_content([prompt, image])
    return response.text
|
def add_service(token, servicename):
    """Register a new service with the backend and report the outcome.

    Args:
        token: API authentication token.
        servicename: name of the service to create.

    Side effects: one POST to ADD_SERVICES_API and a st.success / st.error
    message. On failure the raw response body is shown to aid debugging.
    """
    payload = {
        "token": token,
        "services": [
            {"servicename": servicename},
        ],
    }
    headers = {'Content-Type': 'application/json'}
    response = requests.post(
        ADD_SERVICES_API, data=json.dumps(payload), headers=headers
    )
    # Success is indicated by a non-empty "added_services" list in the reply.
    if response.json().get("added_services"):
        st.success(f"{servicename} added successfully")
    else:
        st.error(response.text)
|
def add_text_document(token, servicename):
    """Upload the pasted text in st.session_state as a document to the backend.

    Reads st.session_state.text_area (content) and st.session_state.name_text_area
    (document name), sanitizes the name, base64-encodes the text and POSTs it
    to ADD_STORE_DOCUMENT.

    Args:
        token: API authentication token.
        servicename: service the document is stored under.
    """
    document_text = st.session_state.text_area
    # Map every character the backend rejects in names to "_" in one pass
    # (same set the original replaced one .replace() at a time).
    sanitize = str.maketrans({c: "_" for c in " ()-."})
    document_name = st.session_state.name_text_area.translate(sanitize)

    encoded_text = base64.b64encode(document_text.encode('utf-8')).decode('utf-8')

    payload = {
        "token": token,
        "service_name": servicename,
        "document_name": document_name,
        "file": encoded_text,
    }
    headers = {'Content-Type': 'application/json'}
    response = requests.post(
        ADD_STORE_DOCUMENT, data=json.dumps(payload), headers=headers
    )
    if response.json().get("status") == "success":
        st.success(f"{document_name} uploaded successfully as text")
    else:
        st.error(f"{document_name} not uploaded successfully")
|
|
|
def add_document(token, servicename):
    """Upload the file held in st.session_state.uploaded_files to the backend.

    The document name is derived from the file name (spaces removed, the
    characters ()-. mapped to "_"), the file content is base64-encoded, and
    both are POSTed to ADD_STORE_DOCUMENT.

    Args:
        token: API authentication token.
        servicename: service the document is stored under.
    """
    file = st.session_state.uploaded_files

    # Same sanitization as before: drop spaces, then map ()-. to "_".
    document_name = file.name.replace(" ", "")
    document_name = document_name.translate(str.maketrans({c: "_" for c in "()-."}))

    encoded_file = base64.b64encode(file.read()).decode('utf-8')

    payload = {
        "token": token,
        "service_name": servicename,
        "document_name": document_name,
        "file": encoded_file,
    }
    headers = {'Content-Type': 'application/json'}
    response = requests.post(
        ADD_STORE_DOCUMENT, data=json.dumps(payload), headers=headers
    )
    # Report using the same sanitized name that was actually uploaded.
    # (The original re-derived a *different* name here — spaces mapped to "_"
    # instead of removed — so the message named a document that didn't exist.)
    if response.json().get("status") == "success":
        st.success(f"{document_name} uploaded successfully")
    else:
        st.error(f"{document_name} not uploaded successfully")
|
|
|
|
|
def get_all_keys(d):
    """Collect every key appearing anywhere in a nested dict structure.

    Descends into dict values and into dicts that are direct elements of
    list values (lists nested inside lists are not traversed, matching the
    original recursive behavior).

    Args:
        d: the dictionary to scan.

    Returns:
        A list of the unique keys found (order unspecified).
    """
    found = set()
    pending = [d]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            for key, value in node.items():
                found.add(key)
                # Non-dict, non-list values are simply ignored when popped.
                pending.append(value)
        elif isinstance(node, list):
            pending.extend(item for item in node if isinstance(item, dict))
    return list(found)
|
def display_and_validate_schema(schema):
    """Show an editable JSON-schema text area and validate what the user typed.

    Args:
        schema: current schema dict, or a falsy value to fall back to
            DEFAULT_SCHEMA as the initial editor content.

    Returns:
        The parsed schema dict when the text area holds valid JSON,
        otherwise None (with an error message shown).
    """
    initial = schema if schema else DEFAULT_SCHEMA
    schema_input = st.text_area("JSON Schema", json.dumps(initial, indent=2), height=300)
    try:
        parsed = json.loads(schema_input)
    except json.JSONDecodeError:
        st.error("The JSON schema is invalid. Please correct it and try again.")
        return None
    st.success("JSON schema is valid.")
    return parsed
|
def handle_comments(comments, keys):
    """Render a paginated set of per-key comment inputs and collect the values.

    Args:
        comments: existing key -> comment mapping; mutated in place with the
            values currently in the inputs.
        keys: all keys a comment can be attached to.

    Returns:
        The (same) comments mapping, updated from the widgets on the
        currently selected page.
    """
    per_page = 6
    # Ceiling division: number of pages needed to show every key.
    total_pages = -(-len(keys) // per_page)

    st.write("Please provide comments for each key to assist our system:")

    page = st.number_input("Page", min_value=1, max_value=total_pages, step=1)
    first = (page - 1) * per_page

    for key in keys[first:first + per_page]:
        with st.expander(f"{key}"):
            comments[key] = st.text_input(f"{key}", value=comments.get(key, ""))

    return comments
|
|
|
def save_schema(document_id, schema):
    """Persist *schema* (JSON-serialized) for *document_id* in the local cache DB.

    Uses REPLACE so an existing row for the same document_id is overwritten.

    Args:
        document_id: primary-key value identifying the document.
        schema: JSON-serializable object to store.
    """
    conn = sqlite3.connect('document_cache.db')
    try:
        conn.execute(
            'REPLACE INTO schemas (document_id, schema) VALUES (?, ?)',
            (document_id, json.dumps(schema)),
        )
        conn.commit()
    finally:
        # Close even when execute/commit raises; the original leaked the
        # connection on error.
        conn.close()
|
|
|
def get_schema(document_id):
    """Return the cached schema for *document_id*, or None when absent.

    Args:
        document_id: primary-key value identifying the document.

    Returns:
        The deserialized schema object, or None if no row exists.
    """
    conn = sqlite3.connect('document_cache.db')
    try:
        row = conn.execute(
            'SELECT schema FROM schemas WHERE document_id = ?', (document_id,)
        ).fetchone()
    finally:
        # Close even when the query raises; the original leaked the
        # connection on error.
        conn.close()
    return json.loads(row[0]) if row else None
|
|
|
def save_comments(document_id, comments):
    """Persist *comments* (JSON-serialized) for *document_id* in the local cache DB.

    Uses REPLACE so an existing row for the same document_id is overwritten.

    Args:
        document_id: primary-key value identifying the document.
        comments: JSON-serializable mapping of key -> comment text.
    """
    conn = sqlite3.connect('document_cache.db')
    try:
        conn.execute(
            'REPLACE INTO comments (document_id, comments) VALUES (?, ?)',
            (document_id, json.dumps(comments)),
        )
        conn.commit()
    finally:
        # Close even when execute/commit raises; the original leaked the
        # connection on error.
        conn.close()
|
|
|
def get_comments(document_id):
    """Return the cached comments for *document_id*, or None when absent.

    Args:
        document_id: primary-key value identifying the document.

    Returns:
        The deserialized comments mapping, or None if no row exists.
    """
    conn = sqlite3.connect('document_cache.db')
    try:
        row = conn.execute(
            'SELECT comments FROM comments WHERE document_id = ?', (document_id,)
        ).fetchone()
    finally:
        # Close even when the query raises; the original leaked the
        # connection on error.
        conn.close()
    return json.loads(row[0]) if row else None
|
|