Delete RD
- RD/.DS_Store +0 -0
- RD/.gitattributes +0 -35
- RD/AI_full_stack_repository/.DS_Store +0 -0
- RD/AI_full_stack_repository/README.md +0 -3
- RD/AI_full_stack_repository/app.py +0 -29
- RD/AI_full_stack_repository/config.yml +0 -11
- RD/AI_full_stack_repository/data/Doc-1.pdf +0 -0
- RD/AI_full_stack_repository/data/Doc-2.pdf +0 -0
- RD/AI_full_stack_repository/data/invoice_1.pdf +0 -0
- RD/AI_full_stack_repository/ingest.py +0 -34
- RD/AI_full_stack_repository/llm/__init__.py +0 -0
- RD/AI_full_stack_repository/llm/__pycache__/__init__.cpython-310.pyc +0 -0
- RD/AI_full_stack_repository/llm/__pycache__/__init__.cpython-312.pyc +0 -0
- RD/AI_full_stack_repository/llm/__pycache__/llm.cpython-310.pyc +0 -0
- RD/AI_full_stack_repository/llm/__pycache__/llm.cpython-312.pyc +0 -0
- RD/AI_full_stack_repository/llm/__pycache__/prompts.cpython-310.pyc +0 -0
- RD/AI_full_stack_repository/llm/__pycache__/prompts.cpython-312.pyc +0 -0
- RD/AI_full_stack_repository/llm/__pycache__/wrapper.cpython-310.pyc +0 -0
- RD/AI_full_stack_repository/llm/__pycache__/wrapper.cpython-312.pyc +0 -0
- RD/AI_full_stack_repository/llm/llm.py +0 -29
- RD/AI_full_stack_repository/llm/prompts.py +0 -13
- RD/AI_full_stack_repository/llm/test.py +0 -95
- RD/AI_full_stack_repository/llm/wrapper.py +0 -51
- RD/AI_full_stack_repository/models/.DS_Store +0 -0
- RD/AI_full_stack_repository/models/model_download.txt +0 -1
- RD/AI_full_stack_repository/requirements.txt +0 -13
- RD/AI_full_stack_repository/screenshot_images/.DS_Store +0 -0
- RD/AI_full_stack_repository/screenshot_images/invoice_image.png +0 -0
- RD/AI_full_stack_repository/vectorestore/.DS_Store +0 -0
- RD/AI_full_stack_repository/vectorestore/chroma/.DS_Store +0 -0
- RD/README.md +0 -13
RD/.DS_Store
DELETED
Binary file (6.15 kB)
RD/.gitattributes
DELETED
@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
RD/AI_full_stack_repository/.DS_Store
DELETED
Binary file (6.15 kB)
RD/AI_full_stack_repository/README.md
DELETED
@@ -1,3 +0,0 @@
-# AI_full_stack_repository
-I built a LLM of Mistral7b-based chat with text by using my full-stack AI skills at work. On top of that, I created a prototype using the Streamlit API (module).
-# sample_test
RD/AI_full_stack_repository/app.py
DELETED
@@ -1,29 +0,0 @@
-from ingest import run_ingest
-from llm.wrapper import setup_qa_chain
-from llm.wrapper import query_embeddings
-import timeit
-
-
-import streamlit as st
-def main():
-    st.set_page_config(page_title="Document seemless process ")
-    st.title("Auto text extraction with AI Planet ")
-    st.subheader("I can help you in extracting text from pdf,documents ....")
-    pdf = st.file_uploader("Upload text here for now, only PDF files allowed ", type=["pdf","txt"],accept_multiple_files=True)
-    submit=st.button("Extract Data")
-    if submit:
-        with st.spinner('Wait for it...'):
-            run_ingest()
-    question = st.text_input("Please wirte a Query: ", key="Please ask question on uploaded pdf")
-    submit = st.button('Generate')
-    if submit:
-        with st.spinner('Wait for it...'):
-            qa_chain = setup_qa_chain()
-            response = qa_chain({'query': question})
-            answer = {'answer': response['result']}
-            st.subheader("Answer:")
-            st.write(answer)
-            st.success("Hope I was able to save your time❤️")
-#Invoking main function
-if __name__ == '__main__':
-    main()
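A note on the deleted app.py: the files returned by st.file_uploader were never written to disk, so the run_ingest() call could only index PDFs already sitting in data/. A minimal, hypothetical patch for the "Extract Data" branch (the loop and target directory are assumptions, reusing the DATA_PATH of 'data/' from config.yml):

import os

# Hypothetical sketch: persist the Streamlit uploads into data/ first,
# so the ingestion step actually sees the newly uploaded files.
for uploaded_file in pdf or []:
    with open(os.path.join("data", uploaded_file.name), "wb") as out:
        out.write(uploaded_file.getbuffer())
run_ingest()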
RD/AI_full_stack_repository/config.yml
DELETED
@@ -1,11 +0,0 @@
-RETURN_SOURCE_DOCUMENTS: True
-VECTOR_COUNT: 2
-CHUNK_SIZE: 300
-CHUNK_OVERLAP: 30
-DATA_PATH: 'data/'
-DB_FAISS_PATH: 'vectorstore/chroma'
-MODEL_TYPE: 'mistral'
-MODEL_BIN_PATH: 'models/mistral-7b-instruct-v0.1.Q5_K_M.gguf'
-EMBEDDINGS: 'sentence-transformers/all-mpnet-base-v2'
-MAX_NEW_TOKENS: 2048
-TEMPERATURE: 0.00
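For reference, every deleted module loaded this file through python-box, with the same lines repeated verbatim in ingest.py, llm/llm.py, and llm/wrapper.py below. Note that DB_FAISS_PATH ('vectorstore/chroma') never reached the vector store code, which persisted Chroma to './vectorestore/db_faiss' instead, and the EMBEDDINGS model was likewise bypassed for a hard-coded 'all-MiniLM-L6-v2'. The loading pattern, for the record:

import box
import yaml

# Shared config-loading pattern from the deleted modules; the relative
# path assumes the repo root as the working directory.
with open('config.yml', 'r', encoding='utf8') as ymlfile:
    cfg = box.Box(yaml.safe_load(ymlfile))

# Only DATA_PATH, VECTOR_COUNT, RETURN_SOURCE_DOCUMENTS and MODEL_BIN_PATH
# were actually read back out of cfg elsewhere in the repo.
print(cfg.DATA_PATH, cfg.VECTOR_COUNT)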
RD/AI_full_stack_repository/data/Doc-1.pdf
DELETED
Binary file (10.1 kB)
RD/AI_full_stack_repository/data/Doc-2.pdf
DELETED
Binary file (12.2 kB)
RD/AI_full_stack_repository/data/invoice_1.pdf
DELETED
Binary file (45.3 kB)
RD/AI_full_stack_repository/ingest.py
DELETED
@@ -1,34 +0,0 @@
-import box
-import yaml
-from langchain.vectorstores import FAISS
-from langchain.document_loaders import PyPDFDirectoryLoader
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.document_loaders import PyPDFLoader, DirectoryLoader
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain_community.embeddings.sentence_transformer import (
-    SentenceTransformerEmbeddings,
-)
-from langchain.vectorstores import Chroma
-
-# Import config vars
-with open('config.yml', 'r', encoding='utf8') as ymlfile:
-    cfg = box.Box(yaml.safe_load(ymlfile))
-
-
-def run_ingest():
-    loader = DirectoryLoader(cfg.DATA_PATH,
-                             glob='*.pdf',
-                             loader_cls=PyPDFLoader)
-
-    documents = loader.load()
-
-    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20,length_function =len,add_start_index = True)
-    text = text_splitter.split_documents(documents)
-    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2",model_kwargs={'device': 'cpu'})
-    # load it into Chroma
-    # save to disk
-    db2 = Chroma.from_documents(text, embedding_function, persist_directory="./vectorestore/db_faiss")
-
-if __name__ == "__main__":
-    run_ingest()
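A quick way to sanity-check what run_ingest() wrote, mirroring the load-from-disk path in llm/wrapper.py further down (same embedding model and persist directory; the query string is borrowed from the commented-out experiment in llm/test.py):

from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain.vectorstores import Chroma

# Reopen the persisted store exactly as wrapper.py does, then probe it.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
db = Chroma(persist_directory="./vectorestore/db_faiss", embedding_function=embedding_function)
for doc in db.similarity_search("What is invoice number?", k=2):
    print(doc.page_content)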
RD/AI_full_stack_repository/llm/__init__.py
DELETED
File without changes
RD/AI_full_stack_repository/llm/__pycache__/__init__.cpython-310.pyc
DELETED
Binary file (160 Bytes)
RD/AI_full_stack_repository/llm/__pycache__/__init__.cpython-312.pyc
DELETED
Binary file (164 Bytes)
RD/AI_full_stack_repository/llm/__pycache__/llm.cpython-310.pyc
DELETED
Binary file (629 Bytes)
RD/AI_full_stack_repository/llm/__pycache__/llm.cpython-312.pyc
DELETED
Binary file (970 Bytes)
RD/AI_full_stack_repository/llm/__pycache__/prompts.cpython-310.pyc
DELETED
Binary file (454 Bytes)
RD/AI_full_stack_repository/llm/__pycache__/prompts.cpython-312.pyc
DELETED
Binary file (465 Bytes)
RD/AI_full_stack_repository/llm/__pycache__/wrapper.cpython-310.pyc
DELETED
Binary file (1.9 kB)
RD/AI_full_stack_repository/llm/__pycache__/wrapper.cpython-312.pyc
DELETED
Binary file (2.7 kB)
RD/AI_full_stack_repository/llm/llm.py
DELETED
@@ -1,29 +0,0 @@
-from langchain.llms import CTransformers
-import box
-import yaml
-from langchain.llms import LlamaCpp
-config={'max_new_tokens': 2000,
-        'temperature': 0.01,
-        "context_length" : 4000}
-# Import config vars
-with open('config.yml', 'r', encoding='utf8') as ymlfile:
-    cfg = box.Box(yaml.safe_load(ymlfile))
-
-
-def setup_llm():
-    # llm = CTransformers(model=cfg.MODEL_BIN_PATH,
-    #                     model_type=cfg.MODEL_TYPE,
-    #                     max_new_tokens=cfg.MAX_NEW_TOKENS,
-    #                     temperature=cfg.TEMPERATURE
-    #                     )
-    llm = LlamaCpp(
-        streaming = True,
-        model_path=cfg.MODEL_BIN_PATH,#"mistral-7b-instruct-v0.1.Q4_K_M.gguf",
-        temperature=0.75,
-        top_p=1,
-        verbose=True,
-        n_ctx=4096
-    )
-
-
-    return llm
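The commented-out CTransformers block was the only path that honored config.yml; the LlamaCpp call that replaced it hard-codes temperature=0.75 against the configured TEMPERATURE of 0.00 and ignores MAX_NEW_TOKENS. A hedged sketch of wiring the config back in (using LangChain's standard LlamaCpp parameters; mapping MAX_NEW_TOKENS onto max_tokens is an assumption):

from langchain.llms import LlamaCpp

# Sketch: same construction as the deleted setup_llm(), but reading the
# tunables from cfg (loaded from config.yml above) instead of hard-coding them.
llm = LlamaCpp(
    model_path=cfg.MODEL_BIN_PATH,
    temperature=cfg.TEMPERATURE,    # 0.00 in config.yml vs the hard-coded 0.75
    max_tokens=cfg.MAX_NEW_TOKENS,  # 2048 in config.yml
    top_p=1,
    n_ctx=4096,
    streaming=True,
    verbose=True,
)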
RD/AI_full_stack_repository/llm/prompts.py
DELETED
@@ -1,13 +0,0 @@
-# Note: Precise formatting of spacing and indentation of the prompt template is important,
-# as it is highly sensitive to whitespace changes. For example, it could have problems generating
-# a summary from the pieces of context if the spacing is not done correctly
-
-qa_template = """Use the following pieces of information to answer the user's question.
-If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
-Context: {context}
-Question: {question}
-
-Only return the helpful answer below and nothing else.
-Helpful answer:
-"""
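llm/wrapper.py below feeds this template to a PromptTemplate with 'context' and 'question' as the input variables; a standalone render with hypothetical values, to eyeball the exact prompt text the chain would send:

from langchain.prompts import PromptTemplate
from llm.prompts import qa_template

# Render the template outside the chain to inspect the final prompt.
prompt = PromptTemplate(template=qa_template, input_variables=['context', 'question'])
print(prompt.format(context="Invoice 1001, total $42.", question="What is the invoice number?"))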
RD/AI_full_stack_repository/llm/test.py
DELETED
@@ -1,95 +0,0 @@
-# from langchain.vectorstores import Chroma
-# # from langchain_chroma import Chroma
-# from langchain_community.document_loaders import TextLoader
-# from langchain_community.embeddings.sentence_transformer import (
-#     SentenceTransformerEmbeddings,
-# )
-# from langchain.document_loaders import PyPDFDirectoryLoader
-# from langchain_text_splitters import CharacterTextSplitter
-# from langchain.text_splitter import CharacterTextSplitter
-# from langchain.text_splitter import RecursiveCharacterTextSplitter
-# import os
-# os.getcwd()
-
-# #Load Documents
-# def file_loader(filename):
-#     if filename.endswith('.txt'):
-#         # load the text document and split it into chunks
-#         loader = TextLoader(filename)
-#         documents = loader.load()
-#         return documents
-#     #Loads pdf files available in a directory with pypdf
-#     elif filename.endswith('.pdf'):
-#         loader = PyPDFDirectoryLoader(filename)
-#         documents = loader.load()
-#         return documents
-# filename = '/data'
-# def load_docs(directory):
-#     loader = PyPDFDirectoryLoader(directory)
-#     documents = loader.load()
-#     if not documents:
-#         raise ValueError(f"No documents loaded from directory: {directory}")
-#     return documents
-# documents = load_docs(filename)
-# print(f"Number of loaded documents: {len(documents)}")
-
-# # split it into chunks
-# def split_docs(documents, chunk_size=2000, chunk_overlap=20):
-#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
-#     docs = text_splitter.split_documents(documents)
-#     if not docs:
-#         raise ValueError("Document splitting resulted in an empty list.")
-#     return docs
-# docs = split_docs(documents)
-# print(f"Number of document chunks: {len(docs)}")
-
-
-# # Generate text embeddings
-# #Huggingface LLM for creating Embeddings for documents/text
-
-# # create the open-source embedding function
-# embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2",model_kwargs={'device': 'cpu'})
-
-# # load it into Chroma
-# db = Chroma.from_documents(docs, embedding_function)
-
-# # query it
-# query = "What is invoice number?"
-# docs = db.similarity_search(query)
-
-# # print results
-# print(docs[0].page_content)
-
-#---------------------------------------------------------PDF-READER------------------------------------------------------------------
-# import easyocr
-# reader = easyocr.Reader(['en'])
-# result = reader.readtext(r'/Users/hemasagarendluri1996/llm-mistral-invoice-cpu/screenshot_images/invoice_image.png')
-# for detection in result:
-#     print(detection[1])
-import streamlit as st
-
-#Hello! It seems like you want to import the Streamlit library in Python. Streamlit is a powerful open-source framework used for building web applications with interactive data visualizations and machine learning models. To import Streamlit, you'll need to ensure that you have it installed in your Python environment.
-#Once you have Streamlit installed, you can import it into your Python script using the import statement,
-def main():
-
-    st.set_page_config(page_title="Document seemless process ")
-    st.title("Auto text extraction with AI Planet ")
-    st.subheader("I can help you in extracting text from pdf,documents ....")
-
-
-    # Upload the Invoices (pdf files)...
-    pdf = st.file_uploader("Upload invoices here for now, only PDF files allowed and will accept other formate as well", type=["pdf"],accept_multiple_files=True)
-
-    submit=st.button("Extract Data")
-    response = 4+5
-    if submit:
-        with st.spinner('Wait for it...'):
-            st.subheader("Answer:")
-            st.write(response)
-
-
-
-
-#Invoking main function
-if __name__ == '__main__':
-    main()
RD/AI_full_stack_repository/llm/wrapper.py
DELETED
@@ -1,51 +0,0 @@
-import box
-import yaml
-from langchain.prompts import PromptTemplate
-from langchain.chains import RetrievalQA
-# from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import FAISS
-from llm.prompts import qa_template
-from llm.llm import setup_llm
-from langchain_community.embeddings.sentence_transformer import (
-    SentenceTransformerEmbeddings,
-)
-from langchain.vectorstores import Chroma
-# Import config vars
-with open('config.yml', 'r', encoding='utf8') as ymlfile:
-    cfg = box.Box(yaml.safe_load(ymlfile))
-def set_qa_prompt():
-    """
-    Prompt template for QA retrieval for each vectorstore
-    """
-    prompt = PromptTemplate(template=qa_template,
-                            input_variables=['context', 'question'])
-    return prompt
-
-def build_retrieval_qa_chain(llm, prompt):
-    # create the open-source embedding function
-    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2",model_kwargs={'device': 'cpu'})
-    # load from disk
-    chromadb = Chroma(persist_directory="./vectorestore/db_faiss", embedding_function=embedding_function)
-    retriever = chromadb.as_retriever(search_kwargs={'k': cfg.VECTOR_COUNT})
-    qa_chain = RetrievalQA.from_chain_type(llm=llm,
-                                           chain_type='stuff',
-                                           retriever=retriever,
-                                           return_source_documents=cfg.RETURN_SOURCE_DOCUMENTS,
-                                           chain_type_kwargs={'prompt': prompt})
-
-    return qa_chain
-def setup_qa_chain():
-    llm = setup_llm()
-    qa_prompt = set_qa_prompt()
-    qa_chain = build_retrieval_qa_chain(llm, qa_prompt)
-    return qa_chain
-
-
-
-
-def query_embeddings(query):
-    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2",model_kwargs={'device': 'cpu'})
-    chromadb = Chroma(persist_directory="./vectorestore/db_faiss", embedding_function=embedding_function)
-    retriever = chromadb.as_retriever(search_kwargs={'k': cfg.VECTOR_COUNT})
-    semantic_search = retriever.similarity_search_with_relevance_scores(query)
-    return semantic_search
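One likely bug preserved in the deleted query_embeddings(): similarity_search_with_relevance_scores is a method of the Chroma vector store, not of the retriever returned by as_retriever(), so the call would raise AttributeError. A sketch of the presumable intent, querying the store directly:

def query_embeddings(query):
    # Same setup as the deleted version...
    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
    chromadb = Chroma(persist_directory="./vectorestore/db_faiss", embedding_function=embedding_function)
    # ...but call the scored search on the store itself.
    return chromadb.similarity_search_with_relevance_scores(query, k=cfg.VECTOR_COUNT)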
RD/AI_full_stack_repository/models/.DS_Store
DELETED
Binary file (6.15 kB)
RD/AI_full_stack_repository/models/model_download.txt
DELETED
@@ -1 +0,0 @@
-Download the quantized mistral-7b-instruct-v0.1.Q5_K_M.gguf model from: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/tree/main
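The same file can also be fetched programmatically; a sketch using huggingface_hub (not pinned in requirements.txt, so an extra dependency is assumed):

from huggingface_hub import hf_hub_download

# Download the quantized GGUF into models/, matching cfg.MODEL_BIN_PATH.
hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf",
    local_dir="models",
)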
RD/AI_full_stack_repository/requirements.txt
DELETED
@@ -1,13 +0,0 @@
-streamlit==1.29.0
-langchain==0.1.13
-# unstructured==0.12.3
-tiktoken==0.5.2
-pypdf==4.1.0
-sentence-transformers==2.5.1
-langchain-community
-langchain-chroma
-numpy==1.26.1
-python-box
-llama-cpp-python==0.2.76
-# pdfservices-sdk==4.0.0
-watchdog==4.0.1
RD/AI_full_stack_repository/screenshot_images/.DS_Store
DELETED
Binary file (6.15 kB)
RD/AI_full_stack_repository/screenshot_images/invoice_image.png
DELETED
Binary file (220 kB)
RD/AI_full_stack_repository/vectorestore/.DS_Store
DELETED
Binary file (6.15 kB)
RD/AI_full_stack_repository/vectorestore/chroma/.DS_Store
DELETED
Binary file (6.15 kB)
RD/README.md
DELETED
@@ -1,13 +0,0 @@
----
-title: RD
-emoji: 📊
-colorFrom: indigo
-colorTo: purple
-sdk: streamlit
-sdk_version: 1.35.0
-app_file: app.py
-pinned: false
-license: mit
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference