Hemasagar committed
Commit 79b1b39 · verified · 1 Parent(s): c9fcae3
Files changed (31)
  1. RD/.DS_Store +0 -0
  2. RD/.gitattributes +0 -35
  3. RD/AI_full_stack_repository/.DS_Store +0 -0
  4. RD/AI_full_stack_repository/README.md +0 -3
  5. RD/AI_full_stack_repository/app.py +0 -29
  6. RD/AI_full_stack_repository/config.yml +0 -11
  7. RD/AI_full_stack_repository/data/Doc-1.pdf +0 -0
  8. RD/AI_full_stack_repository/data/Doc-2.pdf +0 -0
  9. RD/AI_full_stack_repository/data/invoice_1.pdf +0 -0
  10. RD/AI_full_stack_repository/ingest.py +0 -34
  11. RD/AI_full_stack_repository/llm/__init__.py +0 -0
  12. RD/AI_full_stack_repository/llm/__pycache__/__init__.cpython-310.pyc +0 -0
  13. RD/AI_full_stack_repository/llm/__pycache__/__init__.cpython-312.pyc +0 -0
  14. RD/AI_full_stack_repository/llm/__pycache__/llm.cpython-310.pyc +0 -0
  15. RD/AI_full_stack_repository/llm/__pycache__/llm.cpython-312.pyc +0 -0
  16. RD/AI_full_stack_repository/llm/__pycache__/prompts.cpython-310.pyc +0 -0
  17. RD/AI_full_stack_repository/llm/__pycache__/prompts.cpython-312.pyc +0 -0
  18. RD/AI_full_stack_repository/llm/__pycache__/wrapper.cpython-310.pyc +0 -0
  19. RD/AI_full_stack_repository/llm/__pycache__/wrapper.cpython-312.pyc +0 -0
  20. RD/AI_full_stack_repository/llm/llm.py +0 -29
  21. RD/AI_full_stack_repository/llm/prompts.py +0 -13
  22. RD/AI_full_stack_repository/llm/test.py +0 -95
  23. RD/AI_full_stack_repository/llm/wrapper.py +0 -51
  24. RD/AI_full_stack_repository/models/.DS_Store +0 -0
  25. RD/AI_full_stack_repository/models/model_download.txt +0 -1
  26. RD/AI_full_stack_repository/requirements.txt +0 -13
  27. RD/AI_full_stack_repository/screenshot_images/.DS_Store +0 -0
  28. RD/AI_full_stack_repository/screenshot_images/invoice_image.png +0 -0
  29. RD/AI_full_stack_repository/vectorestore/.DS_Store +0 -0
  30. RD/AI_full_stack_repository/vectorestore/chroma/.DS_Store +0 -0
  31. RD/README.md +0 -13
RD/.DS_Store DELETED
Binary file (6.15 kB)
 
RD/.gitattributes DELETED
@@ -1,35 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
RD/AI_full_stack_repository/.DS_Store DELETED
Binary file (6.15 kB)
 
RD/AI_full_stack_repository/README.md DELETED
@@ -1,3 +0,0 @@
- # AI_full_stack_repository
- I built a LLM of Mistral7b-based chat with text by using my full-stack AI skills at work. On top of that, I created a prototype using the Streamlit API (module).
- # sample_test
 
RD/AI_full_stack_repository/app.py DELETED
@@ -1,29 +0,0 @@
- from ingest import run_ingest
- from llm.wrapper import setup_qa_chain
- from llm.wrapper import query_embeddings
- import timeit
-
-
- import streamlit as st
- def main():
-     st.set_page_config(page_title="Document seemless process ")
-     st.title("Auto text extraction with AI Planet ")
-     st.subheader("I can help you in extracting text from pdf,documents ....")
-     pdf = st.file_uploader("Upload text here for now, only PDF files allowed ", type=["pdf","txt"], accept_multiple_files=True)
-     submit = st.button("Extract Data")
-     if submit:
-         with st.spinner('Wait for it...'):
-             run_ingest()
-     question = st.text_input("Please wirte a Query: ", key="Please ask question on uploaded pdf")
-     submit = st.button('Generate')
-     if submit:
-         with st.spinner('Wait for it...'):
-             qa_chain = setup_qa_chain()
-             response = qa_chain({'query': question})
-             answer = {'answer': response['result']}
-             st.subheader("Answer:")
-             st.write(answer)
-             st.success("Hope I was able to save your time❤️")
- # Invoking main function
- if __name__ == '__main__':
-     main()
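
The deleted app.py reuses the `submit` variable for both buttons, passes a full sentence as the text input's `key` (Streamlit treats `key` as a widget identifier, not placeholder text), and never writes the uploaded files to disk, so run_ingest() only sees whatever is already in data/. A rerun-safe sketch of the same flow, assuming the same run_ingest and setup_qa_chain helpers:

import streamlit as st

from ingest import run_ingest
from llm.wrapper import setup_qa_chain

def main():
    st.set_page_config(page_title="Document seamless process")
    st.title("Auto text extraction with AI Planet")
    st.subheader("I can help you in extracting text from PDF documents.")
    # Note: uploaded files are not persisted; run_ingest() reads cfg.DATA_PATH from disk.
    pdf = st.file_uploader("Upload PDF files", type=["pdf", "txt"], accept_multiple_files=True)
    if st.button("Extract Data"):
        with st.spinner("Ingesting..."):
            run_ingest()
        st.session_state["ingested"] = True  # survives Streamlit's script reruns
    if st.session_state.get("ingested"):
        question = st.text_input("Please write a query:", key="query")
        if st.button("Generate") and question:
            with st.spinner("Generating..."):
                qa_chain = setup_qa_chain()
                response = qa_chain({"query": question})
            st.subheader("Answer:")
            st.write(response["result"])
            st.success("Hope I was able to save your time❤️")

if __name__ == '__main__':
    main()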
 
RD/AI_full_stack_repository/config.yml DELETED
@@ -1,11 +0,0 @@
- RETURN_SOURCE_DOCUMENTS: True
- VECTOR_COUNT: 2
- CHUNK_SIZE: 300
- CHUNK_OVERLAP: 30
- DATA_PATH: 'data/'
- DB_FAISS_PATH: 'vectorstore/chroma'
- MODEL_TYPE: 'mistral'
- MODEL_BIN_PATH: 'models/mistral-7b-instruct-v0.1.Q5_K_M.gguf'
- EMBEDDINGS: 'sentence-transformers/all-mpnet-base-v2'
- MAX_NEW_TOKENS: 2048
- TEMPERATURE: 0.00
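
Several of these keys are read but never honored by the deleted modules: ingest.py and wrapper.py persist and load Chroma at ./vectorestore/db_faiss rather than DB_FAISS_PATH, the embedder is hardcoded to all-MiniLM-L6-v2 instead of EMBEDDINGS, and llm.py ignores TEMPERATURE and MAX_NEW_TOKENS. Each module loads the file the same way; a minimal sketch mirroring that pattern:

import box   # python-box gives attribute-style access to the YAML dict
import yaml

with open('config.yml', 'r', encoding='utf8') as ymlfile:
    cfg = box.Box(yaml.safe_load(ymlfile))

print(cfg.DB_FAISS_PATH)  # 'vectorstore/chroma', but the code persists to ./vectorestore/db_faiss
print(cfg.EMBEDDINGS)     # 'sentence-transformers/all-mpnet-base-v2', but the code uses all-MiniLM-L6-v2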
 
RD/AI_full_stack_repository/data/Doc-1.pdf DELETED
Binary file (10.1 kB)
 
RD/AI_full_stack_repository/data/Doc-2.pdf DELETED
Binary file (12.2 kB)
 
RD/AI_full_stack_repository/data/invoice_1.pdf DELETED
Binary file (45.3 kB)
 
RD/AI_full_stack_repository/ingest.py DELETED
@@ -1,34 +0,0 @@
1
- import box
2
- import yaml
3
- from langchain.vectorstores import FAISS
4
- from langchain.document_loaders import PyPDFDirectoryLoader
5
- from langchain.text_splitter import CharacterTextSplitter
6
- from langchain.text_splitter import RecursiveCharacterTextSplitter
7
- from langchain.document_loaders import PyPDFLoader, DirectoryLoader
8
- from langchain.embeddings import HuggingFaceEmbeddings
9
- from langchain_community.embeddings.sentence_transformer import (
10
- SentenceTransformerEmbeddings,
11
- )
12
- from langchain.vectorstores import Chroma
13
-
14
- # Import config vars
15
- with open('config.yml', 'r', encoding='utf8') as ymlfile:
16
- cfg = box.Box(yaml.safe_load(ymlfile))
17
-
18
-
19
- def run_ingest():
20
- loader = DirectoryLoader(cfg.DATA_PATH,
21
- glob='*.pdf',
22
- loader_cls=PyPDFLoader)
23
-
24
- documents = loader.load()
25
-
26
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20,length_function =len,add_start_index = True)
27
- text = text_splitter.split_documents(documents)
28
- embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2",model_kwargs={'device': 'cpu'})
29
- # load it into Chroma
30
- # save to disk
31
- db2 = Chroma.from_documents(text, embedding_function, persist_directory="./vectorestore/db_faiss")
32
-
33
- if __name__ == "__main__":
34
- run_ingest()
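
Note that the splitter hardcodes chunk_size=1000 and chunk_overlap=20 rather than CHUNK_SIZE/CHUNK_OVERLAP from config.yml, the FAISS and HuggingFaceEmbeddings imports are unused, and the store is persisted at ./vectorestore/db_faiss. A quick smoke test of the persisted store, as a sketch assuming the same embedding model and persist directory:

from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma

embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
db = Chroma(persist_directory="./vectorestore/db_faiss", embedding_function=embedding_function)
for doc in db.similarity_search("What is invoice number?", k=2):
    print(doc.page_content[:200])  # first 200 characters of each matching chunk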
 
RD/AI_full_stack_repository/llm/__init__.py DELETED
File without changes
RD/AI_full_stack_repository/llm/__pycache__/__init__.cpython-310.pyc DELETED
Binary file (160 Bytes)
 
RD/AI_full_stack_repository/llm/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (164 Bytes)
 
RD/AI_full_stack_repository/llm/__pycache__/llm.cpython-310.pyc DELETED
Binary file (629 Bytes)
 
RD/AI_full_stack_repository/llm/__pycache__/llm.cpython-312.pyc DELETED
Binary file (970 Bytes)
 
RD/AI_full_stack_repository/llm/__pycache__/prompts.cpython-310.pyc DELETED
Binary file (454 Bytes)
 
RD/AI_full_stack_repository/llm/__pycache__/prompts.cpython-312.pyc DELETED
Binary file (465 Bytes)
 
RD/AI_full_stack_repository/llm/__pycache__/wrapper.cpython-310.pyc DELETED
Binary file (1.9 kB)
 
RD/AI_full_stack_repository/llm/__pycache__/wrapper.cpython-312.pyc DELETED
Binary file (2.7 kB)
 
RD/AI_full_stack_repository/llm/llm.py DELETED
@@ -1,29 +0,0 @@
- from langchain.llms import CTransformers
- import box
- import yaml
- from langchain.llms import LlamaCpp
- config = {'max_new_tokens': 2000,
-           'temperature': 0.01,
-           "context_length": 4000}
- # Import config vars
- with open('config.yml', 'r', encoding='utf8') as ymlfile:
-     cfg = box.Box(yaml.safe_load(ymlfile))
-
-
- def setup_llm():
-     # llm = CTransformers(model=cfg.MODEL_BIN_PATH,
-     #                     model_type=cfg.MODEL_TYPE,
-     #                     max_new_tokens=cfg.MAX_NEW_TOKENS,
-     #                     temperature=cfg.TEMPERATURE
-     #                     )
-     llm = LlamaCpp(
-         streaming=True,
-         model_path=cfg.MODEL_BIN_PATH,  # "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
-         temperature=0.75,
-         top_p=1,
-         verbose=True,
-         n_ctx=4096
-     )
-
-
-     return llm
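
setup_llm() loads config.yml but then ignores it: temperature is hardcoded to 0.75 where config.yml sets 0.00, MAX_NEW_TOKENS is never applied, and the module-level config dict for the commented-out CTransformers path is unused. A sketch of a variant that honors the config, assuming the same cfg loading as above (max_tokens is LangChain's LlamaCpp cap on generated tokens):

from langchain.llms import LlamaCpp

def setup_llm():
    return LlamaCpp(
        model_path=cfg.MODEL_BIN_PATH,   # models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
        temperature=cfg.TEMPERATURE,     # 0.00 per config.yml, not the hardcoded 0.75
        max_tokens=cfg.MAX_NEW_TOKENS,   # 2048 per config.yml
        top_p=1,
        n_ctx=4096,                      # context window for the quantized model
        streaming=True,
        verbose=True,
    )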
 
RD/AI_full_stack_repository/llm/prompts.py DELETED
@@ -1,13 +0,0 @@
- # Note: Precise formatting of spacing and indentation of the prompt template is important,
- # as it is highly sensitive to whitespace changes. For example, it could have problems generating
- # a summary from the pieces of context if the spacing is not done correctly
-
- qa_template = """Use the following pieces of information to answer the user's question.
- If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
- Context: {context}
- Question: {question}
-
- Only return the helpful answer below and nothing else.
- Helpful answer:
- """
 
RD/AI_full_stack_repository/llm/test.py DELETED
@@ -1,95 +0,0 @@
- # from langchain.vectorstores import Chroma
- # # from langchain_chroma import Chroma
- # from langchain_community.document_loaders import TextLoader
- # from langchain_community.embeddings.sentence_transformer import (
- #     SentenceTransformerEmbeddings,
- # )
- # from langchain.document_loaders import PyPDFDirectoryLoader
- # from langchain_text_splitters import CharacterTextSplitter
- # from langchain.text_splitter import CharacterTextSplitter
- # from langchain.text_splitter import RecursiveCharacterTextSplitter
- # import os
- # os.getcwd()
-
- # # Load Documents
- # def file_loader(filename):
- #     if filename.endswith('.txt'):
- #         # load the text document and split it into chunks
- #         loader = TextLoader(filename)
- #         documents = loader.load()
- #         return documents
- #     # Loads pdf files available in a directory with pypdf
- #     elif filename.endswith('.pdf'):
- #         loader = PyPDFDirectoryLoader(filename)
- #         documents = loader.load()
- #         return documents
- # filename = '/data'
- # def load_docs(directory):
- #     loader = PyPDFDirectoryLoader(directory)
- #     documents = loader.load()
- #     if not documents:
- #         raise ValueError(f"No documents loaded from directory: {directory}")
- #     return documents
- # documents = load_docs(filename)
- # print(f"Number of loaded documents: {len(documents)}")
-
- # # split it into chunks
- # def split_docs(documents, chunk_size=2000, chunk_overlap=20):
- #     text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
- #     docs = text_splitter.split_documents(documents)
- #     if not docs:
- #         raise ValueError("Document splitting resulted in an empty list.")
- #     return docs
- # docs = split_docs(documents)
- # print(f"Number of document chunks: {len(docs)}")
-
-
- # # Generate text embeddings
- # # Huggingface LLM for creating Embeddings for documents/text
-
- # # create the open-source embedding function
- # embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
-
- # # load it into Chroma
- # db = Chroma.from_documents(docs, embedding_function)
-
- # # query it
- # query = "What is invoice number?"
- # docs = db.similarity_search(query)
-
- # # print results
- # print(docs[0].page_content)
-
- #---------------------------------------------------------PDF-READER------------------------------------------------------------------
- # import easyocr
- # reader = easyocr.Reader(['en'])
- # result = reader.readtext(r'/Users/hemasagarendluri1996/llm-mistral-invoice-cpu/screenshot_images/invoice_image.png')
- # for detection in result:
- #     print(detection[1])
- import streamlit as st
-
- # Hello! It seems like you want to import the Streamlit library in Python. Streamlit is a powerful open-source framework used for building web applications with interactive data visualizations and machine learning models. To import Streamlit, you'll need to ensure that you have it installed in your Python environment.
- # Once you have Streamlit installed, you can import it into your Python script using the import statement,
- def main():
-
-     st.set_page_config(page_title="Document seemless process ")
-     st.title("Auto text extraction with AI Planet ")
-     st.subheader("I can help you in extracting text from pdf,documents ....")
-
-
-     # Upload the Invoices (pdf files)...
-     pdf = st.file_uploader("Upload invoices here for now, only PDF files allowed and will accept other formate as well", type=["pdf"], accept_multiple_files=True)
-
-     submit = st.button("Extract Data")
-     response = 4+5
-     if submit:
-         with st.spinner('Wait for it...'):
-             st.subheader("Answer:")
-             st.write(response)
-
-
-
-
- # Invoking main function
- if __name__ == '__main__':
-     main()
 
RD/AI_full_stack_repository/llm/wrapper.py DELETED
@@ -1,51 +0,0 @@
- import box
- import yaml
- from langchain.prompts import PromptTemplate
- from langchain.chains import RetrievalQA
- # from langchain.embeddings import HuggingFaceEmbeddings
- from langchain.vectorstores import FAISS
- from llm.prompts import qa_template
- from llm.llm import setup_llm
- from langchain_community.embeddings.sentence_transformer import (
-     SentenceTransformerEmbeddings,
- )
- from langchain.vectorstores import Chroma
- # Import config vars
- with open('config.yml', 'r', encoding='utf8') as ymlfile:
-     cfg = box.Box(yaml.safe_load(ymlfile))
- def set_qa_prompt():
-     """
-     Prompt template for QA retrieval for each vectorstore
-     """
-     prompt = PromptTemplate(template=qa_template,
-                             input_variables=['context', 'question'])
-     return prompt
-
- def build_retrieval_qa_chain(llm, prompt):
-     # create the open-source embedding function
-     embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
-     # load from disk
-     chromadb = Chroma(persist_directory="./vectorestore/db_faiss", embedding_function=embedding_function)
-     retriever = chromadb.as_retriever(search_kwargs={'k': cfg.VECTOR_COUNT})
-     qa_chain = RetrievalQA.from_chain_type(llm=llm,
-                                            chain_type='stuff',
-                                            retriever=retriever,
-                                            return_source_documents=cfg.RETURN_SOURCE_DOCUMENTS,
-                                            chain_type_kwargs={'prompt': prompt})
-
-     return qa_chain
- def setup_qa_chain():
-     llm = setup_llm()
-     qa_prompt = set_qa_prompt()
-     qa_chain = build_retrieval_qa_chain(llm, qa_prompt)
-     return qa_chain
-
-
-
-
- def query_embeddings(query):
-     embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
-     chromadb = Chroma(persist_directory="./vectorestore/db_faiss", embedding_function=embedding_function)
-     retriever = chromadb.as_retriever(search_kwargs={'k': cfg.VECTOR_COUNT})
-     semantic_search = retriever.similarity_search_with_relevance_scores(query)
-     return semantic_search
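
query_embeddings() calls similarity_search_with_relevance_scores on the retriever, but in LangChain that method lives on the vector store, so the function raises AttributeError as written. A working sketch, keeping the same store and the k value from config (same imports and cfg as in wrapper.py above):

def query_embeddings(query):
    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
    chromadb = Chroma(persist_directory="./vectorestore/db_faiss", embedding_function=embedding_function)
    # returns (document, score) pairs with scores normalized to [0, 1]
    return chromadb.similarity_search_with_relevance_scores(query, k=cfg.VECTOR_COUNT)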
 
RD/AI_full_stack_repository/models/.DS_Store DELETED
Binary file (6.15 kB)
 
RD/AI_full_stack_repository/models/model_download.txt DELETED
@@ -1 +0,0 @@
- Download the quantized mistral-7b-instruct-v0.1.Q5_K_M.gguf model from: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/tree/main
 
RD/AI_full_stack_repository/requirements.txt DELETED
@@ -1,13 +0,0 @@
- streamlit==1.29.0
- langchain==0.1.13
- # unstructured==0.12.3
- tiktoken==0.5.2
- pypdf==4.1.0
- sentence-transformers==2.5.1
- langchain-community
- langchain-chroma
- numpy==1.26.1
- python-box
- llama-cpp-python==0.2.76
- # pdfservices-sdk==4.0.0
- watchdog==4.0.1
 
RD/AI_full_stack_repository/screenshot_images/.DS_Store DELETED
Binary file (6.15 kB)
 
RD/AI_full_stack_repository/screenshot_images/invoice_image.png DELETED
Binary file (220 kB)
 
RD/AI_full_stack_repository/vectorestore/.DS_Store DELETED
Binary file (6.15 kB)
 
RD/AI_full_stack_repository/vectorestore/chroma/.DS_Store DELETED
Binary file (6.15 kB)
 
RD/README.md DELETED
@@ -1,13 +0,0 @@
- ---
- title: RD
- emoji: 📊
- colorFrom: indigo
- colorTo: purple
- sdk: streamlit
- sdk_version: 1.35.0
- app_file: app.py
- pinned: false
- license: mit
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference