Spaces:

AmeerH
/

TEST_MTM

Runtime error

App Files Files Community

AmeerH commited on Oct 19, 2023

Commit

f1cccc9

1 Parent(s): 23f91d6

Upload 3 files

Browse files

Files changed (3) hide show

MTM_Memoir_txt.txt +0 -0
app.py +153 -0
requirements.txt +128 -0

MTM_Memoir_txt.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

app.py ADDED Viewed

	@@ -0,0 +1,153 @@

+# We are going to develop the code for the RAG here. This is going to be the first and the only attempt IA!!
+# To create the POC
+# ! We need to do the following,
+# Convert the source text file (MTM_Memoir_txt.txt) to embeddings and save them into a vector database.
+# Load LLAMA 2
+# Connect LLAMA 2 to the vector database.
+# Ask Questions and give answers.
+# ! LLAMA IS LOADED
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import BitsAndBytesConfig
+import torch
+import json
+from torch import cuda
+import torch
+import transformers
+from time import time
+import chromadb
+from chromadb.config import Settings
+from langchain.llms import huggingface_pipeline
+from langchain.document_loaders import TextLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.chains import RetrievalQA
+from langchain.vectorstores.chroma import Chroma
+# 4-bit NF4 quantization settings passed to from_pretrained() below; the compute
+# dtype for the quantized layers is bfloat16.
+nf4_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+# Change the model path here to test any other model.
+# model_path = 'training_date_02_10_2023_psql/final_merged_checkpoint'
+# NOTE: the loaders below pass local_files_only=True, so this path must already
+# be available on disk; nothing is fetched from the Hub.
+model_path = 'Llama-13b-chat'
+# Tokenizer matching the model checkpoint.
+tokenizer = AutoTokenizer.from_pretrained(
+                model_path,
+                local_files_only=True,
+)
+# Causal LM loaded with the 4-bit config above; device_map="auto" lets the
+# loader decide layer placement, with "offload/" as the spill-over folder.
+model = AutoModelForCausalLM.from_pretrained(
+                model_path,
+                local_files_only=True,
+                low_cpu_mem_usage=True,
+                device_map="auto",
+                offload_folder="offload/",
+                cache_dir="cache/",
+                quantization_config=nf4_config # Required: when this was omitted, the full (unquantized) model was loaded.
+)
+# NOTE(review): model_config is not referenced again in the visible code - confirm before removing.
+model_config = transformers.AutoConfig.from_pretrained(model_path)
+# Wrap the loaded model and tokenizer in a text-generation pipeline.
+# NOTE(review): `model` is already instantiated above (quantized, device_map="auto"),
+# so torch_dtype/device_map here look redundant - confirm against the pipeline docs.
+query_pipeline = transformers.pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)
+# Expose the pipeline to LangChain through its HuggingFacePipeline LLM wrapper.
+llm = huggingface_pipeline.HuggingFacePipeline(pipeline=query_pipeline)
+# Ingestion of data using text loader
+loader = TextLoader("MTM_Memoir_txt.txt", encoding="utf-8")
+documents = loader.load()
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
+all_splits = text_splitter.split_documents(documents)
+# let's create the embeddings and store in vector store
+model_name = "sentence-transformers/all-mpnet-base-v2"
+model_kwargs = {"device": "cuda"}
+embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
+# initialize chromadb
+vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings, persist_directory="chroma_db")
+# Initialize the chain
+retriever = vectordb.as_retriever()
+qa = RetrievalQA.from_chain_type(
+    llm=llm,
+    chain_type="stuff",
+    retriever=retriever,
+    verbose=True
+)
+# let's test the RAG
+def test_rag(qa, query):
+    print(query)
+    result = qa.run(query)
+    print(f"Result \t {result}")
+test_rag(qa, "Hello when were you born?")
+def preprocess_query(query):
+    # load the query as a dict
+    # res = json.loads(str(query))
+    # human_language = res['human_language']
+    # SQL_TABLE_CONTEXT = "CREATE TABLE properties (address character, details characterstate character, property_type character, price integer, bedrooms integer, bathrooms integer, sqft integer)"
+    # INTRO = f"<s>[INST] <<SYS>> \
+    #         You are a helpful, genius data scientist, who has access to a database that contains listing of properties in New York. Your job is to write PostgreSQL Query to fetch data based on User Request and Parameters. If you can't reply correctly just say that not enough information was provided \n\n<</SYS>>"
+    INTRO = f"<s>[INST] <<SYS>>You are former Malaysian Prime Minister Tun Dr Mahathir Mohamad.. A visionary leader \n\n<</SYS>>"
+    INSTRUCTION = f"### Instruction\n Respond to the following query by your subject {query} Just like yourself. \n\n"
+    RESPONSE = f"### Response:\n\n"
+    final_payload = INTRO + INSTRUCTION + RESPONSE
+    payload_length = len(final_payload)
+    return final_payload
+def get_result(qa=qa, query = ""):
+    return qa.run(query)
+def predict(query):
+    processed_query = preprocess_query(query=query)
+    result = get_result(query=processed_query)
+    return(result)
+# Gradio UI: a query box, an output box and a submit button wired to predict().
+# NOTE: the click() keyword is `api_name`; an earlier attempt misspelled it as
+# `ap_name` instead, which is why this version did not work before.
+with gr.Blocks() as sql_generator:
+    query = gr.Textbox(label="Query", placeholder='Ask the president?')
+    output = gr.Textbox(label="Output")
+    submit_button = gr.Button("Submit")
+    # On click: pass the query text to predict() and show its return value in `output`.
+    submit_button.click(fn=predict,
+                        inputs=query,
+                        outputs=output, api_name="predict"
+                        )
+# Launch the app; this executes whenever the module is run or imported (no __main__ guard).
+sql_generator.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,128 @@

+accelerate==0.23.0
+aiofiles==23.2.1
+aiohttp==3.8.6
+aiosignal==1.3.1
+altair==5.1.2
+annotated-types==0.6.0
+anyio==3.7.1
+async-timeout==4.0.3
+attrs==23.1.0
+backoff==2.2.1
+bcrypt==4.0.1
+bitsandbytes==0.41.1
+certifi==2023.7.22
+charset-normalizer==3.3.0
+chroma-hnswlib==0.7.3
+chromadb==0.4.14
+click==8.1.7
+coloredlogs==15.0.1
+contourpy==1.1.1
+cycler==0.12.1
+dataclasses-json==0.6.1
+einops==0.7.0
+exceptiongroup==1.1.3
+fastapi==0.103.2
+ffmpy==0.3.1
+filelock==3.12.4
+flatbuffers==23.5.26
+fonttools==4.43.1
+frozenlist==1.4.0
+fsspec==2023.9.2
+gradio==3.47.1
+gradio_client==0.6.0
+greenlet==3.0.0
+grpcio==1.59.0
+h11==0.14.0
+httpcore==0.18.0
+httptools==0.6.0
+httpx==0.25.0
+huggingface-hub==0.17.3
+humanfriendly==10.0
+idna==3.4
+importlib-resources==6.1.0
+Jinja2==3.1.2
+joblib==1.3.2
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.19.1
+jsonschema-specifications==2023.7.1
+kiwisolver==1.4.5
+langchain==0.0.315
+langsmith==0.0.43
+MarkupSafe==2.1.3
+marshmallow==3.20.1
+matplotlib==3.8.0
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.0.4
+mypy-extensions==1.0.0
+networkx==3.1
+nltk==3.8.1
+numpy==1.26.1
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.18.1
+nvidia-nvjitlink-cu12==12.2.140
+nvidia-nvtx-cu12==12.1.105
+onnxruntime==1.16.1
+orjson==3.9.9
+overrides==7.4.0
+packaging==23.2
+pandas==2.1.1
+Pillow==10.1.0
+posthog==3.0.2
+protobuf==4.24.4
+psutil==5.9.6
+pulsar-client==3.3.0
+pydantic==2.4.2
+pydantic_core==2.10.1
+pydub==0.25.1
+pyparsing==3.1.1
+PyPika==0.48.9
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-multipart==0.0.6
+pytz==2023.3.post1
+PyYAML==6.0.1
+referencing==0.30.2
+regex==2023.10.3
+requests==2.31.0
+rpds-py==0.10.6
+safetensors==0.4.0
+scikit-learn==1.3.1
+scipy==1.11.3
+semantic-version==2.10.0
+sentence-transformers==2.2.2
+sentencepiece==0.1.99
+six==1.16.0
+sniffio==1.3.0
+SQLAlchemy==2.0.22
+starlette==0.27.0
+sympy==1.12
+tenacity==8.2.3
+threadpoolctl==3.2.0
+tokenizers==0.14.1
+toolz==0.12.0
+torch==2.1.0
+torchvision==0.16.0
+tqdm==4.66.1
+transformers==4.34.0
+triton==2.1.0
+typer==0.9.0
+typing-inspect==0.9.0
+typing_extensions==4.8.0
+tzdata==2023.3
+urllib3==2.0.6
+uvicorn==0.23.2
+uvloop==0.18.0
+watchfiles==0.21.0
+websockets==11.0.3
+xformers==0.0.22.post4
+yarl==1.9.2