asadmajeed committed · verified
Commit fe6efc9 · 1 Parent(s): 9c77d70

Create app.py

Files changed (1)
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
+ import os
+ import gradio as gr
+ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from llama_index.llms.llama_cpp import LlamaCPP
+ from llama_index.llms.llama_cpp.llama_utils import (
+     messages_to_prompt,
+     completion_to_prompt,
+ )
+
+ # Quantized Llama 3.2 3B Instruct in GGUF format
+ model_url = 'https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf'
+
+ llm = LlamaCPP(
+     # You can pass in the URL to a GGUF model to download it automatically
+     model_url=model_url,
+     temperature=0.1,
+     max_new_tokens=256,
+     context_window=2048,
+     # kwargs to pass to __call__()
+     generate_kwargs={},
+     # kwargs to pass to __init__(); set n_gpu_layers to at least 1 to use a GPU
+     model_kwargs={"n_gpu_layers": 1},
+     # Transform chat messages/completions into a prompt string. Note: these
+     # helpers emit Llama-2-style prompts; Llama 3.x models use a different
+     # chat template, so a custom formatter may give better results.
+     messages_to_prompt=messages_to_prompt,
+     completion_to_prompt=completion_to_prompt,
+     verbose=True,
+ )
+
+ # Initialize the embedding model (a LangChain embedding; LlamaIndex wraps it
+ # via its LangChain adapter when llama-index-embeddings-langchain is installed)
+ embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
+
+ def initialize_index():
+     """Initialize the vector store index from PDF files in the data directory"""
+     # Load documents from the data directory
+     loader = SimpleDirectoryReader(
+         input_dir="data",
+         required_exts=[".pdf"],
+     )
+     documents = loader.load_data()
+
+     # Create the index
+     index = VectorStoreIndex.from_documents(
+         documents,
+         embed_model=embeddings,
+     )
+
+     # Return a query engine backed by the Llama model
+     return index.as_query_engine(llm=llm)
+
+ # Initialize the query engine at startup
+ query_engine = initialize_index()
+
+ def process_query(
+     message: str,
+     history: list[tuple[str, str]],
+ ) -> str:
+     """Process a query using the RAG system"""
+     try:
+         # Get a response from the query engine
+         response = query_engine.query(
+             message,
+             # streaming=True
+         )
+         return str(response)
+     except Exception as e:
+         return f"Error processing query: {str(e)}"
+
+ # Create the Gradio chat interface
+ demo = gr.ChatInterface(
+     process_query,
+     title="PDF Question Answering with RAG + Llama",
+     description="Ask questions about the content of the loaded PDF documents using a Llama model",
+     # undo_btn and clear_btn were removed in newer Gradio releases
+     # undo_btn="Delete Previous",
+     # clear_btn="Clear",
+ )
+
+ if __name__ == "__main__":
+     demo.launch(debug=True)
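
Note: the commented-out streaming=True in process_query hints at token streaming. In LlamaIndex that flag belongs on as_query_engine rather than on query(), and gr.ChatInterface accepts generator functions, so a streaming variant is possible. The sketch below is an assumption about intent, not part of this commit; it reuses the names from app.py:

# Sketch only: inside initialize_index(), build a streaming query engine instead:
#     return index.as_query_engine(llm=llm, streaming=True)

def process_query(message: str, history: list[tuple[str, str]]):
    """Yield the answer incrementally; gr.ChatInterface supports generators."""
    try:
        response = query_engine.query(message)   # a StreamingResponse
        partial = ""
        for token in response.response_gen:      # generator of text chunks
            partial += token
            yield partial
    except Exception as e:
        yield f"Error processing query: {str(e)}"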
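
Once the Space is running, the chat endpoint can also be called programmatically. A minimal sketch using gradio_client; the Space id below is a placeholder assumption, and gr.ChatInterface exposes its handler under api_name="/chat":

from gradio_client import Client

client = Client("asadmajeed/pdf-rag-space")  # placeholder Space id, substitute the real one
answer = client.predict("What are these documents about?", api_name="/chat")
print(answer)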