Deepakraj2006 committed (verified)
Commit f179f1d
1 Parent(s): fe41991

Update app.py

Files changed (1):
  app.py  +30 -49

app.py CHANGED
@@ -1,14 +1,19 @@
 import os
 import gradio as gr
 import torch
+import logging
 from langchain.chains import ConversationalRetrievalChain
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.document_loaders import PyPDFLoader
+from langchain.document_loaders import PyMuPDFLoader  # βœ… More stable PDF loader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
 from langchain_community.llms import HuggingFacePipeline
 from transformers import pipeline
 
+# Setup Logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 # Set Hugging Face Cache Directory
 os.environ["HF_HOME"] = "/tmp/huggingface_cache"
 
@@ -22,74 +27,53 @@ llm_pipeline = None
 embeddings = None
 persist_directory = "/tmp/chroma_db"  # Storage for vector DB
 
-
 def init_llm():
     """Initialize LLM and Embeddings"""
     global llm_pipeline, embeddings
-
+
     hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
     if not hf_token:
         raise ValueError("HUGGINGFACEHUB_API_TOKEN is not set in environment variables.")
-
-    model_id = "tiiuae/falcon-rw-1b"
-
+
+    model_id = "tiiuae/falcon-rw-1b"  # βœ… Can switch to "tiiuae/falcon-rw-1b" for lighter model
     hf_pipeline = pipeline("text-generation", model=model_id, device=DEVICE)
     llm_pipeline = HuggingFacePipeline(pipeline=hf_pipeline)
-
+
     embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-MiniLM-L6-v2",
         model_kwargs={"device": DEVICE}
     )
-
-
-import time
+    logger.info("βœ… LLM and Embeddings Initialized Successfully!")
 
 def process_document(file):
+    """Process uploaded PDF and create a retriever"""
     global conversation_retrieval_chain
-
+
     if not llm_pipeline or not embeddings:
         init_llm()
-
-    start_time = time.time()
-    print(f"πŸ“‚ Uploading PDF: {file.name}")
-
+
     try:
-        # βœ… Ensure file is saved correctly
-        file_path = os.path.join("/tmp/uploads", file.name)
-        with open(file_path, "wb") as f:
-            f.write(file.read())
-        print(f"βœ… PDF saved at {file_path} in {time.time() - start_time:.2f}s")
-
-        # βœ… Load PDF
-        start_time = time.time()
-        loader = PyPDFLoader(file_path)
-        documents = loader.load()
-        print(f"βœ… PDF loaded in {time.time() - start_time:.2f}s")
+        file_path = file.name  # βœ… Ensures correct file path is passed
+        logger.info(f"πŸ“‚ Processing PDF: {file_path}")
 
-        # βœ… Split text
-        start_time = time.time()
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
+        loader = PyMuPDFLoader(file_path)  # βœ… Alternative loader for stability
+        documents = loader.load()
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
         texts = text_splitter.split_documents(documents)
-        print(f"βœ… Text split in {time.time() - start_time:.2f}s")
 
-        # βœ… Create ChromaDB
-        start_time = time.time()
-        db = Chroma.from_documents(texts, embedding=embeddings, persist_directory="/tmp/chroma_db")
-        print(f"βœ… ChromaDB created in {time.time() - start_time:.2f}s")
+        # Load or create ChromaDB
+        db = Chroma.from_documents(texts, embedding=embeddings, persist_directory=persist_directory)
+        retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 6})
 
-        # βœ… Create retrieval chain
         conversation_retrieval_chain = ConversationalRetrievalChain.from_llm(
-            llm=llm_pipeline, retriever=db.as_retriever()
+            llm=llm_pipeline, retriever=retriever
         )
-        print("βœ… Document processing complete!")
-
+        logger.info("βœ… PDF Processed Successfully!")
         return "πŸ“„ PDF uploaded and processed successfully! You can now ask questions."
 
     except Exception as e:
-        print(f"❌ Error processing PDF: {str(e)}")
-        return f"Error: {str(e)}"
-
-
+        logger.error(f"❌ Error processing PDF: {str(e)}")
+        return f"❌ Error processing PDF: {str(e)}"
 
 def process_prompt(prompt, chat_history_display):
     """Generate a response using the retrieval chain"""
@@ -102,24 +86,21 @@ def process_prompt(prompt, chat_history_display):
     answer = output["answer"]
 
     chat_history.append((prompt, answer))
-
     return chat_history
 
-
 # Define Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("<h1 style='text-align: center;'>Personal Data Assistant</h1>")
-
+
     with gr.Row():
         dark_mode = gr.Checkbox(label="πŸŒ™ Toggle light/dark mode")
 
-    with gr.Column():  # βœ… Replace `gr.Box()` with `gr.Column()`
+    with gr.Column():
        gr.Markdown("Hello there! I'm your friendly data assistant, ready to answer any questions regarding your data. Could you please upload a PDF file for me to analyze?")
         file_input = gr.File(label="Upload File")
         upload_button = gr.Button("Upload File")
-
+
         status_output = gr.Textbox(label="Status", interactive=False)
-
         chat_history_display = gr.Chatbot(label="Chat History")
 
     with gr.Row():
@@ -134,4 +115,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
 # Launch Gradio App
 if __name__ == "__main__":
-    demo.launch(share=True)
+    demo.launch(server_name="0.0.0.0", server_port=7860)  # βœ… Works in Hugging Face Spaces
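Note on the new ingestion path: PyMuPDFLoader requires the pymupdf package to be installed in the Space, and current LangChain releases expose it from langchain_community.document_loaders (the langchain.document_loaders path imported above is the older legacy location). A minimal standalone sketch of the commit's load-and-split step, with "sample.pdf" as a hypothetical input file:

# Minimal sketch of the commit's loader/splitter change, outside the app.
# Assumes pymupdf, langchain-community, and langchain-text-splitters are installed.
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

loader = PyMuPDFLoader("sample.pdf")   # hypothetical input file
documents = loader.load()              # one Document per PDF page
splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts = splitter.split_documents(documents)
print(f"{len(documents)} pages -> {len(texts)} chunks")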
 
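The retriever change means each question now pulls the six most similar chunks from Chroma (search_kwargs={'k': 6}). A hedged sketch of how the chain built in process_document is queried, mirroring the question/chat-history bookkeeping that process_prompt relies on; this uses the classic ConversationalRetrievalChain call style, which newer LangChain versions replace with .invoke():

# Sketch of querying the chain; names follow the app's own globals.
# ConversationalRetrievalChain takes a question plus prior (question, answer) pairs.
chat_history = []
query = "What is this PDF about?"      # hypothetical question
output = conversation_retrieval_chain(
    {"question": query, "chat_history": chat_history}
)
answer = output["answer"]
chat_history.append((query, answer))   # same bookkeeping as process_prompt
print(answer)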