Update app.py
app.py
CHANGED
@@ -8,22 +8,17 @@ from enum import Enum
 import gradio as gr
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
-from langchain.prompts import PromptTemplate
 from langchain.schema import BaseRetriever
 from langchain.embeddings.base import Embeddings
 from langchain.llms.base import BaseLanguageModel
 import PyPDF2
+from huggingface_hub import InferenceClient
 # Install required packages


 # Initialize models
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings
-from langchain_community.llms import HuggingFacePipeline
-from transformers import pipeline
-from sentence_transformers import SentenceTransformer
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
 embed_model = HuggingFaceBgeEmbeddings(
     model_name="all-MiniLM-L6-v2",#"dunzhang/stella_en_1.5B_v5",
     model_kwargs={'device': 'cpu'},
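This hunk removes the local-inference imports (transformers, sentence_transformers, CUDA device selection) and adds `InferenceClient` from `huggingface_hub`; only the CPU-only BGE wrapper around all-MiniLM-L6-v2 survives. As a quick sanity check of that surviving piece (a sketch, not part of the commit):

    # Not in the commit: verify the embedding model loads and returns vectors.
    vec = embed_model.embed_query("What is retrieval-augmented generation?")
    print(len(vec))  # all-MiniLM-L6-v2 produces 384-dimensional embeddings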
@@ -31,34 +26,10 @@ embed_model = HuggingFaceBgeEmbeddings(
 )

 model_name = "meta-llama/Llama-3.2-3B-Instruct"#"google/gemma-2-2b-it"#"prithivMLmods/Llama-3.2-3B-GGUF"
-from huggingface_hub import InferenceClient
-
-client = InferenceClient(model_name)


+client = InferenceClient(model_name)

-# tokenizer = AutoTokenizer.from_pretrained(model_name)
-# model = AutoModelForCausalLM.from_pretrained(
-#     model_name,
-#     trust_remote_code=True,
-#     use_auth_token=True
-# )
-
-# pipe = pipeline(
-#     "text-generation",
-#     model=model,
-#     tokenizer=tokenizer,
-#     max_new_tokens=2048*2,
-#     temperature=0.3,
-#     top_p=0.95,
-#     generation_config=model.generation_config
-#     # repetition_penalty=1.15
-# )
-# llm = HuggingFacePipeline(pipeline=pipe)
-# model.generation_config.pad_token_id = model.generation_config.eos_token_id
-
-
-# embed_model = embedding_model

 # Set up logging
 logging.basicConfig(level=logging.INFO)
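With the commented-out transformers pipeline gone, generation is delegated entirely to the hosted Inference API via `client`. A minimal sketch of the kind of call this client supports, assuming the Space has a valid HF token configured and the model is served by the Inference API:

    # Sketch only: a single, non-streaming chat completion against the hosted model.
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
        max_tokens=50,
    )
    print(response.choices[0].message.content)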
@@ -71,24 +42,14 @@ class DocumentFormat(Enum):
 @dataclass
 class RAGConfig:
     """Configuration for RAG system parameters"""
-    chunk_size: int =
-    chunk_overlap: int =
+    chunk_size: int = 100
+    chunk_overlap: int = 10
     retriever_k: int = 3
     persist_directory: str = "./chroma_db"

 class AdvancedRAGSystem:
     """Advanced RAG System with improved error handling and type safety"""

-    DEFAULT_TEMPLATE = """<|start_header_id|>system<|end_header_id|>
-You are a helpful assistant. Use the following pieces of context to answer the question at the end.
-If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
-Context:
-{context}
-
-<|eot_id|><|start_header_id|>user<|end_header_id|>
-{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-"""

     def __init__(
         self,
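The hard-coded Llama prompt template is dropped from the class, and RAGConfig now declares concrete chunking defaults (chunk_size=100, chunk_overlap=10). How these values reach the text splitter is not shown in this hunk, so the wiring below is an assumption:

    # Assumed wiring: pass RAGConfig values to the splitter imported at the top of the file.
    config = RAGConfig()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=config.chunk_size,        # 100 characters per chunk
        chunk_overlap=config.chunk_overlap,  # 10 characters shared between neighbouring chunks
    )
    chunks = splitter.split_text(document_text)  # document_text: text extracted from the uploaded PDF

A 100-character chunk is small for PDF question answering; the sliders further down expose both values so they can be raised at runtime.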
@@ -104,10 +65,6 @@ Context:
         self.last_context: Optional[str] = None
         self.context = None
         self.source_documents = 0
-        # self.prompt = PromptTemplate(
-        #     template=self.DEFAULT_TEMPLATE,
-        #     input_variables=["context", "question"]
-        # )

     def _validate_file(self, file_path: Path) -> bool:
         """Validate if the file is of supported format and exists"""
@@ -191,48 +148,44 @@ Context:
             retrieved_docs = retriever.get_relevant_documents(question)
             context = self._format_context(retrieved_docs)
             self.last_context = context
+            self.context = context
+            self.source_documents = len(retrieved_docs)
             messages = [
                 {
                     "role":"system",
-                    "content":f"""
-You are a helpful assistant. Use the following pieces of context to answer the question at the end.
+                    "content":f"""You are a helpful assistant. Use the following pieces of context to answer the question at the end.
 If you don't know the answer, just say that you don't know, don't try to make up an answer.

 Context:
 {context}
-
-<|eot_id|><|start_header_id|>user<|end_header_id|>
-{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 """
                 },
                 {
                     "role": "user",
-                    "content":
+                    "content": question
                 }
             ]
-            self.context = context
-            self.source_documents = len(retrieved_docs)
-            # Generate response using LLM ###########
-            # response = self.llm.invoke(
-            #     self.prompt.format(
-            #         context=context,
-            #         question=question
-            #     )
-            # )

+            response_text = ""
+            for chunk in self.llm.chat.completions.create(
+                model=model_name,
+                messages=messages,
+                max_tokens=500,
+                stream=True
+            ):
+                if hasattr(chunk.choices[0].delta, 'content'):
+                    content = chunk.choices[0].delta.content
+                    if content is not None:
+                        response_text += content
+                        yield response_text

         except Exception as e:
             error_msg = f"Error during query processing: {str(e)}"
             logger.error(error_msg)
+            yield error_msg

 def create_gradio_interface(rag_system: AdvancedRAGSystem) -> gr.Blocks:
     """Create an improved Gradio interface for the RAG system"""
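This is the core behavioural change: instead of the commented-out `self.llm.invoke(...)` path, the query method now builds chat messages, streams tokens from the Inference API, and yields the accumulated answer so the UI can update progressively; errors are yielded as plain text as well. The same pattern in isolation (a sketch; the function name is illustrative, `client` and `model_name` are the objects defined earlier):

    def stream_answer(client, model_name, messages):
        """Yield the growing answer text after each streamed delta."""
        response_text = ""
        for chunk in client.chat.completions.create(
            model=model_name,
            messages=messages,
            max_tokens=500,
            stream=True,  # ask the server for incremental deltas
        ):
            content = getattr(chunk.choices[0].delta, "content", None)
            if content:  # final chunks may arrive without content
                response_text += content
                yield response_text  # cumulative text, so the textbox always shows the full answer so far

Yielding the cumulative string rather than each delta matches how Gradio replaces a component's value on every step of a generator handler.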
@@ -274,14 +227,14 @@ def create_gradio_interface(rag_system: AdvancedRAGSystem) -> gr.Blocks:
             chunk_size = gr.Slider(
                 minimum=100,
                 maximum=10000,
-                value=
+                value=100,
                 step=100,
                 label="Chunk Size"
             )
             overlap = gr.Slider(
                 minimum=10,
                 maximum=5000,
-                value=
+                value=10,
                 step=10,
                 label="Chunk Overlap"
             )
@@ -315,40 +268,20 @@ def create_gradio_interface(rag_system: AdvancedRAGSystem) -> gr.Blocks:
         )

         query_button.click(
-            fn=
+            fn=query_streaming,
             inputs=[question_input],
             outputs=[answer_output],
             api_name="stream_response",
-
-        )
-
-        query_button.click(
-            fn=update_history,
+            queue=False
+        ).then(
+            fn=update_context,
             inputs=[question_input],
-            outputs=[
+            outputs=[context_output]
         )

     return demo

-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-# demo = gr.ChatInterface(
-#     respond,
-#     additional_inputs=[
-#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-#         gr.Slider(
-#             minimum=0.1,
-#             maximum=1.0,
-#             value=0.95,
-#             step=0.05,
-#             label="Top-p (nucleus sampling)",
-#         ),
-#     ],
-# )
 rag_system = AdvancedRAGSystem(embed_model, client)
 demo = create_gradio_interface(rag_system)
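The two separate `query_button.click(...)` registrations are collapsed into one chained event: the streaming handler fills the answer box, and `.then(...)` runs a follow-up that refreshes the retrieved-context display once streaming ends. `query_streaming`, `update_context`, and `context_output` are defined elsewhere in the file; the handler shapes below are assumptions, not the Space's actual definitions:

    def query_streaming(question):
        # Stream partial answers from the RAG system straight into the answer textbox.
        yield from rag_system.query(question)  # method name assumed; it is the streaming query shown above

    def update_context(question):
        # After the answer completes, surface the chunks that were retrieved for it.
        return rag_system.context or "No context retrieved."

One caveat worth verifying: depending on the Gradio version, generator handlers need the queue enabled, so `queue=False` on a streaming event may raise at runtime.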