Commit 9e773ef · verified · juan-demo committed · 1 parent: dc5fd20

Upload 3 files

Files changed (3):
  1. .env +4 -0
  2. app.py +139 -63
  3. requirements.txt +11 -1
.env ADDED
@@ -0,0 +1,4 @@
+ AWS_ACCESS_KEY_ID=AKIAS55UQNUUDASJBMTY
+ AWS_SECRET_ACCESS_KEY=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
+ PINECONE_API_KEY=c20a0dc9-e5b2-4d70-87c1-0835feb1e920
+ OPENAI_API_KEY=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
app.py CHANGED
@@ -1,63 +1,139 @@
- import gradio as gr
- from huggingface_hub import InferenceClient
-
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
-
- if __name__ == "__main__":
-     demo.launch()
+ import openai
+ import pinecone
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.docstore.document import Document
+ from langchain.prompts import PromptTemplate
+ from langchain.memory import ConversationBufferMemory
+ import boto3
+ import os
+ from time import sleep
+ from dotenv import load_dotenv
+ import gradio as gr
+
+ # Load environment variables
+ load_dotenv()
+
+ # Load OpenAI, Pinecone, and AWS credentials from environment variables
+ openai.api_key = os.getenv("OPENAI_API_KEY")
+ pinecone_api_key = os.getenv("PINECONE_API_KEY")
+ aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
+ aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
+
+ # Client for downloading the combined extracted text file from S3
+ s3_client = boto3.client('s3',
+                          aws_access_key_id=aws_access_key,
+                          aws_secret_access_key=aws_secret_key,
+                          region_name='us-east-1')
+
+ bucket_name = 'amtrak-superliner-ai-poc'       # Replace with your S3 bucket name
+ txt_file_name = 'combined_extracted_text.txt'  # Name of the text file stored in S3
+ local_txt_path = f'/tmp/{txt_file_name}'       # Temporary location to store the file locally
+
+ # Download the text file from S3
+ s3_client.download_file(bucket_name, txt_file_name, local_txt_path)
+
+ # Load the extracted text from the text file
+ with open(local_txt_path, 'r') as f:
+     doc = f.read()
+
+ # Split the document into smaller chunks (increase chunk size as needed)
+ text_splitter = CharacterTextSplitter(separator='\n', chunk_size=2000, chunk_overlap=500)
+ docs = [Document(page_content=doc)]
+ split_docs = text_splitter.split_documents(docs)
+
+ # Initialize the HuggingFace sentence-transformer model for embeddings
+ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/msmarco-distilbert-base-v4")
+
+ # Create embeddings for the document chunks
+ doc_embeddings = [embedding_model.embed_query(doc.page_content) for doc in split_docs]
+
+ # Initialize the Pinecone client
+ pc = pinecone.Pinecone(api_key=pinecone_api_key)
+
+ # Create the Pinecone index if it doesn't exist
+ index_name = "amtrak-acela-ai-demo"
+ embedding_dim = 768  # Embedding dimension of msmarco-distilbert-base-v4
+ if index_name not in pc.list_indexes().names():
+     pc.create_index(
+         name=index_name,
+         dimension=embedding_dim,
+         metric="cosine",
+         spec=pinecone.ServerlessSpec(cloud="aws", region="us-east-1")
+     )
+
+ # Connect to the Pinecone index
+ index = pc.Index(index_name)
+
+ # Upload document embeddings to Pinecone with metadata
+ for i, doc in enumerate(split_docs):
+     index.upsert(vectors=[(str(i), doc_embeddings[i], {'content': doc.page_content})])
+
+ # Set up conversation memory
+ memory = ConversationBufferMemory()
+
+ # Define a prompt template for retrieval-augmented generation (RAG)
+ RAG_PROMPT_TEMPLATE = '''
+ Here is some important context that can help inform the Human's question:
+
+ {context}
+
+ Human: {human_input}
+
+ Please provide a specific and accurate answer based on the provided context.
+ Assistant:
+ '''
+
+ PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
+
+ def get_model_response(human_input, chat_history=None):  # chat_history is required by Gradio's ChatInterface signature
+     try:
+         # Step 1: Embed the user input
+         query_embedding = embedding_model.embed_query(human_input)
+
+         # Step 2: Query Pinecone using the embedding vector
+         search_results = index.query(
+             vector=query_embedding,
+             top_k=5,
+             include_metadata=True  # Ensures metadata is included in the results
+         )
+
+         # Step 3: Extract relevant context (actual document content) from the search results
+         context_list = []
+         for ind, result in enumerate(search_results['matches']):
+             document_content = result.get('metadata', {}).get('content', 'No content found')
+             context_list.append(f"Document {ind+1}: {document_content}")
+
+         # Combine context into a single string
+         context_string = '\n\n'.join(context_list)
+
+         # Step 4: Call the OpenAI ChatCompletion API for the response
+         messages = [
+             {"role": "system", "content": "You are a helpful assistant."},
+             {"role": "user", "content": f"Here is some context:\n{context_string}\n\nUser's question: {human_input}"}
+         ]
+
+         response = openai.ChatCompletion.create(
+             model="gpt-3.5-turbo",
+             messages=messages,
+             max_tokens=400,
+             temperature=0.7
+         )
+
+         # Extract and return the model's output
+         output_text = response['choices'][0]['message']['content'].strip()
+         return output_text
+
+     except Exception as e:
+         return f"Error invoking model: {str(e)}"
+
+ # Gradio ChatInterface
+ gr_interface = gr.ChatInterface(
+     fn=get_model_response,
+     title="Amtrak Acela RMM Maintenance Assistant",
+     description="Ask questions related to the RMM documents."
+ )
+
+ # Launch the Gradio app on Hugging Face Spaces
+ gr_interface.launch()
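
The retrieval-plus-generation path added above can also be exercised outside Gradio as a quick sanity check. A minimal sketch, assuming it runs in the same module as the code above, with the .env values loaded and the Pinecone index already populated by the upsert loop; the question string is only an illustrative placeholder:

    # Quick local check of the RAG pipeline defined above (hypothetical question text)
    sample_question = "What does the manual say about routine brake inspections?"
    print(get_model_response(sample_question))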
requirements.txt CHANGED
@@ -1 +1,11 @@
- huggingface_hub==0.22.2
+ openai==0.28
+ pinecone-client
+ langchain
+ langchain-huggingface
+ boto3
+ gradio
+ python-dotenv
+ sentence-transformers
+ transformers
+ langchain-community
+ poppler-utils
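
Since the new app.py relies on the legacy openai.ChatCompletion interface, the openai==0.28 pin matters. A small sketch (standard library only) for confirming the installed packages match what app.py expects after installing these requirements:

    # Verify installed versions against the pins above
    from importlib.metadata import version

    print(version("openai"))           # expected 0.28.x for the legacy ChatCompletion API
    print(version("pinecone-client"))  # client providing pinecone.Pinecone(...)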