Commit 9e773ef · verified · juan-demo committed · 1 parent: dc5fd20

Upload 3 files

Files changed (3):
  1. .env +4 -0
  2. app.py +139 -63
  3. requirements.txt +11 -1
.env ADDED
@@ -0,0 +1,4 @@
+ AWS_ACCESS_KEY_ID=AKIAS55UQNUUDASJBMTY
+ AWS_SECRET_ACCESS_KEY=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
+ PINECONE_API_KEY=c20a0dc9-e5b2-4d70-87c1-0835feb1e920
+ OPENAI_API_KEY=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
app.py CHANGED
@@ -1,63 +1,139 @@
- import gradio as gr
- from huggingface_hub import InferenceClient
-
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
-
- if __name__ == "__main__":
-     demo.launch()
+ import openai
+ import pinecone
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.docstore.document import Document
+ from langchain.prompts import PromptTemplate
+ from langchain.memory import ConversationBufferMemory
+ import boto3
+ import os
+ from time import sleep
+ from dotenv import load_dotenv
+ import gradio as gr
+
+ # Load environment variables
+ load_dotenv()
+
+ # Load OpenAI, Pinecone, and AWS credentials from environment variables
+ openai.api_key = os.getenv("OPENAI_API_KEY")
+ pinecone_api_key = os.getenv("PINECONE_API_KEY")
+ aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
+ aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
+
+ # Client for downloading the combined extracted text file from S3
+ s3_client = boto3.client('s3',
+                          aws_access_key_id=aws_access_key,
+                          aws_secret_access_key=aws_secret_key,
+                          region_name='us-east-1')
+
+ bucket_name = 'amtrak-superliner-ai-poc'       # Replace with your S3 bucket name
+ txt_file_name = 'combined_extracted_text.txt'  # Name of the text file stored in S3
+ local_txt_path = f'/tmp/{txt_file_name}'       # Temporary location to store the file locally
+
+ # Download the text file from S3
+ s3_client.download_file(bucket_name, txt_file_name, local_txt_path)
+
+ # Load the extracted text from the text file
+ with open(local_txt_path, 'r') as f:
+     doc = f.read()
+
+ # Split the document into smaller chunks (increase chunk size as needed)
+ text_splitter = CharacterTextSplitter(separator='\n', chunk_size=2000, chunk_overlap=500)
+ docs = [Document(page_content=doc)]
+ split_docs = text_splitter.split_documents(docs)
+
+ # Initialize the HuggingFace sentence-transformer model for embeddings
+ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/msmarco-distilbert-base-v4")
+
+ # Create embeddings for the document chunks
+ doc_embeddings = [embedding_model.embed_query(doc.page_content) for doc in split_docs]
+
+ # Initialize the Pinecone client
+ pc = pinecone.Pinecone(api_key=pinecone_api_key)
+
+ # Create the Pinecone index if it doesn't exist
+ index_name = "amtrak-acela-ai-demo"
+ embedding_dim = 768  # Embedding dimension of msmarco-distilbert-base-v4
+ if index_name not in pc.list_indexes().names():
+     pc.create_index(
+         name=index_name,
+         dimension=embedding_dim,
+         metric="cosine",
+         spec=pinecone.ServerlessSpec(cloud="aws", region="us-east-1")
+     )
+
+ # Connect to the Pinecone index
+ index = pc.Index(index_name)
+
+ # Upload document embeddings to Pinecone with metadata
+ for i, doc in enumerate(split_docs):
+     index.upsert(vectors=[(str(i), doc_embeddings[i], {'content': doc.page_content})])
+
+ # Set up conversation memory
+ memory = ConversationBufferMemory()
+
+ # Define a prompt template for retrieval-augmented generation (RAG)
+ RAG_PROMPT_TEMPLATE = '''
+ Here is some important context that can help inform the Human's question:
+
+ {context}
+
+ Human: {human_input}
+
+ Please provide a specific and accurate answer based on the provided context.
+ Assistant:
+ '''
+
+ PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
+
+ def get_model_response(human_input, chat_history=None):  # chat_history is required by Gradio's ChatInterface signature
+     try:
+         # Step 1: Embed the user input
+         query_embedding = embedding_model.embed_query(human_input)
+
+         # Step 2: Query Pinecone using the embedding vector
+         search_results = index.query(
+             vector=query_embedding,
+             top_k=5,
+             include_metadata=True  # Ensures metadata is included in the results
+         )
+
+         # Step 3: Extract relevant context (actual document content) from the search results
+         context_list = []
+         for ind, result in enumerate(search_results['matches']):
+             document_content = result.get('metadata', {}).get('content', 'No content found')
+             context_list.append(f"Document {ind+1}: {document_content}")
+
+         # Combine context into a single string
+         context_string = '\n\n'.join(context_list)
+
+         # Step 4: Call the OpenAI ChatCompletion API for the response
+         messages = [
+             {"role": "system", "content": "You are a helpful assistant."},
+             {"role": "user", "content": f"Here is some context:\n{context_string}\n\nUser's question: {human_input}"}
+         ]
+
+         response = openai.ChatCompletion.create(
+             model="gpt-3.5-turbo",
+             messages=messages,
+             max_tokens=400,
+             temperature=0.7
+         )
+
+         # Extract and return the model's output
+         output_text = response['choices'][0]['message']['content'].strip()
+         return output_text
+
+     except Exception as e:
+         return f"Error invoking model: {str(e)}"
+
+ # Gradio ChatInterface
+ gr_interface = gr.ChatInterface(
+     fn=get_model_response,
+     title="Amtrak Acela RMM Maintenance Assistant",
+     description="Ask questions related to the RMM documents."
+ )
+
+ # Launch the Gradio app on Hugging Face Spaces
+ gr_interface.launch()
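
The retrieval-plus-generation path added above can also be exercised outside Gradio as a quick sanity check. A minimal sketch, assuming it runs in the same module as the code above, with the .env values loaded and the Pinecone index already populated by the upsert loop; the question string is only an illustrative placeholder:

    # Quick local check of the RAG pipeline defined above (hypothetical question text)
    sample_question = "What does the manual say about routine brake inspections?"
    print(get_model_response(sample_question))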
requirements.txt CHANGED
@@ -1 +1,11 @@
- huggingface_hub==0.22.2
+ openai==0.28
+ pinecone-client
+ langchain
+ langchain-huggingface
+ boto3
+ gradio
+ python-dotenv
+ sentence-transformers
+ transformers
+ langchain-community
+ poppler-utils
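
Since the new app.py relies on the legacy openai.ChatCompletion interface, the openai==0.28 pin matters. A small sketch (standard library only) for confirming the installed packages match what app.py expects after installing these requirements:

    # Verify installed versions against the pins above
    from importlib.metadata import version

    print(version("openai"))           # expected 0.28.x for the legacy ChatCompletion API
    print(version("pinecone-client"))  # client providing pinecone.Pinecone(...)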