redfernstech committed on
Commit
1f7542b
·
verified ·
1 Parent(s): 9b29ee3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -121
app.py CHANGED
@@ -1,37 +1,54 @@
 
 
 
 
1
  import os
2
  import time
3
  from fastapi import FastAPI, Request
4
  from fastapi.responses import HTMLResponse
5
  from fastapi.staticfiles import StaticFiles
6
- from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings
7
- from llama_index.llms.huggingface import HuggingFaceInferenceAPI
8
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
9
- from pydantic import BaseModel
 
 
 
 
10
  from fastapi.responses import JSONResponse
 
 
 
 
11
  import uuid # for generating unique IDs
12
  import datetime
13
  from fastapi.middleware.cors import CORSMiddleware
14
  from fastapi.templating import Jinja2Templates
15
- from huggingface_hub import InferenceClient
16
  import json
17
- import re
18
  from deep_translator import GoogleTranslator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
 
20
 
21
-
22
# Define Pydantic model for incoming request body
class MessageRequest(BaseModel):
    """Validated request body for the POST /chat/ endpoint."""
    # Free-text user message to answer.
    message: str
    # Language tag sent by the client; the /chat/ handler splits it on '-'
    # to derive the translation target code (e.g. "ta-IN" -> "ta").
    language: str
26
 
27
# Hugging Face Inference API client used for direct LLM calls.
repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
llm_client = InferenceClient(
    model=repo_id,
    token=os.getenv("HF_TOKEN"),
)

# NOTE(review): this copies HF_TOKEN back into the environment it was just
# read from — a no-op when the variable is set, and a TypeError (env values
# must be str, not None) when it is unset. Confirm whether it can be removed.
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")
34
-
35
  app = FastAPI()
36
 
37
  @app.middleware("http")
@@ -40,8 +57,6 @@ async def add_security_headers(request: Request, call_next):
40
  response.headers["Content-Security-Policy"] = "frame-ancestors *; frame-src *; object-src *;"
41
  response.headers["X-Frame-Options"] = "ALLOWALL"
42
  return response
43
-
44
- # Allow CORS requests from any domain
45
  app.add_middleware(
46
  CORSMiddleware,
47
  allow_origins=["*"],
@@ -57,123 +72,29 @@ async def favicon():
57
  app.mount("/static", StaticFiles(directory="static"), name="static")
58
 
59
  templates = Jinja2Templates(directory="static")
60
-
61
# Configure Llama index settings
# Global LLM used by llama_index query engines (Hugging Face Inference API).
Settings.llm = HuggingFaceInferenceAPI(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=3000,
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=512,
    # Low temperature keeps answers close to the retrieved content.
    generate_kwargs={"temperature": 0.1},
)
# Embedding model used to vectorize documents and queries.
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Where the persisted vector index is stored / where source PDFs are read from.
PERSIST_DIR = "db"
PDF_DIRECTORY = 'data'

# Ensure directories exist
os.makedirs(PDF_DIRECTORY, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)
# Full transcript of /chat/ exchanges (dicts with sender/message/response/timestamp).
chat_history = []
# (query, response) pairs used by handle_query to build conversational context.
current_chat_history = []
82
-
83
def data_ingestion_from_directory():
    """Load every document under PDF_DIRECTORY, build a vector index over
    them, and persist the index to PERSIST_DIR for later reload."""
    documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
    # VectorStoreIndex.from_documents creates its own default storage
    # context; the previous explicit StorageContext.from_defaults() result
    # was never passed to it, so that dead local has been removed.
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
88
-
89
def initialize():
    """Run startup tasks: ingest the PDF corpus and report how long it took."""
    started = time.time()
    data_ingestion_from_directory()  # Process PDF ingestion at startup
    elapsed = time.time() - started
    print(f"Data ingestion time: {elapsed} seconds")
93
-
94
def split_name(full_name):
    """Split a full name into a (first_name, last_name) tuple.

    Rules (unchanged from the original):
      - one word  -> first name is empty, the word is the last name
      - two+ words -> first word is the first name, the rest join into
        the last name

    Fix: an empty or whitespace-only input previously raised IndexError;
    it now returns ('', '').
    """
    words = full_name.strip().split()
    if not words:
        # Nothing to split — return empty components instead of crashing.
        return '', ''
    if len(words) == 1:
        return '', words[0]
    # Covers both the two-word and many-word cases: for exactly two words
    # ' '.join(words[1:]) is just the second word.
    return words[0], ' '.join(words[1:])
110
-
111
- initialize() # Run initialization tasks
112
-
113
def handle_query(query):
    """Answer a user query against the persisted vector index.

    Builds a one-message chat QA prompt, reloads the index from
    PERSIST_DIR, folds prior (query, response) pairs from
    current_chat_history into a context string, runs the query engine,
    records the new exchange, and returns the response text.
    """
    # Prompt template; {context_str} and {query_str} are filled in by the
    # query engine at query time.
    chat_text_qa_msgs = [
        (
            "user",
            """
        You are the Hotel voice chatbot and your name is hotel helper. Your goal is to provide accurate, professional, and helpful answers to user queries based on the hotel's data. Always ensure your responses are clear and concise. Give response within 10-15 words only. You need to give an answer in the same language used by the user.
        {context_str}
        Question:
        {query_str}
        """
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    # NOTE(review): the index is reloaded from disk on every call — this is
    # per-request I/O; confirm whether caching the loaded index is intended.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

    # Fold prior exchanges (most recent first) into a single context string,
    # skipping entries whose query was blank.
    context_str = ""
    for past_query, response in reversed(current_chat_history):
        if past_query.strip():
            context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"

    # NOTE(review): context_str is passed as a keyword to as_query_engine —
    # verify the installed llama_index version accepts it; otherwise the
    # history context may be silently ignored.
    query_engine = index.as_query_engine(text_qa_template=text_qa_template, context_str=context_str)
    print(query)
    answer = query_engine.query(query)

    # Normalize the engine's return shape to a plain string.
    if hasattr(answer, 'response'):
        response = answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        response = answer['response']
    else:
        response = "Sorry, I couldn't find an answer."
    current_chat_history.append((query, response))
    return response
146
-
147
  @app.get("/ch/{id}", response_class=HTMLResponse)
148
  async def load_chat(request: Request, id: str):
149
  return templates.TemplateResponse("index.html", {"request": request, "user_id": id})
150
@app.get("/voice/{id}", response_class=HTMLResponse)
async def load_chat(request: Request, id: str):
    # Serve the voice-chat UI, passing the caller's id to the template.
    # NOTE(review): this redefines load_chat (same name as the /ch/{id}
    # handler); FastAPI registers both routes, but the module-level name
    # now points here — consider a distinct function name.
    return templates.TemplateResponse("voice.html", {"request": request, "user_id": id})
153
-
154
-
155
-
156
  @app.post("/chat/")
157
  async def chat(request: MessageRequest):
158
  message = request.message # Access the message from the request body
159
  language = request.language
160
  language_code = request.language.split('-')[0]
161
- response = handle_query(message) # Process the message
162
- response1 = response
163
  try:
164
- translator = GoogleTranslator(source='en', target=language_code) # Translate to Tamil
165
- response1 = translator.translate(response)
166
- #response1 = translator.translate(response, dest=language_code).text
167
  print(response1)
168
  except Exception as e:
169
  # Handle translation errors
170
  print(f"Translation error: {e}")
171
- translated_response = "Sorry, I couldn't translate the response."
172
  print(f"Selected Language: {language}")
173
  message_data = {
174
  "sender": "User",
175
  "message": message,
176
- "response": response,
177
  "timestamp": datetime.datetime.now().isoformat()
178
  }
179
  chat_history.append(message_data)
@@ -181,5 +102,4 @@ async def chat(request: MessageRequest):
181
 
182
  @app.get("/")
183
  def read_root(request: Request):
184
- return templates.TemplateResponse("home.html", {"request": request})
185
-
 
1
+ import urllib
2
+ import warnings
3
+ from pathlib import Path as p
4
+ from pprint import pprint
5
  import os
6
  import time
7
  from fastapi import FastAPI, Request
8
  from fastapi.responses import HTMLResponse
9
  from fastapi.staticfiles import StaticFiles
10
+ import pandas as pd
11
+ from langchain import PromptTemplate
12
+ from langchain.chains.question_answering import load_qa_chain
13
+ from langchain.document_loaders import PyPDFLoader
14
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
15
+ from langchain.vectorstores import Chroma
16
+ from langchain.chains import RetrievalQA
17
+ import os
18
  from fastapi.responses import JSONResponse
19
+ from pydantic import BaseModel # Add this import
20
+ # restart python kernal if issues with langchain import.
21
+ from langchain_google_genai import ChatGoogleGenerativeAI
22
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
23
  import uuid # for generating unique IDs
24
  import datetime
25
  from fastapi.middleware.cors import CORSMiddleware
26
  from fastapi.templating import Jinja2Templates
 
27
  import json
 
28
  from deep_translator import GoogleTranslator
29
+ import re
30
# --- Gemini-backed RetrievalQA pipeline (built once at import time) ---
# NOTE(review): the Google API key is read from the HF_TOKEN environment
# variable — confirm the deployment really stores the Google key there,
# or rename the variable.
GOOGLE_API_KEY= os.getenv("HF_TOKEN")
warnings.filterwarnings("ignore")
# Chat model used for answering; low temperature keeps answers stable.
model = ChatGoogleGenerativeAI(model="gemini-pro",google_api_key=GOOGLE_API_KEY,
                 temperature=0.2,convert_system_message_to_human=True)
# NOTE(review): hard-coded Colab-style path — this will fail anywhere
# /content/data_redf.pdf does not exist; consider an env-configurable path.
pdf_loader = PyPDFLoader("/content/data_redf.pdf")
pages = pdf_loader.load_and_split()
# Re-join all pages, then split into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
context = "\n\n".join(str(p.page_content) for p in pages)
texts = text_splitter.split_text(context)
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001",google_api_key=GOOGLE_API_KEY)
# Retriever returns the 10 most similar chunks per query.
vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k":10})
# QA chain used by the /chat/ endpoint; also returns the source chunks.
qa_chain = RetrievalQA.from_chain_type(
    model,
    retriever=vector_index,
    return_source_documents=True
)
47
 
48
class MessageRequest(BaseModel):  # Define the MessageRequest model
    """Validated request body for the POST /chat/ endpoint."""
    # Free-text user message to answer.
    message: str
    # Language tag from the client; the /chat/ handler splits it on '-'
    # to derive the translation target code (e.g. "ta-IN" -> "ta").
    language: str
51
 
 
 
 
 
 
 
 
 
52
  app = FastAPI()
53
 
54
  @app.middleware("http")
 
57
  response.headers["Content-Security-Policy"] = "frame-ancestors *; frame-src *; object-src *;"
58
  response.headers["X-Frame-Options"] = "ALLOWALL"
59
  return response
 
 
60
  app.add_middleware(
61
  CORSMiddleware,
62
  allow_origins=["*"],
 
72
  app.mount("/static", StaticFiles(directory="static"), name="static")
73
 
74
  templates = Jinja2Templates(directory="static")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
@app.get("/ch/{id}", response_class=HTMLResponse)
async def load_chat(request: Request, id: str):
    """Serve the chat UI page, handing the caller's id through to the template."""
    template_context = {"request": request, "user_id": id}
    return templates.TemplateResponse("index.html", template_context)
 
 
 
 
 
 
78
  @app.post("/chat/")
79
  async def chat(request: MessageRequest):
80
  message = request.message # Access the message from the request body
81
  language = request.language
82
  language_code = request.language.split('-')[0]
83
+ response = qa_chain({"query": message})
84
+ response1 = response['result'] # Correctly access the response result
85
  try:
86
+ translator = GoogleTranslator(source='en', target=language_code) # Translate to target language
87
+ response1 = translator.translate(response1)
 
88
  print(response1)
89
  except Exception as e:
90
  # Handle translation errors
91
  print(f"Translation error: {e}")
92
+ response1 = "Sorry, I couldn't translate the response."
93
  print(f"Selected Language: {language}")
94
  message_data = {
95
  "sender": "User",
96
  "message": message,
97
+ "response": response1,
98
  "timestamp": datetime.datetime.now().isoformat()
99
  }
100
  chat_history.append(message_data)
 
102
 
103
@app.get("/")
def read_root(request: Request):
    # Serve the landing page.
    return templates.TemplateResponse("home.html", {"request": request})