testbot_v4

Running

App Files Files Community

testbot_v4 / app.py

soyleyicicem

Update app.py

48129e9 verified 3 months ago

raw

history blame

20.9 kB

	import uuid
	from embedding_loader import *
	from initialize_db import QdrantClientInitializer
	from pdf_loader import PDFLoader
	from IPython.display import display, Markdown
	import gradio as gr
	from langchain_core.messages import HumanMessage, AIMessage
	from langchain.memory import ConversationBufferMemory
	from langchain_core.chat_history import InMemoryChatMessageHistory
	from qdrant_client import QdrantClient, models
	from db_operations import DatabaseOperations
	from openai import AzureOpenAI
	import json
	from qdrant_client.http import models as rest
	import time
	from fastembed.sparse.bm25 import Bm25
	from fastembed.late_interaction import LateInteractionTextEmbedding

	# Dense embedding model. `import_embedding` is not defined in this file;
	# presumably it is provided by `from embedding_loader import *` - confirm.
	dense_embedding_model = import_embedding()

	# fastembed models used for the ColBERT (late interaction) and BM25 (sparse,
	# Turkish) query vectors built in `retriever_db`.
	late_interaction_embedding_model = LateInteractionTextEmbedding("colbert-ir/colbertv2.0")
	bm25_embedding_model = Bm25("Qdrant/bm25", language="turkish")

	# NOTE(review): `os` and `openai` are not imported explicitly in this file;
	# presumably they are re-exported by the `embedding_loader` star import - confirm.
	# NOTE(review): if the `azure_api` environment variable is unset, os.getenv
	# returns None and the os.environ assignment below raises TypeError.
	AZURE_OPENAI_KEY = os.getenv('azure_api')
	os.environ['AZURE_OPENAI_KEY'] = AZURE_OPENAI_KEY
	openai.api_version = "2024-08-01-preview" # change it with your own version
	openai.azure_endpoint = os.getenv('azure_endpoint')

	# Shared Azure OpenAI client and deployment name; both are used as default
	# arguments of `chat_gpt` / `chat_gpt_nofn` and read directly by the graders.
	model = "gpt-4o-mini" # deployment name on Azure OPENAI Studio
	client = AzureOpenAI(azure_endpoint = openai.azure_endpoint,
	api_key=AZURE_OPENAI_KEY,
	api_version=openai.api_version)

	# Qdrant client (passed to `retriever_db` and `save_user_history_demo`) and a
	# PDF loader instance; `obj_loader` is only referenced from commented-out code
	# in the visible part of this file.
	obj_qdrant = QdrantClientInitializer()
	qclient = obj_qdrant.initialize_db()
	obj_loader = PDFLoader()

	# -----
	def retriever_db(client, query, collection_name, CAR_ID):
	    """Run a hybrid search for *query* and return the payloads of the hits.

	    Three candidate lists (dense "sfr-mistral", sparse "bm25" and
	    late-interaction "colbertv2.0", 30 candidates each) are prefetched and
	    fused with Reciprocal Rank Fusion; the fused result is restricted to
	    points whose ``car_id`` payload field equals *CAR_ID*.

	    Args:
	        client: Qdrant client exposing ``query_points``.
	        query: User question to embed.
	        collection_name: Name of the Qdrant collection to search.
	        CAR_ID: Value matched against the ``car_id`` payload field.

	    Returns:
	        A list with the payload of each returned point (at most 10).
	    """
	    dense_query_vector = list(dense_embedding_model.embed_documents([query]))[0]
	    sparse_query_vector = list(bm25_embedding_model.query_embed(query))[0]
	    late_query_vector = list(late_interaction_embedding_model.query_embed(query))[0].tolist()

	    # (query vector, named vector in the collection) for every prefetch branch
	    branches = [
	        (dense_query_vector, "sfr-mistral"),
	        (models.SparseVector(**sparse_query_vector.as_object()), "bm25"),
	        (late_query_vector, "colbertv2.0"),
	    ]
	    prefetch = [
	        models.Prefetch(query=vector, using=vector_name, limit=30)
	        for vector, vector_name in branches
	    ]

	    car_filter = models.Filter(
	        must=[
	            models.FieldCondition(key="car_id", match=models.MatchValue(value=CAR_ID)),
	        ]
	    )

	    # NOTE(review): the filter is applied to the fused query only, as in the
	    # original code; the prefetch branches themselves are unfiltered.
	    results = client.query_points(
	        collection_name,
	        prefetch=prefetch,
	        query=models.FusionQuery(fusion=models.Fusion.RRF),
	        with_payload=True,
	        # QdrantClient.query_points names this argument `query_filter`; the
	        # original passed `filter=` and was missing the comma before `limit`.
	        query_filter=car_filter,
	        limit=10,
	    )
	    return [point.payload for point in results.points]

	## new version
	def chat_gpt(prompt=None, history=None, model=model, client=client, tools=None):
	    """Send a (non-streaming) chat completion request that must call a tool.

	    Args:
	        prompt: Optional user message appended to *history* before the call.
	        history: List of chat messages. When given, it is extended in place
	            with the user message (same as before); when omitted, a fresh
	            list is used for this call only.
	        model: Deployment name passed to the API.
	        client: Client exposing ``chat.completions.create``.
	        tools: Tool schemas offered to the model. When provided, a tool call
	            is enforced with ``tool_choice="required"``.

	    Returns:
	        The completion object returned by the client.
	    """
	    # A literal `[]` default is created once and shared by every call, so
	    # messages from earlier requests would leak into later ones.
	    messages = [] if history is None else history
	    if prompt is not None:
	        messages.append({"role": "user", "content": f"{prompt}"})

	    request = {"model": model, "messages": messages, "temperature": 0.0}
	    # The former default `tools=[None]` is not a valid tools payload; only
	    # send `tools` / `tool_choice` when the caller actually supplied tools.
	    if tools:
	        request["tools"] = tools
	        request["tool_choice"] = "required"

	    return client.chat.completions.create(**request)

	# Tool schema handed to the model in the first hop: a single function whose
	# three string arguments name a section / subsection / subsubsection of the
	# manual's table of contents. All three arguments are required.
	_toc_title_levels = (
	    ("section_title", "section"),
	    ("sub_section_title", "subsection"),
	    ("sub_sub_section_title", "subsubsection"),
	)

	retrieval_functions = [
	    {
	        "type": "function",
	        "function": {
	            "name": "get_section_titles",
	            "description": """Use this function to get the section, subsection and subsusbsection titles from a user manual table of content.""",
	            "parameters": {
	                "type": "object",
	                "properties": {
	                    argument: {
	                        "type": "string",
	                        "description": f"Title of the {level} in a table of content",
	                    }
	                    for argument, level in _toc_title_levels
	                },
	                "required": [argument for argument, _ in _toc_title_levels],
	            },
	        },
	    }
	]

	def get_section_content(section_title, sub_section_title, sub_sub_section_title, content_path):
	    """Look up one sub-subsection of a manual in its JSON content file.

	    Args:
	        section_title: Key below ``"TableOfContents"``.
	        sub_section_title: Key below the section.
	        sub_sub_section_title: Key below the subsection.
	        content_path: Path of the JSON file holding the manual content.

	    Returns:
	        ``{"metadata": {"pages": ...}, "page_content": ...}`` when the entry
	        exists (``pages`` is ``None`` if the entry has no ``"pages"`` key),
	        otherwise ``None``.

	    Raises:
	        OSError: If *content_path* cannot be opened.
	        json.JSONDecodeError: If the file is not valid JSON.
	    """
	    # JSON is UTF-8 by specification; do not depend on the locale default.
	    with open(content_path, "r", encoding="utf-8") as file:
	        doc_section_content = json.load(file)

	    try:
	        entry = doc_section_content["TableOfContents"][section_title][sub_section_title][sub_sub_section_title]
	        content = entry["content"]
	        pages = entry.get("pages")
	    except (KeyError, TypeError, AttributeError):
	        # Unknown title, or a node on the path that is not a mapping: the
	        # model asked for a section that does not exist in this manual.
	        return None

	    # Always return the same shape. The previous bare `except` returned the
	    # raw content string whenever only the "pages" key was missing.
	    return {"metadata": {"pages": pages}, "page_content": content}

	def get_lead_result(question):
	    """Ask the model whether *question* relates to the offered services.

	    The service catalogue and the question are sent together with a forced
	    call of the ``grade_service_relevance`` tool; the ``binary_score``
	    argument of that tool call is returned (the schema allows 'yes' / 'no').
	    """
	    # Service catalogue: category -> comma separated services. The whole dict
	    # is rendered into the user message through its str() representation.
	    service_catalog = {
	        "Bakım": """Check-Up, Periyodik Bakım, Aks Değişimi, Amortisör Değişimi, Amortisör Takozu Değişimi, Baskı Balata Değişimi, Benzin Filtresi Değişimi,
	Debriyaj Balatası Değişimi, Direksiyon Kutusu Değişimi, Dizel Araç Bakımı, Egzoz Muayenesi, Fren Kaliperi Değişimi, El Freni Teli Değişimi,
	Fren Balatası Değişimi, Fren Disk Değişimi, Hava Filtresi Değişimi, Helezon Yay Değişimi, Kampana Fren Balatası Değişimi,
	Kızdırma Bujisi Değişimi, Rot Başı Değişimi, Rot Kolu Değişimi, Rotil Değişimi, Silecek Değişimi, Süspansiyon, Triger Kayışı Değişimi,
	Triger Zinciri Değişimi, V Kayışı Değişimi, Yağ Filtresi Değişimi, Yakıt Filtresi Değişimi, Havayastığı Değişimi""",
	        "Yağ ve Sıvılar": """Şanzıman Yağı Değişimi, Dizel Araçlarda Yağ Değişimi, Yağ Değişimi, Fren Hidrolik Değişimi, Antifriz Değişimi,""",
	        "Akü": """Akü Şarj Etme, Akü Değişimi""",
	        "Klima": """Oto Klima Kompresörü Tamiri, Oto Klima Tamiri, Araç Klima Temizliği, Araç Klima Bakteri Temizliği, Klima Gazı Dolumu, Klima Dezenfeksiyonu, Polen Filtresi Değişimi""",
	        "Elektrik": """Servis Uyarı Lambası Sıfırlama,Buji Kablosu Değişimi, Arıza Tespit, Göstergelerin Kontrolü, Far Ayarı ve Ampul Değişimi, Buji Değişimi, Sigorta Değişimi""",
	        "Lastik/ Jant": """Lastik Jant Satış, Lastik Değişimi, Balans Ayarı, Rot Ayarı, Rotasyon, Lastik Tamiri, Hava Kontrolü, Nitrojen Dolumu, Supap Değişimi, Lastik Saklama (Lastik Oteli), Jant Sökme Takma,""",
	        "Diğer": """Cam Tamiri""",
	        "Hibrit Araçlar": "Hibrit Araç Aküsü",
	    }

	    # Single tool whose only argument is the yes/no verdict.
	    verdict_schema = {
	        "type": "string",
	        "description": "Services are relevant to the question, 'yes' or 'no'",
	        "enum": ["yes", "no"],
	    }
	    grading_tools = [
	        {
	            "type": "function",
	            "function": {
	                "name": "grade_service_relevance",
	                "description": "Grade the relevance of services to a user question",
	                "parameters": {
	                    "type": "object",
	                    "properties": {"binary_score": verdict_schema},
	                    "required": ["binary_score"],
	                },
	            },
	        }
	    ]

	    # System message
	    grader_instructions = """Soruyu cevaplarken:
	1- Önce soruyu düşün.
	2- Kullanıcının sorduğu soru, hizmet listesinde sunulan hizmetlerle alakalı mı?
	Alakalı ise "yes", değilse "no" olarak cevap ver."""

	    user_message = f"Provided services: \n\n {service_catalog} \n\n User question: {question}"

	    completion = client.chat.completions.create(
	        model=model,
	        messages=[
	            {"role": "system", "content": grader_instructions},
	            {"role": "user", "content": user_message},
	        ],
	        tools=grading_tools,
	        # Force the model to answer through the grading tool.
	        tool_choice={"type": "function", "function": {"name": "grade_service_relevance"}},
	    )

	    first_call = completion.choices[0].message.tool_calls[0]
	    verdict = json.loads(first_call.function.arguments)
	    return verdict['binary_score']

	def chat_gpt_nofn(prompt=None, history=None, model=model, client=client):
	    """Send a streaming chat completion request without any tools.

	    Args:
	        prompt: Optional user message appended to *history* before the call.
	        history: List of chat messages. When given, it is extended in place
	            with the user message (same as before); when omitted, a fresh
	            list is used for this call only.
	        model: Deployment name passed to the API.
	        client: Client exposing ``chat.completions.create``.

	    Returns:
	        The streaming response returned by the client (requested with
	        ``stream=True``), to be iterated chunk by chunk.
	    """
	    # A literal `[]` default is created once and shared by every call, so
	    # messages from earlier requests would leak into later ones.
	    messages = [] if history is None else history
	    if prompt is not None:
	        messages.append({"role": "user", "content": f"{prompt}"})

	    return client.chat.completions.create(
	        model=model,
	        messages=messages,
	        stream=True,
	    )

	def format_chat_prompt(chat_history):
	    """Convert ``(user, assistant)`` turns into a flat list of chat messages.

	    Each turn contributes a ``user`` message followed by an ``assistant``
	    message, in the order of *chat_history*. The raw history is printed
	    first for debugging.
	    """
	    print(chat_history)

	    messages = []
	    for user_message, ai_message in chat_history:
	        messages.extend(
	            (
	                {"role": "user", "content": user_message},
	                {"role": "assistant", "content": ai_message},
	            )
	        )
	    return messages

	# Result container returned by `grade_document_with_openai`.
	# NOTE(review): `BaseModel` and `Field` are not imported explicitly in this
	# file; presumably they come from the `embedding_loader` star import - confirm.
	class GradeDocuments(BaseModel):
	    """Binary score for relevance check on retrieved documents."""

	    # Verdict text; `chat` keeps a document only when this equals "yes".
	    binary_score: str = Field(
	        description="Documents are relevant to the question, 'yes' or 'no'",
	    )

	def grade_document_with_openai(document: str, question: str) -> GradeDocuments:
	    """Ask the model whether *document* is relevant to *question*.

	    Args:
	        document: Retrieved document; it is rendered into the prompt with
	            its string representation.
	        question: The user question.

	    Returns:
	        A ``GradeDocuments`` whose ``binary_score`` is exactly ``"yes"`` or
	        ``"no"``.
	    """
	    system_message = """
	You are a grader assessing relevance of a retrieved document to a user question.
	Consider the following when making your assessment:
	- Does the document directly or indiretly address the user's question?
	- Does it provide information or context that is pertinent to the question?
	- Does it discuss relevant risks, benefits, recommendations, or considerations related to the question?
	If the document contains keyword(s) or semantic meaning related or partially related to the question, grade it as relevant.
	Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
	"""
	    response = client.chat.completions.create(
	        model=model,
	        messages=[
	            {"role": "system", "content": system_message},
	            {"role": "user", "content": f'Retrieved document: \n\n {document} \n\n User question: {question}'},
	        ],
	    )

	    # The reply is free text, so normalise it before it is compared against
	    # "yes" by the caller: "Yes", "'yes'" or "yes." must not drop a document,
	    # and a missing content (None) must not crash on `.strip()`.
	    raw_answer = response.choices[0].message.content or ""
	    verdict = raw_answer.strip().strip("'\"`.!").lower()
	    score = "yes" if verdict.startswith("yes") else "no"
	    return GradeDocuments(binary_score=score)

	# Module-level holders accessed through `.value`: `handle_like` writes
	# `liked_state`, `chat` writes `last_interaction` and `save_last_interaction`
	# reads and clears it.
	# NOTE(review): both objects are created outside the `gr.Blocks()` context and
	# are never passed as event inputs/outputs, so they presumably act as plain
	# process-wide globals shared by all sessions - confirm this is intended.
	liked_state = gr.State(None)
	last_interaction = gr.State(None)

	def chat(question, manual, history, liked):
	    """Answer *question* about the selected car manual and stream the reply.

	    Steps: (1) ask the model, via the ``retrieval_functions`` tool, which
	    table-of-contents sections may hold the answer (up to 3 attempts);
	    (2) load those sections and add the hybrid-search hits from Qdrant;
	    (3) keep only the documents graded as relevant; (4) decide which lead
	    instruction to add; (5) stream the final answer.

	    Args:
	        question: The user question.
	        manual: Manual identifier; must be a key of the local lookup tables.
	        history: List of ``(question, answer)`` tuples, or a falsy value.
	        liked: Unused; kept for interface compatibility.

	    Yields:
	        ``(partial_answer, history + [(question, partial_answer)])`` while the
	        answer is streaming, then ``(answer, history)`` once complete.
	    """
	    history = history or []

	    conv = format_chat_prompt(history)
	    print("History: ", history)
	    print("CONV: ", conv)
	    # Value of the `car_id` payload field for each manual.
	    car_ids = {"Toyota_Corolla_2024_TR": -8580416610875007536,
	               "Renault_Clio_2024_TR": -5514489544983735006,
	               "Fiat_Egea_2024_TR": -2026113796962100812}

	    collection_list = {"Toyota_Corolla_2024_TR": "HYBRID_TOYOTA_MANUAL_COLLECTION_EMBED3",
	                       "Renault_Clio_2024_TR": "HYBRID_RENAULT_MANUAL_COLLECTION_EMBED3",
	                       "Fiat_Egea_2024_TR": "HYBRID_FIAT_MANUAL_COLLECTION_EMBED3"}

	    collection_name = collection_list[manual]

	    toc_name = "ToC_" + manual + ".txt"

	    start_time = time.time()
	    # The table of contents holds Turkish text: read it as UTF-8 explicitly.
	    with open("ToCs/" + toc_name, "r", encoding="utf-8") as file:
	        content = json.load(file)
	    print("ToCs:--- %s seconds ---" % (time.time() - start_time))

	    start_time = time.time()

	    # The first-hop prompt does not change between attempts, build it once.
	    first_hop = f"""Soruyu cevaplarken:
	1- Önce soruyu düşün.
	2- Kullanıcının sorduğu sorunun konu başlıkları neler olabilir?
	3- Sorulan soru bir arızaya işaret ediyo olabilir mi?
	4- Bu konu başlıkları kullanım kılavuzu içindekiler tablosu başlıkları ile alakalı mı?
	5- Alakalı olabilecek tüm başlıkları türet.
	Buna göre, aşağıda vereceğim kullanım kılavuzu içindekiler tablosu (başlıklar) bilgisini kullanarak bu içeriğe erişmek için uygun fonksiyonları üret.

	Eğer herhangi bir içeriğe ulaşamazsan, bunu belir ve sorunun cevabı hakkında yorum yapma.
	Kullanım Kılavuzu İçindekiler Tablosu:
	{content}
	"""

	    arg_list = []
	    for _attempt in range(3):
	        first_hop_response = chat_gpt(prompt=f"Soru: {question}",
	                                      history=[{"role": "system", "content": f"{first_hop}"}],
	                                      tools=retrieval_functions)

	        tool_calls = first_hop_response.choices[-1].message.tool_calls
	        if not tool_calls:
	            print("No tool calls")
	            continue

	        for tool_call in tool_calls:
	            function_name = tool_call.function.name
	            args = json.loads(tool_call.function.arguments)
	            arg_list.append(args)
	            print(f"Will call {function_name} with arguments {args}")
	        break  # got section titles, no further attempt needed

	    print("First_hop:--- %s seconds ---" % (time.time() - start_time))

	    path = "Contents/" + manual + ".json"

	    start_time = time.time()
	    documents = []
	    for args in arg_list:
	        section = get_section_content(str(args.get('section_title', '')),
	                                      str(args.get('sub_section_title', '')),
	                                      str(args.get('sub_sub_section_title', '')),
	                                      content_path=path)
	        # Titles that do not exist in the manual yield None; do not send
	        # those to the grader.
	        if section is not None:
	            documents.append(section)

	    CAR_ID = car_ids[manual]

	    # The original referenced an undefined name `query` here and below; the
	    # text to search and grade against is the user question.
	    documents.extend(retriever_db(qclient, question, collection_name, CAR_ID))

	    print("Retriever:--- %s seconds ---" % (time.time() - start_time))

	    start_time = time.time()
	    graded_doc = []
	    for document in documents:
	        grade = grade_document_with_openai(document, question)
	        # Tolerate "Yes" / "yes." style replies from the grader.
	        if grade.binary_score.strip().lower().startswith("yes"):
	            graded_doc.append(document)
	    print("Grade Doc:--- %s seconds ---" % (time.time() - start_time))

	    start_time = time.time()
	    if get_lead_result(question).lower() == 'yes':
	        lead = """ Her zaman tamirci yerine geçen "Servis", "Yetkili Servis", ""... servisi", ""... yetkili servisi", "bakım" gibi ifadeler yerine yalnızca "OtoBakım Servisi" yazmalısın.
	Cevabın sonunda, "Nasıl devam edeceğinizden emin değilseniz." ifadesiyle başlayarak, kullanıcılara hızlı ve uygun fiyatlı çözümler için OtoBakım mağazalarını ziyaret etmelerini teklif etmelisin."""
	        print('Lead: yes')
	    else:
	        lead = 'Her zaman tamirci yerine geçen "Servis", "Yetkili Servis", ""... servisi", ""... yetkili servisi", "bakım" gibi ifadeler yerine yalnızca "OtoBakım Servisi" yazmalısın. Sorunun tipine göre OtoBakım servislerine yönlendirme yapmalısın.'
	        print('Lead: no')
	    print("Lead Check:--- %s seconds ---" % (time.time() - start_time))

	    start_time = time.time()
	    prompt = f"""
	Sen, yalnızca araba ile ilgili sorunlara ve araç bilgilerine odaklanan, ARVI adında uzman bir asistansın.
	Amacın, araba sorunları, bakım, onarımlar, teknik özellikler ve diğer araçla ilgili konularla ilgili sorulara eğer dokümanlarda yeterli bilgi varsa doğru, yardımcı, net ve yorum yapmadan cevaplar vermektir.
	Temel nezaket etkileşimlerine uygun ve kibar bir şekilde yanıt vermek için tasarlandın.

	Soruları yanıtlarken aşağıdaki adımları izle: \n
	- Dokümanlar soruyla ilgiliyse, soruyu yanıtlamak için dokümanlardan yararlan.
	- Sorulara cevap verirken sana sağlanan bilgilerdeki uyarılara, tehlikelere vurgu yap ve öne çıkar.
	- Soruları yanıtlarken yorum yapma, kişisel görüşlerini belirtme ve önceki bilgilerini kullanma.
	- Dokümandakiler dışında terim ve bilgileri kullanma.
	- Eğer dokümanlarda bir işlemin nasıl yapıldığı adım adım anlatılıyorsa, bu adımları direkt şekilde ekle.
	- Dokümanlarda farklı motor modellerine göre bilgi veriliyorsa, aracın motor modelini belirt.
	- Kullanıcıya doğrudan cevap ver.
	- Cevaplar kısa ama anlamlı ve yeterli olsun.
	- Her cevabında kullandığın tüm kaynakları göster.
	- Cevap verirken aşağıdaki kaynak verme kurallarına uy:
	* Sayfa numaralarını küçükten büyüğe sırala.
	* Aynı cümle içinde tekrar eden sayfa numaralarını eleme.
	* Aynı numarayı birden fazla kez yazma.
	* Cevabın sonunda kullanılan tüm kaynakları listele:
	Kaynaklar:
	- Sayfa **: [Doküman adı veya kısa açıklama]
	- Sayfa **: [Doküman adı veya kısa açıklama]

	Ek yönerge: {lead} \n

	Son Kontrol:
	- Cevabın doğruluğunu ve tamlığını kontrol et.
	- Gereksiz bilgi veya yorum olup olmadığını kontrol et.
	- Referansların doğru eklendiğinden emin ol.

	Eğer dokümanlar boş ise: "Üzgünüm, kılavuzda bu konuyla ilgili bilgi bulamadım. Bu soruyu yanıtlayamıyorum."
	Soru çok genel ise, spesifik bilgi iste.

	Dokümanlar: {graded_doc}
	"""

	    conv.append({"role": "system", "content": f"{prompt}"})
	    final_response = chat_gpt_nofn(prompt=f"Soru: {question}", history=conv)

	    partial_response = ""
	    print("Answer:--- %s seconds ---" % (time.time() - start_time))

	    for chunk in final_response:
	        # A streamed chunk may arrive without any choice; skip it explicitly.
	        # The former bare `except` around the `yield` also swallowed
	        # GeneratorExit when the consumer closed the generator.
	        if not chunk.choices:
	            continue
	        piece = getattr(chunk.choices[0].delta, "content", None)
	        if piece is not None:
	            partial_response += piece

	        yield partial_response, history + [(question, partial_response)]

	    response = partial_response

	    history.append((question, response))
	    print("Answer:--- %s seconds ---" % (time.time() - start_time))

	    # Store the last interaction without saving to the database yet
	    last_interaction.value = {
	        "question": question,
	        "response": response,
	        "manual": manual,
	        "point_id": uuid.uuid4().hex
	    }

	    yield response, history

	def save_last_interaction(feedback):
	    """Persist the pending interaction, if any, together with *feedback*.

	    Reads the record stored in ``last_interaction.value``, hands it to
	    ``DatabaseOperations.save_user_history_demo`` for the
	    "USER_COLLECTION_EMBED3_v3" collection and then clears the pending
	    record. Does nothing when no interaction is pending.
	    """
	    pending = last_interaction.value
	    if not pending:
	        return

	    DatabaseOperations.save_user_history_demo(
	        qclient,
	        "USER_COLLECTION_EMBED3_v3",
	        pending["question"],
	        pending["response"],
	        dense_embedding_model,
	        pending["point_id"],
	        pending["manual"],
	        feedback,
	    )
	    # Cleared only after the save call returned.
	    last_interaction.value = None

	# Manuals offered in the dropdown; `chat` expects exactly these identifiers.
	manual_list = ["Toyota_Corolla_2024_TR", "Renault_Clio_2024_TR", "Fiat_Egea_2024_TR"]

	with gr.Blocks() as demo:
	    chatbot = gr.Chatbot(height=600)
	    manual = gr.Dropdown(label="Kullanım Kılavuzları", value="Toyota_Corolla_2024_TR", choices=manual_list)
	    textbox = gr.Textbox()
	    clear = gr.ClearButton(components=[textbox, chatbot], value='Clear console')

	    def handle_like(data: gr.LikeData):
	        """Store the like/dislike flag and save the pending interaction with it."""
	        liked_state.value = data.liked
	        if liked_state.value is not None:
	            feedback = "LIKE" if liked_state.value else "DISLIKE"
	            save_last_interaction(feedback)

	    def gradio_chat(question, manual, history):
	        """Stream `chat` into the UI as ``(textbox value, chatbot history)`` updates.

	        The textbox is cleared on every update. An interaction that received
	        no like/dislike is saved with feedback "N/A" before the new one starts.
	        """
	        save_last_interaction("N/A") # Save previous interaction before starting a new one
	        # `chat` already yields the cumulative answer inside the history, so
	        # only the history is forwarded; the old code additionally summed the
	        # cumulative partials into a variable that was never used.
	        for _partial_response, updated_history in chat(question, manual, history, liked_state.value):
	            yield "", updated_history

	    textbox.submit(gradio_chat, [textbox, manual, chatbot], [textbox, chatbot])
	    chatbot.like(handle_like, None, None)

	demo.queue()
	demo.launch()