# File size: 2,892 Bytes
# 2953169 36b275d 2953169 36b275d 2953169 36b275d 2953169
# (line-number gutter 1-93 from the original listing omitted)
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np
import openai
import gradio as gr
import faiss
import os
# Load the table of extracted document texts. get_content_from_indices looks
# rows up by position and reads their 'pdf_content' column.
# NOTE(review): presumably row i of this CSV corresponds to vector i of the
# FAISS index loaded below — confirm against the script that built the index.
df = pd.read_csv('extracted_texts_df.csv')
# Read the FAISS index from disk; search() queries it.
faiss_index_path = "faiss_index.index"
index = faiss.read_index(faiss_index_path)
# Sentence-embedding model that search() uses to encode the query text.
embedder = SentenceTransformer('all-mpnet-base-v2')
# Function to get content from indices
def get_content_from_indices(indices, source_df=None):
    """Join the ``pdf_content`` text of the rows named by a search result.

    Args:
        indices: Sequence of label rows as returned by a FAISS search; only
            the first row (``indices[0]``) is read.
        source_df: Optional DataFrame with a ``pdf_content`` column to read
            from. Defaults to the module-level ``df``.

    Returns:
        The selected texts joined by blank lines. A label that does not name
        a valid row contributes ``"Content not found."`` instead.
    """
    frame = df if source_df is None else source_df
    row_count = len(frame)
    contents = []
    for idx in indices[0]:
        # FAISS pads missing neighbours with -1. Without the lower bound a
        # negative label would be read by ``iloc`` as counting from the end
        # and silently return an unrelated row.
        if 0 <= idx < row_count:
            contents.append(frame.iloc[int(idx)]['pdf_content'])
        else:
            contents.append("Content not found.")
    return "\n\n".join(contents)
# Search function using FAISS and embeddings
def search(query_text, top_k=1):
    """Return the stored document text closest to *query_text*.

    The query is encoded with the module-level ``embedder``, scaled to unit
    length, and looked up in the module-level FAISS ``index``.

    Args:
        query_text: Text to search for.
        top_k: Number of neighbours requested from the index.

    Returns:
        The string built by ``get_content_from_indices`` from the neighbour
        labels the index returned.
    """
    # Encode as a tensor, then move to host memory as a NumPy array.
    raw_vector = embedder.encode(query_text, convert_to_tensor=True).cpu().numpy()
    # Unit-length vector, shaped as a single-row matrix for the index call.
    query_matrix = (raw_vector / np.linalg.norm(raw_vector)).reshape(1, -1)
    # Only the labels are needed; the distances are discarded.
    _, neighbour_ids = index.search(query_matrix, top_k)
    return get_content_from_indices(neighbour_ids)
# Read the OpenAI API key from the OPENAI_API_KEY environment variable
# (per the original note, supplied through Hugging Face Secrets).
# NOTE(review): `api_key` is not referenced again in the visible code —
# presumably the openai library reads the same environment variable on its
# own; confirm, or assign it to the client explicitly.
api_key = os.getenv("OPENAI_API_KEY")
# Generate the answer using OpenAI API
def generate_answer(query):
    """Answer *query* with the chat model, using retrieved text as context.

    The text returned by ``search(query)`` is embedded in the user prompt,
    one completion is requested from ``gpt-4o-mini`` (temperature 0.2, at
    most 1500 tokens), and the first choice is returned.

    Args:
        query: The user's question as plain text.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string when the reply carries no text content.
    """
    # Retrieve the context first so the prompt template stays readable.
    context = search(query)
    # The instruction sentence previously contained a stray Arabic letter
    # where a comma was intended and unbalanced quoting around the fallback
    # phrase; both are repaired so the model receives a well-formed request.
    prompt = (
        "\n"
        "Answer the following query based on the provided content from pharmaceutical documents.\n"
        "Provide a detailed and accurate response in readable format, make the user read comfortably, "
        "get a very summarized answer at the end for who want take the answer in short time, "
        "and if you do not know the answer say \"I don't have any idea\" and do not write anything more.\n"
        "Query:\n"
        f'"{query}"\n'
        "Context:\n"
        f'"{context}"\n'
    )
    messages = [
        {"role": "system", "content": "You are a pharmacy assistant providing detailed answers based on document content."},
        {"role": "user", "content": prompt},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        max_tokens=1500,
        n=1,
        stop=None,
        temperature=0.2,
        messages=messages,
    )
    # Guard against a missing/None content field before stripping.
    generated_text = response.choices[0].message['content'] or ""
    return generated_text.strip()
# Gradio interface
def pharmacy_assistant(query):
    """Gradio callback: return the generated answer for *query*.

    Delegates directly to ``generate_answer`` and passes its result through
    unchanged.
    """
    return generate_answer(query)
# Gradio UI: a two-line text box in, plain text out, answered by
# pharmacy_assistant.
interface = gr.Interface(
    fn=pharmacy_assistant,
    inputs=gr.Textbox(lines=2, placeholder="Ask your pharmacy-related question here..."),
    outputs="text",
    title="Assistant",
    description="Ask questions about pharmaceutical products, and get detailed answers based on document content.",
)
# Launch with Gradio's defaults (no debug flag is passed). The stray trailing
# "|" that followed this call was a syntax error and has been removed.
interface.launch()