File size: 4,581 Bytes
213aef6
 
 
 
2fc54b2
 
213aef6
6121e00
 
213aef6
6121e00
3aa6e0c
c8181a4
ae89ac0
bef4c68
 
 
 
213aef6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6121e00
213aef6
c8181a4
2fc54b2
d0a25e9
c8181a4
 
 
 
 
 
6121e00
 
213aef6
6121e00
213aef6
6121e00
c8181a4
2fc54b2
6121e00
 
c8181a4
6121e00
 
 
e496e5b
6121e00
 
 
213aef6
 
ed053f2
 
213aef6
 
 
 
 
 
 
 
 
 
3aa6e0c
 
 
 
 
 
 
 
 
 
6121e00
3aa6e0c
 
 
 
 
 
 
 
 
6121e00
3aa6e0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import streamlit as st
import os
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_qdrant import QdrantVectorStore
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from operator import itemgetter
from langchain_community.embeddings import HuggingFaceEmbeddings  # Add this line

from sentence_transformers import SentenceTransformer

@st.cache_resource
def _load_sentence_transformer():
    """Load the fine-tuned SentenceTransformer model, cached by Streamlit.

    Streamlit re-executes this script from the top on every user
    interaction. Wrapping the load in ``st.cache_resource`` keeps a single
    model instance alive instead of re-instantiating it on each rerun.

    Returns:
        The ``SentenceTransformer`` instance for
        "Technocoloredgeek/midterm-finetuned-embedding".
    """
    return SentenceTransformer("Technocoloredgeek/midterm-finetuned-embedding")


# Kept as a module-level name for backward compatibility.
# NOTE(review): `model` is not referenced anywhere else in the visible code;
# the vector store builds its own embeddings object from the same model name.
# Confirm whether this load is still needed at all.
model = _load_sentence_transformer()

# Copy the OpenAI key from Streamlit's secrets into the process environment.
_OPENAI_KEY_NAME = "OPENAI_API_KEY"
os.environ[_OPENAI_KEY_NAME] = st.secrets[_OPENAI_KEY_NAME]

# Source PDFs that get loaded, chunked and indexed into the vector store.
pdf_links = [
    "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf",
    "https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf",
]

@st.cache_resource
def load_and_process_pdfs(pdf_links):
    """Load every PDF in ``pdf_links`` and split the result into chunks.

    Each link is handed to ``PyMuPDFLoader``; the loaded documents are
    collected in link order and then split with a
    ``RecursiveCharacterTextSplitter`` (500-character chunks, 40-character
    overlap, lengths measured with ``len``).

    Args:
        pdf_links: Iterable of PDF locations accepted by ``PyMuPDFLoader``.

    Returns:
        The chunked documents produced by ``split_documents``.
    """
    pages = [
        page
        for pdf_url in pdf_links
        for page in PyMuPDFLoader(file_path=pdf_url).load()
    ]

    chunker = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=40,
        length_function=len,
        is_separator_regex=False,
    )
    return chunker.split_documents(pages)

@st.cache_resource
def setup_vectorstore():
    """Create an in-memory Qdrant vector store populated with the PDF chunks.

    Steps:
      1. Open a ``QdrantClient`` at location ":memory:".
      2. Build ``HuggingFaceEmbeddings`` for the fine-tuned model and probe it
         with a sample query to learn the embedding width.
      3. Create the "AI_Ethics_Framework" collection with that width and
         cosine distance.
      4. Wrap the collection in a ``QdrantVectorStore`` and add the chunks
         returned by ``load_and_process_pdfs(pdf_links)``.

    Returns:
        The populated ``QdrantVectorStore``.
    """
    collection = "AI_Ethics_Framework"
    client = QdrantClient(location=":memory:")

    embedder = HuggingFaceEmbeddings(
        model_name="Technocoloredgeek/midterm-finetuned-embedding"
    )

    # The collection must be created with the exact vector width the
    # embedder produces, so measure it from one sample embedding.
    embedding_dim = len(embedder.embed_query("test"))
    client.create_collection(
        collection_name=collection,
        vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
    )

    store = QdrantVectorStore(
        client=client,
        collection_name=collection,
        embedding=embedder,
    )
    store.add_documents(load_and_process_pdfs(pdf_links))
    return store

@st.cache_resource
def create_rag_pipeline(_vector_store):
    """Assemble the retrieval-augmented QA chain on top of ``_vector_store``.

    The chain expects a dict with a "question" key. It retrieves context for
    the question, fills the prompt template, calls the chat model, and
    returns a dict with:
      * "response": the chat model's output for the filled prompt
      * "context":  the retriever's output for the question

    Args:
        _vector_store: Vector store exposing ``as_retriever()``.

    Returns:
        The composed runnable chain.
    """
    doc_retriever = _vector_store.as_retriever()

    prompt_text = """
    You are an expert AI assistant with deep knowledge of business, technology, and entrepreneurship. Your task is to provide accurate, insightful answers based solely on the given context. Follow these guidelines:
    1. Analyze the question carefully to understand the core information being sought.
    2. Thoroughly examine the provided context, identifying key relevant information.
    3. Formulate a clear, concise answer that directly addresses the question.
    4. Use specific details and examples from the context to support your answer.
    5. If the context doesn't contain sufficient information to fully answer the question, state this clearly and say,'I don't know'.
    6. Do not introduce any information not present in the context.
    7. If asked for an opinion or recommendation, base it strictly on insights from the context.
    8. Use a confident, authoritative tone while maintaining accuracy.
    9. If you cannot provide a clear answer to the question, reply with "I don't know".
    Question:
    {question}
    Context:
    {context}
    Answer:
    """

    qa_prompt = ChatPromptTemplate.from_template(prompt_text)
    chat_model = ChatOpenAI(model_name="gpt-4", temperature=0)

    # Stage 1: look up context for the question and carry the question along.
    gather_inputs = {
        "context": itemgetter("question") | doc_retriever,
        "question": itemgetter("question"),
    }
    # Stage 2: re-assign "context" onto the running dict unchanged.
    carry_context = RunnablePassthrough.assign(context=itemgetter("context"))
    # Stage 3: generate the answer and also return the context alongside it.
    answer_with_sources = {
        "response": qa_prompt | chat_model,
        "context": itemgetter("context"),
    }

    return gather_inputs | carry_context | answer_with_sources

# Streamlit UI: page title, question box, then the answer and its context.
st.title("Ask About AI Ethics!")

# Build the vector store and the RAG chain that answers questions over it.
vector_store = setup_vectorstore()
rag_pipeline = create_rag_pipeline(vector_store)

user_query = st.text_input("Enter your question about AI Ethics:")

# An empty text box is falsy, so nothing is generated until a question exists.
if user_query:
    chain_input = {"question": user_query}
    with st.spinner("Generating response..."):
        result = rag_pipeline.invoke(chain_input)

    st.write("Response:")
    answer_message = result["response"]
    st.write(answer_message.content)

    st.write("Context Used:")
    retrieved_context = result["context"]
    st.write(retrieved_context)