|
import os

import faiss
import numpy as np
import openai
import PyPDF2
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
def extract_text_from_pdf(pdf_file):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_file: A file-like object (e.g. a Streamlit UploadedFile)
            readable by PyPDF2.

    Returns:
        str: The concatenated text of all pages. Pages with no
        extractable text contribute an empty string.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    pages = []
    for page in reader.pages:
        # extract_text() returns None for pages with no extractable text
        # (scanned images, empty pages); coerce to "" so concatenation
        # does not raise TypeError.
        pages.append(page.extract_text() or "")
    # join() avoids the quadratic cost of repeated str +=.
    return "".join(pages)
|
|
|
|
|
def get_embeddings(text, model="text-embedding-ada-002"):
    """Embed a single string with the given OpenAI embedding model.

    Args:
        text: The text to embed.
        model: OpenAI embedding model name.

    Returns:
        The embedding vector (list of floats) for *text*.
    """
    # The legacy Embedding endpoint takes a batch; we send a batch of one
    # and unwrap the single result.
    payload = openai.Embedding.create(input=[text], model=model)
    return payload["data"][0]["embedding"]
|
|
|
|
|
def search_similar(query_embedding, index, stored_texts, top_k=3):
    """Find the stored texts whose embeddings are nearest to the query.

    Args:
        query_embedding: Embedding vector (sequence of floats) of the query.
        index: A FAISS index (or any object exposing ``search(x, k)``).
        stored_texts: Texts aligned positionally with the indexed vectors.
        top_k: Number of nearest neighbours to return.

    Returns:
        list[tuple]: ``(text, distance)`` pairs, nearest first.
    """
    # FAISS requires a 2-D, contiguous float32 array; passing a plain
    # Python list (the original code) raises at runtime.
    query = np.asarray([query_embedding], dtype="float32")
    distances, indices = index.search(query, top_k)
    return [
        (stored_texts[i], distances[0][rank])
        for rank, i in enumerate(indices[0])
    ]
|
|
|
|
|
st.title("Course Query Assistant")

# The OpenAI key is supplied interactively; everything below is gated on it.
openai_api_key = st.text_input("Enter your OpenAI API key:", type="password")

if openai_api_key:
    openai.api_key = openai_api_key

    uploaded_files = st.file_uploader("Upload Course Materials (PDFs)", type=["pdf"], accept_multiple_files=True)

    if uploaded_files:
        st.write("Processing uploaded course materials...")

        # Pull the text out of every uploaded PDF and merge into one corpus.
        course_texts = [extract_text_from_pdf(f) for f in uploaded_files]
        combined_text = " ".join(course_texts)

        # Fixed-size 1000-character chunks; the last chunk may be shorter.
        chunks = [combined_text[i:i + 1000] for i in range(0, len(combined_text), 1000)]

        if not chunks:
            # Scanned/image-only PDFs can yield no text at all; the original
            # code crashed on embeddings[0] in that case.
            st.write("No extractable text was found in the uploaded PDFs.")
        else:
            # One embedding API call per chunk.
            embeddings = [get_embeddings(chunk) for chunk in chunks]

            # FAISS requires a 2-D contiguous float32 array; index.add() on a
            # plain list of lists (the original code) fails at runtime.
            embedding_matrix = np.asarray(embeddings, dtype="float32")
            index = faiss.IndexFlatL2(embedding_matrix.shape[1])
            index.add(embedding_matrix)

            st.write("Course materials have been processed and indexed.")

            query = st.text_input("Enter your question about the course materials:")

            if query:
                query_embedding = get_embeddings(query)

                # Retrieve the chunks most similar to the question.
                results = search_similar(query_embedding, index, chunks)

                # Stuff the retrieved chunks into the prompt as context.
                context = "\n".join(result[0] for result in results)
                modified_prompt = f"Context: {context}\n\nQuestion: {query}\n\nProvide a detailed answer based on the context."

                response = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[{"role": "user", "content": modified_prompt}]
                )

                st.write("### Intelligent Reply:")
                st.write(response['choices'][0]['message']['content'])
|
|