|
import streamlit as st |
|
import PyPDF2 |
|
import openai |
|
import faiss |
|
import os |
|
import numpy as np |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
from reportlab.lib.pagesizes import letter |
|
from reportlab.pdfgen import canvas |
|
from io import BytesIO |
|
|
|
|
|
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: The PDF source, handed unchanged to ``PyPDF2.PdfReader``.

    Returns:
        One string with the extracted text of all pages, with no separator
        inserted between pages. Pages that yield no text contribute nothing.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # Build the result in a single join instead of repeated ``+=``.
    # ``or ""`` is defensive: a falsy extraction result (e.g. None for an
    # image-only page) must not break the concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)
|
|
|
|
|
def get_embeddings(text, model="text-embedding-ada-002"):
    """Fetch the embedding vector for a single piece of text.

    Args:
        text: The string to embed; it is sent as a one-element input list.
        model: Name of the embedding model passed to the API call.

    Returns:
        The ``embedding`` entry of the first item in the response's
        ``data`` list.
    """
    api_result = openai.Embedding.create(model=model, input=[text])
    first_item = api_result["data"][0]
    return first_item["embedding"]
|
|
|
|
|
def search_similar(query_embedding, index, stored_texts, top_k=3):
    """Look up the stored texts closest to a query embedding.

    Args:
        query_embedding: A single embedding vector (sequence of floats).
        index: Object exposing ``search(matrix, k)`` that returns a
            ``(distances, indices)`` pair of 2-D arrays, as a FAISS index does.
        stored_texts: Texts in the same order the vectors were added to
            ``index``; a returned index ``i`` refers to ``stored_texts[i]``.
        top_k: Maximum number of neighbours to request.

    Returns:
        A list of ``(text, distance)`` tuples in the order the index returned
        them. It may hold fewer than ``top_k`` entries when the index contains
        fewer vectors than requested.
    """
    # FAISS operates on float32 matrices; a plain list of Python floats would
    # otherwise become a float64 array.
    query_matrix = np.array([query_embedding], dtype="float32")
    distances, indices = index.search(query_matrix, top_k)

    # When fewer than top_k vectors are indexed, FAISS pads the labels with -1.
    # Indexing stored_texts with -1 would silently return the last text, so
    # those placeholder slots are skipped.
    return [
        (stored_texts[i], dist)
        for i, dist in zip(indices[0], distances[0])
        if i >= 0
    ]
|
|
|
|
|
def _wrap_to_width(pdf_canvas, line, font_name, font_size, max_width):
    """Split one line into pieces that each fit within ``max_width`` points."""
    pieces = []
    current = None  # None means no piece has been started yet
    for word in line.split(" "):
        candidate = word if current is None else f"{current} {word}"
        if pdf_canvas.stringWidth(candidate, font_name, font_size) <= max_width:
            current = candidate
            continue
        if current is not None:
            pieces.append(current)
        # The word alone may still be wider than the page (e.g. a long URL),
        # so it is broken character by character.
        current = ""
        for ch in word:
            too_wide = (
                pdf_canvas.stringWidth(current + ch, font_name, font_size)
                > max_width
            )
            if current and too_wide:
                pieces.append(current)
                current = ch
            else:
                current += ch
    pieces.append(current if current is not None else "")
    return pieces


def create_pdf(response_text):
    """Render the reply text into a letter-sized PDF held in memory.

    The first page starts with the heading "Intelligent Reply:". The body is
    set in 10 pt Helvetica; lines wider than the printable area are wrapped,
    and a new page is started when the text reaches the bottom margin.

    Args:
        response_text: The reply to render; newlines separate paragraphs.
            ``None`` is treated as empty text.

    Returns:
        A ``BytesIO`` positioned at offset 0 that contains the finished PDF.
    """
    margin = 30
    font_name, font_size = "Helvetica", 10

    buffer = BytesIO()
    c = canvas.Canvas(buffer, pagesize=letter)
    width, height = letter
    max_width = width - 2 * margin

    def start_text(top_y):
        # Each page needs its own text object with the body font applied.
        text_obj = c.beginText(margin, top_y)
        text_obj.setFont(font_name, font_size)
        return text_obj

    c.drawString(margin, height - 30, "Intelligent Reply:")
    text_object = start_text(height - 50)

    for line in (response_text or "").splitlines():
        for piece in _wrap_to_width(c, line, font_name, font_size, max_width):
            if text_object.getY() < margin:
                # Page is full: flush what has been written and continue on
                # a fresh page instead of drawing below the visible area.
                c.drawText(text_object)
                c.showPage()
                text_object = start_text(height - margin)
            text_object.textLine(piece)

    c.drawText(text_object)
    c.showPage()
    c.save()

    # Rewind so callers can read the PDF from the beginning.
    buffer.seek(0)
    return buffer
|
|
|
|
|
# --- Streamlit page: upload PDFs, index them, answer questions about them ---
st.title("Course Query Assistant")

openai_api_key = st.text_input("Enter your OpenAI API key:", type="password")
if openai_api_key:
    openai.api_key = openai_api_key

uploaded_files = st.file_uploader(
    "Upload Course Materials (PDFs)", type=["pdf"], accept_multiple_files=True
)

if uploaded_files:
    if not openai_api_key:
        # Every step below calls the OpenAI API, so stop early with a clear
        # message instead of failing inside the embedding request.
        st.warning("Please enter your OpenAI API key to process the course materials.")
        st.stop()

    # Streamlit re-runs the whole script on every widget interaction. The
    # chunks and index are cached in session state, keyed by the uploaded
    # files, so the PDFs are not re-embedded for each new question.
    fingerprint = tuple((f.name, f.size) for f in uploaded_files)
    cached = (
        st.session_state["course_index"]
        if "course_index" in st.session_state
        else None
    )

    if cached is None or cached["fingerprint"] != fingerprint:
        st.write("Processing uploaded course materials...")

        course_texts = [extract_text_from_pdf(f) for f in uploaded_files]
        combined_text = " ".join(course_texts)

        # Fixed-size 1000-character chunks; whitespace-only chunks carry no
        # content and are dropped before embedding.
        chunks = [
            combined_text[i:i + 1000]
            for i in range(0, len(combined_text), 1000)
        ]
        chunks = [chunk for chunk in chunks if chunk.strip()]

        if not chunks:
            # E.g. scanned PDFs without a text layer: there is nothing to index.
            st.error("No extractable text was found in the uploaded PDFs.")
            st.stop()

        embeddings = [get_embeddings(chunk) for chunk in chunks]
        embeddings_np = np.array(embeddings).astype("float32")

        index = faiss.IndexFlatL2(embeddings_np.shape[1])
        index.add(embeddings_np)

        st.session_state["course_index"] = {
            "fingerprint": fingerprint,
            "chunks": chunks,
            "index": index,
        }
    else:
        chunks = cached["chunks"]
        index = cached["index"]

    st.write("Course materials have been processed and indexed.")

    query = st.text_input("Enter your question about the course materials:")

    if query:
        query_embedding = get_embeddings(query)

        # Never request more neighbours than there are indexed chunks.
        results = search_similar(
            query_embedding, index, chunks, top_k=min(3, len(chunks))
        )

        context = "\n".join([result[0] for result in results])
        modified_prompt = f"Context: {context}\n\nQuestion: {query}\n\nProvide a detailed answer based on the context."

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": modified_prompt}]
        )
        response_text = response['choices'][0]['message']['content']

        st.write("### Intelligent Reply:")
        st.write(response_text)

        pdf_buffer = create_pdf(response_text)
        st.download_button(
            label="Download Intelligent Reply as PDF",
            data=pdf_buffer,
            file_name="intelligent_reply.pdf",
            mime="application/pdf"
        )
|
|