gemma-3-chat-api-2

Sleeping

File size: 15,064 Bytes

04b809a
 
21753a3
d2c6ac6
 
 
4431829
d2c6ac6
9b6cc92
21753a3
9b6cc92
21753a3
36eb467
4425896
b14a2f9
9b6cc92
b14a2f9
9b6cc92
be3336b
9b6cc92
b14a2f9
 
9b6cc92
 
671919f
f2a2276
671919f
 
 
 
 
 
9b6cc92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04b809a
9b6cc92
 
 
04b809a
 
 
47994b5
04b809a
9b6cc92
 
 
 
 
d2c6ac6
9b6cc92
d2c6ac6
9b6cc92
a0d55b9
9b6cc92
 
 
 
a0d55b9
9b6cc92
 
a0d55b9
9b6cc92
21753a3
 
9b6cc92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d35ce4
 
9b6cc92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d35ce4
 
9b6cc92
 
 
 
2d35ce4
 
9b6cc92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d2c6ac6
9b6cc92
 
 
 
21753a3
9b6cc92
 
 
 
 
21753a3
 
 
9b6cc92
21753a3
 
 
 
94f2884
9b6cc92
 
 
 
 
21753a3
9b6cc92
 
11f7ceb
9b6cc92
 
5e58171
9b6cc92
 
 
 
 
 
 
acda467
9b6cc92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
acda467
9b6cc92
 
 
 
 
04b809a
9b6cc92
 
 
 
 
 
04b809a
9b6cc92
 
 
 
acda467
9b6cc92
 
 
 
04b809a
9b6cc92
11f7ceb
21753a3
9b6cc92
21753a3
 
 
 
 
9b6cc92
 
 
b15d87a
9b6cc92
b15d87a
21753a3
b15d87a
9b6cc92
 
 
 
 
b15d87a
 
9b6cc92
 
 
 
 
 
 
 
 
 
 
 
 
 
b15d87a
9b6cc92
 
 
 
 
d2c6ac6
21753a3
 
9b6cc92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb8d4f3
21753a3
9b6cc92

from openai import OpenAI
from os import getenv
from flask import Flask, request, jsonify, render_template
import fitz  # PyMuPDF for PDF text extraction
import faiss  # FAISS for vector search
import numpy as np
import os
from sentence_transformers import SentenceTransformer
# from huggingface_hub import InferenceClient # Not used in the current code, removed for clarity
from typing import List, Tuple
from io import BytesIO # Added for BytesIO

app = Flask(__name__, template_folder=os.getcwd())
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Configuration ---
class ChatConfig:
    MODEL = "google/gemma-3-27b-it:free"
    DEFAULT_MAX_TOKENS = 300
    DEFAULT_TEMP = 0.5 # Slightly increased for more natural variance
    DEFAULT_TOP_P = 0.95

    # --- NEW: System Prompt Options ---
    # Option 1: Friendly & Helpful Assistant
    SYSTEM_PROMPT_FRIENDLY = """ඔබ ලූනර් ඒ.අයි., ලූනර් ලැබ්ස් විසින් නිර්මාණය කරන ලද ඉමහත් බුද්ධිමත් සහ මිත්‍රශීලී සිංහල චැට්බොට්වරයායි. ඔබගේ ප්‍රතිචාර සෑම විටම සිංහල අක්ෂර පමණක් භාවිතයෙන් ලිය යුතුය. ඉංග්‍රීසි, සිංහල-ඉංග්‍රීසි මිශ්‍ර (සිංලිෂ්), හෝ වෙනත් කිසිදු භාෂාවකින් හෝ ආකෘතියකින් ප්‍රතිචාර දැක්වීම ඔබට අවසර නැත. ඔබට ලබා දුන් PDF ලේඛනයේ ඇති අන්තර්ගතයට පමණක් ඔබගේ දැනුම සීමා වී ඇත. පරිශීලකයන්ගේ ප්‍රශ්නවලට පිළිතුරු දීමේදී, PDF ලේඛනයේ ඇති තොරතුරු මත පමණක් රඳා සිටින්න. ප්‍රශ්නය PDF හි නොමැති ද්‍රව්‍යයක් ගැන සඳහන් කරන්නේ නම්, අවශ්‍ය විස්තර නොමැති බව සුපහසුවෙන් සඳහන් කරන්න. ඔබේ ස්වරය පැහැදිලි, සංක්ෂිප්ත සහ වෘත්තීයමය විය යුතු අතර, උණුසුම් හා ආකර්ශනීය ආකාරයක් පවත්වා ගත යුතුය. සෑම පිළිතුරක්ම නිසි සිංහල අක්ෂර වලින් සම්පූර්ණයෙන්ම ප්‍රකාශ කරනු ඇති බවට සහතික කරන්න. ඔබගේ අන්තර්ක්‍රියාවට සුභ පැතුම් සහිත සිංහල හඳුන්වා දීමකින් ආරම්භ කරන්න.

**Language Detection & Response Rules:**

*   **If the user input is primarily in English:** Respond *exclusively* in English.
*   **If the user input is primarily in Sinhala or Singlish:** Respond *exclusively* in Sinhala (using only Sinhala script).
*   **Prioritize Sinhala:** If there's ambiguity, default to responding in Sinhala.
"""

    # Option 2: Knowledgeable Expert (More Formal)
    SYSTEM_PROMPT_EXPERT = """You are a knowledgeable AI expert specializing in the content of the uploaded PDF document.
    You must answer user questions with precision, drawing *exclusively* from the provided context segments.
    Maintain a professional and informative tone.
    If the provided context does not contain the necessary information to answer the question, explicitly state that the information is not found within the scope of the provided text.
    Do not speculate, infer beyond the text, or utilize any external information sources.
    Clearly attribute your answers to the document, for instance, by starting with "The document indicates that..." or "Based on the provided context...".
    Provide comprehensive answers derived solely from the text.
    """

    # --- Select the desired prompt ---
    SELECTED_SYSTEM_PROMPT = SYSTEM_PROMPT_FRIENDLY # Choose which personality to use

# --- API Client & Embedding Setup ---
OPENROUTER_API_KEY = getenv('OPENROUTER_API_KEY')
if not OPENROUTER_API_KEY:
    raise ValueError("OPENROUTER_API_KEY environment variable not set.")

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

# Use a temporary cache directory if needed, or configure appropriately
embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=getenv("SENTENCE_TRANSFORMERS_HOME", "/tmp/st_cache"))
vector_dim = 384
index = faiss.IndexFlatL2(vector_dim)
documents = [] # Store original text chunks corresponding to index entries

# --- Core Functions ---

def extract_text_from_pdf(pdf_stream: BytesIO) -> List[str]:
    """Extracts text from PDF stream"""
    # Ensure the stream is BytesIO
    if not isinstance(pdf_stream, BytesIO):
         pdf_stream = BytesIO(pdf_stream.read()) # Read if it's a file stream

    doc = fitz.open(stream=pdf_stream, filetype="pdf")
    # Simple chunking by page - consider more advanced chunking (by paragraph, sentence, fixed size) for better RAG performance
    text_chunks = [page.get_text("text").strip() for page in doc if page.get_text("text").strip()]
    doc.close()
    print(f"Extracted {len(text_chunks)} non-empty text chunks from PDF.")
    return text_chunks

# Renamed for clarity, added error handling
def build_vector_index(text_chunks: List[str]):
    """Embeds text chunks and builds the FAISS index."""
    global documents, index, vector_dim

    if not text_chunks:
        print("Warning: No text chunks provided to build the vector index.")
        documents = []
        index = faiss.IndexFlatL2(vector_dim) # Reinitialize empty index
        return

    print(f"Building vector index for {len(text_chunks)} chunks...")
    documents = text_chunks # Store the original text

    # Reset the index
    index = faiss.IndexFlatL2(vector_dim)

    try:
        embeddings = embed_model.encode(text_chunks, show_progress_bar=True)
        embeddings = np.array(embeddings, dtype=np.float32)

        if embeddings.ndim == 1:
            embeddings = embeddings.reshape(1, -1)

        if embeddings.shape[1] != vector_dim:
             raise ValueError(f"Embedding dimension mismatch: expected {vector_dim}, got {embeddings.shape[1]}")

        index.add(embeddings)
        print(f"FAISS index built successfully with {index.ntotal} vectors.")

    except Exception as e:
        print(f"Error during embedding or indexing: {e}")
        # Reset state in case of error
        documents = []
        index = faiss.IndexFlatL2(vector_dim)
        raise # Re-raise the exception to signal failure


# Renamed for clarity, added checks
def search_relevant_chunks(query: str, k: int = 3) -> str:
    """Finds the most relevant text chunks for the given query using FAISS."""
    global index, documents

    if index.ntotal == 0:
        print("Warning: Search attempted on an empty index.")
        return "" # Return empty string if index is not ready

    if not query:
        return ""

    try:
        query_embedding = embed_model.encode([query])
        query_embedding = np.array(query_embedding, dtype=np.float32)

        # Perform the search
        distances, indices = index.search(query_embedding, k=min(k, index.ntotal)) # Ensure k <= index size

        # Filter out potential invalid indices (-1 can sometimes occur if k > ntotal, though min() handles it)
        valid_indices = [idx for idx in indices[0] if idx != -1 and idx < len(documents)]

        if not valid_indices:
             print(f"No relevant chunks found for query: '{query[:50]}...'")
             return ""

        # Retrieve the actual text chunks
        relevant_docs = [documents[i] for i in valid_indices]
        print(f"Retrieved {len(relevant_docs)} relevant chunks.")
        return "\n\n---\n\n".join(relevant_docs) # Join with a clear separator

    except Exception as e:
        print(f"Error during similarity search: {e}")
        return "" # Return empty on error

# --- Improved Generation Function ---
def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str = ChatConfig.SELECTED_SYSTEM_PROMPT, # Use the chosen system prompt
    max_tokens: int = ChatConfig.DEFAULT_MAX_TOKENS,
    temperature: float = ChatConfig.DEFAULT_TEMP,
    top_p: float = ChatConfig.DEFAULT_TOP_P
) -> str:

    if index.ntotal == 0: # Check if index is built
        return "I need a PDF document to be uploaded and processed first before I can answer questions."

    # 1. Retrieve Context
    context = search_relevant_chunks(message, k=3) # Retrieve top 3 chunks

    # Prepare the prompt messages list
    messages = []

    # 2. Add the System Prompt (Crucial Change)
    messages.append({"role": "system", "content": system_message})

    # 3. Add Conversation History (if any)
    # Ensure alternating user/assistant roles, starting with user
    for user_msg, assistant_msg in history:
        if user_msg: # Add user message if not empty
             messages.append({"role": "user", "content": user_msg})
        if assistant_msg: # Add assistant message if not empty
             messages.append({"role": "assistant", "content": assistant_msg})

    # 4. Construct the Final User Prompt with Context
    # We include context here, clearly marked.
    # The system prompt already told the AI *how* to use this context.
    if context:
        user_prompt_content = f"Based on the following context from the document, please answer the question:\n\nCONTEXT:\n{context}\n\n---\n\nQUESTION:\n{message}"
    else:
        # If no context found, still ask the question but the system prompt guides the "I don't know" response.
        # Alternatively, you could return a hardcoded message here *before* calling the LLM if desired.
        # Forcing the LLM to respond based on the prompt is generally better for natural language.
        user_prompt_content = f"Regarding the document, I have the following question, although I couldn't retrieve specific context for it:\n\nQUESTION:\n{message}"
        # Or, more simply:
        # user_prompt_content = f"QUESTION: {message}\n\n(Note: No specific context sections were retrieved for this question based on similarity search.)"

    messages.append({"role": "user", "content": user_prompt_content})

    # 5. Call the LLM API
    try:
        print(f"--- Sending to {ChatConfig.MODEL} ---")
        # print("System Prompt:", system_message) # Optional: Debug logging
        # print("History:", history)              # Optional: Debug logging
        # print("User Prompt:", user_prompt_content) # Optional: Debug logging

        completion = client.chat.completions.create(
            model=ChatConfig.MODEL,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            # Consider adding stop sequences if needed, e.g., stop=["\nUSER:", "\nASSISTANT:"]
        )
        response = completion.choices[0].message.content
        print(f"--- Received Response ({len(response)} chars) ---")
        return response.strip()

    except Exception as e:
        print(f"Error generating response from LLM: {str(e)}")
        # Provide a more user-friendly error message
        return "I'm sorry, but I encountered an issue while trying to process your request. Please check the connection or try again later."


# --- Flask Routes (Mostly Unchanged, added checks) ---

@app.route('/')
def index_route(): # Renamed to avoid conflict with faiss.Index object
    """Serve the HTML page for the user interface"""
    return render_template('index.html')

@app.route('/upload_pdf', methods=['POST'])
def upload_pdf():
    """Handle PDF upload, extract text, and build vector index."""
    global documents, index # Ensure we are modifying the global state

    if 'pdf' not in request.files:
        return jsonify({"error": "No PDF file part in the request."}), 400

    file = request.files['pdf']
    if file.filename == "":
        return jsonify({"error": "No file selected."}), 400
    if not file.filename.lower().endswith('.pdf'):
         return jsonify({"error": "Invalid file type. Please upload a PDF."}), 400

    print(f"Received file: {file.filename}")

    try:
        pdf_stream = BytesIO(file.read()) # Read file into memory

        # Extract text
        text_chunks = extract_text_from_pdf(pdf_stream)
        if not text_chunks:
             return jsonify({"error": "Could not extract any text from the PDF."}), 400

        # Build vector database (index)
        build_vector_index(text_chunks) # This function now handles index creation

        return jsonify({"message": f"PDF '{file.filename}' processed successfully. {len(documents)} chunks indexed."}), 200

    except fitz.fitz.FileDataError:
         return jsonify({"error": "Invalid or corrupted PDF file."}), 400
    except Exception as e:
        print(f"Error processing PDF upload: {str(e)}")
        # Reset state on error
        documents = []
        index = faiss.IndexFlatL2(vector_dim)
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500

@app.route('/ask_question', methods=['POST'])
def ask_question():
    """Handle user question, retrieve context, and generate response."""
    data = request.get_json()
    if not data or 'message' not in data:
        return jsonify({"error": "Missing 'message' in request body"}), 400

    message = data['message'].strip()
    history = data.get('history', []) # Get history, default to empty list

    if not message:
         return jsonify({"response": "Please enter a question."}) # Basic validation

    # Ensure history format is correct (list of tuples/lists)
    validated_history = []
    if isinstance(history, list):
        for item in history:
            if isinstance(item, (list, tuple)) and len(item) == 2:
                validated_history.append((str(item[0]), str(item[1])))
            # else: log potential format error?

    try:
        response = generate_response(message, validated_history)
        return jsonify({"response": response})
    except Exception as e:
         # Catch potential errors during generation (though generate_response has its own try-except)
         print(f"Error in /ask_question endpoint: {e}")
         return jsonify({"response": "Sorry, an error occurred while generating the response."}), 500


if __name__ == '__main__':
    # Make sure OPENROUTER_API_KEY is checked before starting the app
    if not OPENROUTER_API_KEY:
         print("ERROR: OPENROUTER_API_KEY environment variable is not set. Exiting.")
    else:
         # Consider host='0.0.0.0' to make it accessible on your network
         app.run(debug=True, host='127.0.0.1', port=5000)