vaseacc committed on
Commit 72a9a55 · verified · 1 Parent(s): 6a152c1

Update app.py

Files changed (1)
  1. app.py +79 -53
app.py CHANGED
@@ -1,68 +1,94 @@
  import gradio as gr
- from transformers import pipeline

- # --- 1. LOAD OUR AI MODELS ---
- # We load the models once when the app starts.
- # This is more efficient than loading them for every request.
- question_answerer = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")


- # --- 2. DEFINE THE CORE FUNCTIONS ---
- # These functions will take user input and use the appropriate model.

- def answer_question(context, question):
-     """Uses the QA model to find an answer within a given text."""
-     if not context or not question:
-         return "(Please provide both context and a question.)"
-     result = question_answerer(question=question, context=context)
-     return result['answer']

- def summarize_text(text):
-     """Uses the Summarization model to shorten a piece of text."""
-     if not text:
-         return "(Please provide text to summarize.)"
-     # We add some parameters for better, shorter summaries on a CPU
-     summary = summarizer(text, max_length=50, min_length=25, do_sample=False)
-     return summary[0]['summary_text']


- # --- 3. BUILD THE GRADIO INTERFACE ---
- # This is where we design the web app's layout and connect it to our functions.

- with gr.Blocks() as demo:
-     gr.Markdown("# Ultimate AI Assistant (CPU Edition) 🤖")
-     gr.Markdown("An experiment in combining multiple small, efficient AI models into one application.")

-     with gr.Tabs():
-         # --- First Tab: Question Answering ---
-         with gr.TabItem("❓ Ask a Question"):
-             gr.Markdown("Give the AI some text (context) and ask a question about it.")
-             with gr.Row():
-                 qa_context_input = gr.Textbox(lines=7, label="Context", placeholder="Paste a paragraph or article here...")
-                 qa_question_input = gr.Textbox(label="Question", placeholder="What do you want to know?")
-             qa_button = gr.Button("Get Answer")
-             qa_output = gr.Textbox(label="Answer")

-         # --- Second Tab: Summarization ---
-         with gr.TabItem("📚 Summarize Text"):
-             gr.Markdown("Paste in a long piece of text and the AI will create a short summary.")
-             summarize_input = gr.Textbox(lines=10, label="Text to Summarize", placeholder="Paste a long article or text here...")
-             summarize_button = gr.Button("Summarize")
-             summarize_output = gr.Textbox(label="Summary")

-     # --- 4. Connect Buttons to Functions ---
-     qa_button.click(
-         fn=answer_question,
-         inputs=[qa_context_input, qa_question_input],
-         outputs=qa_output
      )

-     summarize_button.click(
-         fn=summarize_text,
-         inputs=summarize_input,
-         outputs=summarize_output
-     )

- # --- 5. LAUNCH THE APP! ---
- demo.launch()
  import gradio as gr
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama

+ # --- 1. MODEL LOADING ---
+ # We still load the quantized GGUF model, which is perfect for CPU.
+ # We will focus on Llama-3 as it's best for a general-purpose assistant.
+ model_name_or_path = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
+ model_file = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"

+ try:
+     model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_file)
+ except Exception as e:
+     raise RuntimeError(f"Failed to download the model. Error: {e}")

+ # Load the model with llama-cpp-python
+ # n_ctx is the context window size; 2048 is a safe bet for CPU Spaces.
+ # n_gpu_layers=0 ensures it runs entirely on the CPU.
+ try:
+     llm = Llama(
+         model_path=model_path,
+         n_ctx=2048,
+         n_threads=4,  # Set to a reasonable number of threads for the CPU
+         n_gpu_layers=0,
+         verbose=False
+     )
+ except Exception as e:
+     raise RuntimeError(f"Failed to load the GGUF model. Error: {e}")


+ # --- 2. THE "BRAIN'S INSTRUCTION MANUAL" (SYSTEM PROMPT) ---
+ # This is the most critical part. We tell the AI how to behave.
+ # This prompt guides it to be helpful, analytical, and honest about its limitations.

+ SYSTEM_PROMPT = """You are 'NexusAI', a helpful and highly intelligent AI assistant built by a creative developer.

+ Your primary goal is to provide comprehensive, insightful, and helpful responses. You must be robust and handle any user input, no matter how brief or poorly phrased.

+ When a user asks a question, follow these steps:
+ 1. **Analyze the Intent:** First, understand the user's *true* goal. If they ask "cost to build building?", they don't want you to invent a number. They need a *checklist* of cost categories to research. If their question is vague, identify what they are likely trying to accomplish.
+ 2. **Provide a Direct Answer:** If you can directly answer, do so clearly and concisely.
+ 3. **Elaborate and Add Value:** After the direct answer, provide deeper context, explain the "why" behind the answer, and offer related suggestions or next steps. Give the user more than they asked for.
+ 4. **Acknowledge Limitations:** You are not a real-time calculator, a search engine, or a financial advisor. If a question requires real-world, live data (like prices, stock quotes, personal advice), you MUST state that you cannot provide it. Instead, provide a framework or a list of steps the user can take to find the information themselves. NEVER invent facts.
+ 5. **Maintain a Friendly, Encouraging Tone:** Be a partner in the user's creative or analytical process.
+ """

+ # --- 3. THE GRADIO CHAT INTERFACE ---

+ def predict(message, history):
+     """
+     This function is called by the Gradio ChatInterface for each new message.
+     'message' is the new user input.
+     'history' is the entire conversation history as a list of lists.
+     """
+     # Format the conversation history for the model
+     # The history format is [['user_message', 'assistant_response'], ...]
+     chat_history_formatted = [{"role": "system", "content": SYSTEM_PROMPT}]
+     for user_msg, assistant_msg in history:
+         chat_history_formatted.append({"role": "user", "content": user_msg})
+         chat_history_formatted.append({"role": "assistant", "content": assistant_msg})
+     # Add the latest user message
+     chat_history_formatted.append({"role": "user", "content": message})

+     # Use the model to generate a response stream
+     # stream=True allows the text to appear token-by-token for a better UX
+     generator = llm.create_chat_completion(
+         messages=chat_history_formatted,
+         max_tokens=1024,
+         temperature=0.7,
+         stream=True
      )

+     # Yield partial responses to create the streaming effect
+     partial_message = ""
+     for chunk in generator:
+         delta = chunk['choices'][0]['delta']
+         if 'content' in delta:
+             partial_message += delta['content']
+             yield partial_message

+ # We use gr.ChatInterface, which creates a complete chat UI for us.
+ # It manages history, input boxes, and message display automatically.
+ gr.ChatInterface(
+     fn=predict,
+     title="🤖 NexusAI Assistant",
+     description="A powerful, conversational AI running on a Hugging Face CPU. Ask me anything!",
+     examples=[
+         ["How do I learn to code?"],
+         ["Explain the concept of 'supply and demand' like I'm five."],
+         ["I want to build a PC, where do I start?"],
+         ["I am building a building, how much would it cost me"]  # The "bad" prompt from before!
+     ],
+     theme="soft"
+ ).launch()
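
Note for anyone trying this revision: swapping the transformers pipelines for hf_hub_download and llama_cpp means the Space's requirements.txt must change too. A minimal sketch, assuming no version pins (the package names are the real PyPI names, but this file is not part of this commit):

    # requirements.txt -- hypothetical companion to this revision
    gradio
    huggingface_hub
    llama-cpp-python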
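One caveat: predict assumes tuple-style history ([['user_message', 'assistant_response'], ...]), which is what classic gr.ChatInterface passes; newer Gradio releases default to OpenAI-style role/content dicts instead. A defensive sketch that tolerates both shapes (to_openai_messages is a hypothetical helper, not in this commit):

    def to_openai_messages(history):
        # Hypothetical helper, not part of this commit: accepts either Gradio
        # history shape and returns OpenAI-style role/content dicts.
        messages = []
        for turn in history:
            if isinstance(turn, dict):  # messages mode: already {"role": ..., "content": ...}
                messages.append({"role": turn["role"], "content": turn["content"]})
            else:  # tuples mode: [user_msg, assistant_msg]
                user_msg, assistant_msg = turn
                messages.append({"role": "user", "content": user_msg})
                if assistant_msg:  # assistant slot may be None mid-turn
                    messages.append({"role": "assistant", "content": assistant_msg})
        return messages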
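Because predict is a generator that yields the cumulative response so far, it can also be exercised without the Gradio UI. A hypothetical local smoke test, not in the commit (the module-level .launch() call would need to be commented out or guarded before importing app.py):

    if __name__ == "__main__":
        # Hypothetical smoke test using the tuple-style history predict expects.
        history = [["Hi, who are you?", "I'm NexusAI. How can I help?"]]
        final = ""
        for partial in predict("Give me three tips for learning Python.", history):
            final = partial  # each yield is the full response accumulated so far
        print(final)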