Build error
Update app.py
app.py CHANGED
@@ -1,68 +1,94 @@
 1     import gradio as gr
 2  -  from
 3
 4  -  # --- 1.
 5  -  # We load the
 6  -  #
 7  -
 8  -
 9
10
11  -  #
12  -  #
13
14  -  def answer_question(context, question):
15  -      """Uses the QA model to find an answer within a given text."""
16  -      if not context or not question:
17  -          return "(Please provide both context and a question.)"
18  -      result = question_answerer(question=question, context=context)
19  -      return result['answer']
20
21  -
22  -
23  -
24  -          return "(Please provide text to summarize.)"
25  -      # We add some parameters for better, shorter summaries on a CPU
26  -      summary = summarizer(text, max_length=50, min_length=25, do_sample=False)
27  -      return summary[0]['summary_text']
28
29
30  -
31  -  # This is where we design the web app's layout and connect it to our functions.
32
33  -
34  -
35  -
36
37  -
38  -          # --- First Tab: Question Answering ---
39  -          with gr.TabItem("❓ Ask a Question"):
40  -              gr.Markdown("Give the AI some text (context) and ask a question about it.")
41  -              with gr.Row():
42  -                  qa_context_input = gr.Textbox(lines=7, label="Context", placeholder="Paste a paragraph or article here...")
43  -                  qa_question_input = gr.Textbox(label="Question", placeholder="What do you want to know?")
44  -              qa_button = gr.Button("Get Answer")
45  -              qa_output = gr.Textbox(label="Answer")
46
47  -
48  -
49  -
50  -
51  -
52  -
53
54  -  #
55  -
56  -
57  -
58  -
59     )
60
61  -
62  -
63  -
64  -
65  -
66
67  -  #
68  -
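Most of the deleted lines above did not survive the page extraction; only fragments remain (a truncated "from" import, a few comment stubs, the answer_question helper, a summarizer(...) call, and the Question Answering tab). Read together, those fragments suggest the removed file was a transformers-pipeline app roughly like the sketch below. This is a hedged reconstruction, not the actual deleted code: the pipeline(...) setup, the summarize_text name, the Blocks/Tabs wrapper, and the button wiring are assumptions filled in for illustration.

# Hedged reconstruction of the removed version; assumptions are marked inline.
import gradio as gr
from transformers import pipeline  # assumed import; only "from" survived extraction

# Assumed pipeline setup; the original model choices did not survive extraction.
question_answerer = pipeline("question-answering")
summarizer = pipeline("summarization")

def answer_question(context, question):
    """Uses the QA model to find an answer within a given text."""
    if not context or not question:
        return "(Please provide both context and a question.)"
    result = question_answerer(question=question, context=context)
    return result['answer']

def summarize_text(text):  # assumed name; the original def line was lost
    if not text:
        return "(Please provide text to summarize.)"
    # We add some parameters for better, shorter summaries on a CPU
    summary = summarizer(text, max_length=50, min_length=25, do_sample=False)
    return summary[0]['summary_text']

# This is where we design the web app's layout and connect it to our functions.
with gr.Blocks() as demo:  # assumed wrapper; only the tab contents survived
    with gr.Tabs():
        # --- First Tab: Question Answering ---
        with gr.TabItem("❓ Ask a Question"):
            gr.Markdown("Give the AI some text (context) and ask a question about it.")
            with gr.Row():
                qa_context_input = gr.Textbox(lines=7, label="Context", placeholder="Paste a paragraph or article here...")
                qa_question_input = gr.Textbox(label="Question", placeholder="What do you want to know?")
            qa_button = gr.Button("Get Answer")
            qa_output = gr.Textbox(label="Answer")
            # Assumed wiring between the button and the QA function
            qa_button.click(fn=answer_question, inputs=[qa_context_input, qa_question_input], outputs=qa_output)

demo.launch()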
 1     import gradio as gr
 2  +  from huggingface_hub import hf_hub_download
 3  +  from llama_cpp import Llama
 4
 5  +  # --- 1. MODEL LOADING ---
 6  +  # We still load the quantized GGUF model, which is perfect for CPU.
 7  +  # We will focus on Llama-3 as it's best for a general-purpose assistant.
 8  +  model_name_or_path = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
 9  +  model_file = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"
10
11  +  try:
12  +      model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_file)
13  +  except Exception as e:
14  +      raise RuntimeError(f"Failed to download the model. Error: {e}")
15
16  +  # Load the model with llama-cpp-python
17  +  # n_ctx is the context window size; 2048 is a safe bet for CPU Spaces.
18  +  # n_gpu_layers=0 ensures it runs entirely on the CPU.
19  +  try:
20  +      llm = Llama(
21  +          model_path=model_path,
22  +          n_ctx=2048,
23  +          n_threads=4,  # Set to a reasonable number of threads for the CPU
24  +          n_gpu_layers=0,
25  +          verbose=False
26  +      )
27  +  except Exception as e:
28  +      raise RuntimeError(f"Failed to load the GGUF model. Error: {e}")
29
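Nothing above will import on a Space unless the dependencies are declared. A plausible minimal requirements.txt for this app is sketched below; the file is not part of this diff, so the entries are assumptions. A missing or failing entry here, especially llama-cpp-python, which often has to compile from source on CPU Spaces, is a common cause of the "Build error" status shown above.

# requirements.txt (assumed; not shown in this commit)
gradio
huggingface_hub
llama-cpp-python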
30
31  +  # --- 2. THE "BRAIN'S INSTRUCTION MANUAL" (SYSTEM PROMPT) ---
32  +  # This is the most critical part. We tell the AI how to behave.
33  +  # This prompt guides it to be helpful, analytical, and honest about its limitations.
34
35  +  SYSTEM_PROMPT = """You are 'NexusAI', a helpful and highly intelligent AI assistant built by a creative developer.
36
37  +  Your primary goal is to provide comprehensive, insightful, and helpful responses. You must be robust and handle any user input, no matter how brief or poorly phrased.
38
39  +  When a user asks a question, follow these steps:
40  +  1. **Analyze the Intent:** First, understand the user's *true* goal. If they ask "cost to build building?", they don't want you to invent a number. They need a *checklist* of cost categories to research. If their question is vague, identify what they are likely trying to accomplish.
41  +  2. **Provide a Direct Answer:** If you can directly answer, do so clearly and concisely.
42  +  3. **Elaborate and Add Value:** After the direct answer, provide deeper context, explain the "why" behind the answer, and offer related suggestions or next steps. Give the user more than they asked for.
43  +  4. **Acknowledge Limitations:** You are not a real-time calculator, a search engine, or a financial advisor. If a question requires real-world, live data (like prices, stock quotes, personal advice), you MUST state that you cannot provide it. Instead, provide a framework or a list of steps the user can take to find the information themselves. NEVER invent facts.
44  +  5. **Maintain a Friendly, Encouraging Tone:** Be a partner in the user's creative or analytical process.
45  +  """
46
47  +  # --- 3. THE GRADIO CHAT INTERFACE ---
48
49  +  def predict(message, history):
50  +      """
51  +      This function is called by the Gradio ChatInterface for each new message.
52  +      'message' is the new user input.
53  +      'history' is the entire conversation history as a list of lists.
54  +      """
55  +      # Format the conversation history for the model
56  +      # The history format is [['user_message', 'assistant_response'], ...]
57  +      chat_history_formatted = [{"role": "system", "content": SYSTEM_PROMPT}]
58  +      for user_msg, assistant_msg in history:
59  +          chat_history_formatted.append({"role": "user", "content": user_msg})
60  +          chat_history_formatted.append({"role": "assistant", "content": assistant_msg})
61  +      # Add the latest user message
62  +      chat_history_formatted.append({"role": "user", "content": message})
63
64  +      # Use the model to generate a response stream
65  +      # stream=True allows the text to appear token-by-token for a better UX
66  +      generator = llm.create_chat_completion(
67  +          messages=chat_history_formatted,
68  +          max_tokens=1024,
69  +          temperature=0.7,
70  +          stream=True
71         )
72
73  +      # Yield partial responses to create the streaming effect
74  +      partial_message = ""
75  +      for chunk in generator:
76  +          delta = chunk['choices'][0]['delta']
77  +          if 'content' in delta:
78  +              partial_message += delta['content']
79  +              yield partial_message
80
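As a concrete illustration of what predict hands to llm.create_chat_completion: for one earlier exchange plus a new message, chat_history_formatted ends up shaped like this. The user and assistant strings are invented for the example; only the structure matches the code above.

# Illustrative contents of chat_history_formatted after one prior turn
chat_history_formatted = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": "How do I learn to code?"},
    {"role": "assistant", "content": "Start small: pick one language and build one tiny project..."},
    {"role": "user", "content": "Which language should that first project use?"},
]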
81  +  # We use gr.ChatInterface, which creates a complete chat UI for us.
82  +  # It manages history, input boxes, and message display automatically.
83  +  gr.ChatInterface(
84  +      fn=predict,
85  +      title="🤖 NexusAI Assistant",
86  +      description="A powerful, conversational AI running on a Hugging Face CPU. Ask me anything!",
87  +      examples=[
88  +          ["How do I learn to code?"],
89  +          ["Explain the concept of 'supply and demand' like I'm five."],
90  +          ["I want to build a PC, where do I start?"],
91  +          ["I am building a building, how much would it cost me"]  # The "bad" prompt from before!
92  +      ],
93  +      theme="soft"
94  +  ).launch()