vaseacc committed on
Commit 72a9a55 · verified · 1 Parent(s): 6a152c1

Update app.py

Files changed (1)
  1. app.py +79 -53
app.py CHANGED
@@ -1,68 +1,94 @@
  import gradio as gr
- from transformers import pipeline

- # --- 1. LOAD OUR AI MODELS ---
- # We load the models once when the app starts.
- # This is more efficient than loading them for every request.
- question_answerer = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")


- # --- 2. DEFINE THE CORE FUNCTIONS ---
- # These functions will take user input and use the appropriate model.

- def answer_question(context, question):
-     """Uses the QA model to find an answer within a given text."""
-     if not context or not question:
-         return "(Please provide both context and a question.)"
-     result = question_answerer(question=question, context=context)
-     return result['answer']

- def summarize_text(text):
-     """Uses the Summarization model to shorten a piece of text."""
-     if not text:
-         return "(Please provide text to summarize.)"
-     # We add some parameters for better, shorter summaries on a CPU
-     summary = summarizer(text, max_length=50, min_length=25, do_sample=False)
-     return summary[0]['summary_text']


- # --- 3. BUILD THE GRADIO INTERFACE ---
- # This is where we design the web app's layout and connect it to our functions.

- with gr.Blocks() as demo:
-     gr.Markdown("# Ultimate AI Assistant (CPU Edition) 🤖")
-     gr.Markdown("An experiment in combining multiple small, efficient AI models into one application.")

-     with gr.Tabs():
-         # --- First Tab: Question Answering ---
-         with gr.TabItem("❓ Ask a Question"):
-             gr.Markdown("Give the AI some text (context) and ask a question about it.")
-             with gr.Row():
-                 qa_context_input = gr.Textbox(lines=7, label="Context", placeholder="Paste a paragraph or article here...")
-                 qa_question_input = gr.Textbox(label="Question", placeholder="What do you want to know?")
-             qa_button = gr.Button("Get Answer")
-             qa_output = gr.Textbox(label="Answer")

-         # --- Second Tab: Summarization ---
-         with gr.TabItem("📚 Summarize Text"):
-             gr.Markdown("Paste in a long piece of text and the AI will create a short summary.")
-             summarize_input = gr.Textbox(lines=10, label="Text to Summarize", placeholder="Paste a long article or text here...")
-             summarize_button = gr.Button("Summarize")
-             summarize_output = gr.Textbox(label="Summary")

-     # --- 4. Connect Buttons to Functions ---
-     qa_button.click(
-         fn=answer_question,
-         inputs=[qa_context_input, qa_question_input],
-         outputs=qa_output
      )

-     summarize_button.click(
-         fn=summarize_text,
-         inputs=summarize_input,
-         outputs=summarize_output
-     )

- # --- 5. LAUNCH THE APP! ---
- demo.launch()
  import gradio as gr
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama

+ # --- 1. MODEL LOADING ---
+ # We still load the quantized GGUF model, which is perfect for CPU.
+ # We will focus on Llama-3 as it's best for a general-purpose assistant.
+ model_name_or_path = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
+ model_file = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"

+ try:
+     model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_file)
+ except Exception as e:
+     raise RuntimeError(f"Failed to download the model. Error: {e}")

+ # Load the model with llama-cpp-python
+ # n_ctx is the context window size; 2048 is a safe bet for CPU Spaces.
+ # n_gpu_layers=0 ensures it runs entirely on the CPU.
+ try:
+     llm = Llama(
+         model_path=model_path,
+         n_ctx=2048,
+         n_threads=4,  # Set to a reasonable number of threads for the CPU
+         n_gpu_layers=0,
+         verbose=False
+     )
+ except Exception as e:
+     raise RuntimeError(f"Failed to load the GGUF model. Error: {e}")


+ # --- 2. THE "BRAIN'S INSTRUCTION MANUAL" (SYSTEM PROMPT) ---
+ # This is the most critical part. We tell the AI how to behave.
+ # This prompt guides it to be helpful, analytical, and honest about its limitations.

+ SYSTEM_PROMPT = """You are 'NexusAI', a helpful and highly intelligent AI assistant built by a creative developer.

+ Your primary goal is to provide comprehensive, insightful, and helpful responses. You must be robust and handle any user input, no matter how brief or poorly phrased.

+ When a user asks a question, follow these steps:
+ 1. **Analyze the Intent:** First, understand the user's *true* goal. If they ask "cost to build building?", they don't want you to invent a number. They need a *checklist* of cost categories to research. If their question is vague, identify what they are likely trying to accomplish.
+ 2. **Provide a Direct Answer:** If you can directly answer, do so clearly and concisely.
+ 3. **Elaborate and Add Value:** After the direct answer, provide deeper context, explain the "why" behind the answer, and offer related suggestions or next steps. Give the user more than they asked for.
+ 4. **Acknowledge Limitations:** You are not a real-time calculator, a search engine, or a financial advisor. If a question requires real-world, live data (like prices, stock quotes, personal advice), you MUST state that you cannot provide it. Instead, provide a framework or a list of steps the user can take to find the information themselves. NEVER invent facts.
+ 5. **Maintain a Friendly, Encouraging Tone:** Be a partner in the user's creative or analytical process.
+ """

+ # --- 3. THE GRADIO CHAT INTERFACE ---

+ def predict(message, history):
+     """
+     This function is called by the Gradio ChatInterface for each new message.
+     'message' is the new user input.
+     'history' is the entire conversation history as a list of lists.
+     """
+     # Format the conversation history for the model
+     # The history format is [['user_message', 'assistant_response'], ...]
+     chat_history_formatted = [{"role": "system", "content": SYSTEM_PROMPT}]
+     for user_msg, assistant_msg in history:
+         chat_history_formatted.append({"role": "user", "content": user_msg})
+         chat_history_formatted.append({"role": "assistant", "content": assistant_msg})
+     # Add the latest user message
+     chat_history_formatted.append({"role": "user", "content": message})

+     # Use the model to generate a response stream
+     # stream=True allows the text to appear token-by-token for a better UX
+     generator = llm.create_chat_completion(
+         messages=chat_history_formatted,
+         max_tokens=1024,
+         temperature=0.7,
+         stream=True
      )

+     # Yield partial responses to create the streaming effect
+     partial_message = ""
+     for chunk in generator:
+         delta = chunk['choices'][0]['delta']
+         if 'content' in delta:
+             partial_message += delta['content']
+             yield partial_message

+ # We use gr.ChatInterface, which creates a complete chat UI for us.
+ # It manages history, input boxes, and message display automatically.
+ gr.ChatInterface(
+     fn=predict,
+     title="🤖 NexusAI Assistant",
+     description="A powerful, conversational AI running on a Hugging Face CPU. Ask me anything!",
+     examples=[
+         ["How do I learn to code?"],
+         ["Explain the concept of 'supply and demand' like I'm five."],
+         ["I want to build a PC, where do I start?"],
+         ["I am building a building, how much would it cost me"]  # The "bad" prompt from before!
+     ],
+     theme="soft"
+ ).launch()
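
Note for anyone trying this revision: swapping the transformers pipelines for hf_hub_download and llama_cpp means the Space's requirements.txt must change too. A minimal sketch, assuming no version pins (the package names are the real PyPI names, but this file is not part of this commit):

    # requirements.txt -- hypothetical companion to this revision
    gradio
    huggingface_hub
    llama-cpp-python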
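One caveat: predict assumes tuple-style history ([['user_message', 'assistant_response'], ...]), which is what classic gr.ChatInterface passes; newer Gradio releases default to OpenAI-style role/content dicts instead. A defensive sketch that tolerates both shapes (to_openai_messages is a hypothetical helper, not in this commit):

    def to_openai_messages(history):
        # Hypothetical helper, not part of this commit: accepts either Gradio
        # history shape and returns OpenAI-style role/content dicts.
        messages = []
        for turn in history:
            if isinstance(turn, dict):  # messages mode: already {"role": ..., "content": ...}
                messages.append({"role": turn["role"], "content": turn["content"]})
            else:  # tuples mode: [user_msg, assistant_msg]
                user_msg, assistant_msg = turn
                messages.append({"role": "user", "content": user_msg})
                if assistant_msg:  # assistant slot may be None mid-turn
                    messages.append({"role": "assistant", "content": assistant_msg})
        return messages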
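Because predict is a generator that yields the cumulative response so far, it can also be exercised without the Gradio UI. A hypothetical local smoke test, not in the commit (the module-level .launch() call would need to be commented out or guarded before importing app.py):

    if __name__ == "__main__":
        # Hypothetical smoke test using the tuple-style history predict expects.
        history = [["Hi, who are you?", "I'm NexusAI. How can I help?"]]
        final = ""
        for partial in predict("Give me three tips for learning Python.", history):
            final = partial  # each yield is the full response accumulated so far
        print(final)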