ArBaltee committed
Commit 8e17c8c · verified · 1 Parent(s): 3c2bd02

Update app.py

Files changed (1):
  1. app.py +117 -52

app.py CHANGED
@@ -1,64 +1,129 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-
-if __name__ == "__main__":
-    demo.launch()
+# NORTHERN_AI
+# Created by AR.BALTEE
+
+import os
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from flask import Flask, request, jsonify
+
+app = Flask(__name__)
+
+# Choose a lightweight open model that can run on limited hardware
+# Options include:
+# - GPT2-small (if you have ~2GB RAM for the model)
+# - Hugging Face's inference endpoints (cloud-based, some free tiers)
+# - Models like DialoGPT-small, BLOOM-560M, or OPT-350M
+
+# Configuration
+MODEL_NAME = "EleutherAI/gpt-neo-125M"  # A relatively small model, replace with your choice
+USE_CLOUD_INFERENCE = True  # Set to True to use Hugging Face's Inference API instead of local model
+
+# Hugging Face API Token (sign up for free at huggingface.co)
+HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")  # Store your token as an environment variable for security
+
+# System prompt that defines your AI assistant's personality
+SYSTEM_PROMPT = """NORTHERN_AI is a helpful AI assistant created by AR.BALTEE.
+It aims to provide accurate and helpful information to users' questions.
+NORTHERN_AI is friendly, concise, and knowledgeable."""
+
+class NorthernAI:
+    def __init__(self):
+        self.system_prompt = SYSTEM_PROMPT
+
+        if not USE_CLOUD_INFERENCE:
+            print("Loading model locally (requires sufficient RAM)...")
+            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+            # Load in half precision (float16) to reduce memory requirements
+            self.model = AutoModelForCausalLM.from_pretrained(
+                MODEL_NAME,
+                torch_dtype=torch.float16,
+                low_cpu_mem_usage=True,
+                device_map="auto"
+            )
+        else:
+            print("Using cloud inference API (minimal RAM required)...")
+            # For cloud inference, we'll just need the API client
+            from huggingface_hub import InferenceClient
+            self.client = InferenceClient(token=HF_API_TOKEN)
+
+    def generate_response(self, user_input):
+        prompt = f"{self.system_prompt}\n\nUser: {user_input}\nNORTHERN_AI:"
+
+        if USE_CLOUD_INFERENCE:
+            # Use Hugging Face's Inference API
+            response = self.client.text_generation(
+                prompt,
+                model=MODEL_NAME,
+                max_new_tokens=150,
+                do_sample=True,  # sampling must be enabled for temperature/top_p to apply
+                temperature=0.7,
+                top_p=0.95,
+                repetition_penalty=1.1
+            )
+            return response.strip()
+        else:
+            # Local generation
+            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+            with torch.no_grad():
+                output = self.model.generate(
+                    **inputs,
+                    max_new_tokens=150,
+                    do_sample=True,  # without this, generate() ignores temperature/top_p
+                    temperature=0.7,
+                    top_p=0.95,
+                    repetition_penalty=1.1
+                )
+            return self.tokenizer.decode(output[0], skip_special_tokens=True).split("NORTHERN_AI:")[-1].strip()
+
+# Initialize the AI assistant
+northern_ai = NorthernAI()
+
+@app.route('/api/chat', methods=['POST'])
+def chat():
+    data = request.json
+    user_message = data.get('message', '')
+    response = northern_ai.generate_response(user_message)
+    return jsonify({"response": response})
+
+@app.route('/')
+def home():
+    return """
+    <html>
+    <head><title>NORTHERN_AI by AR.BALTEE</title></head>
+    <body>
+        <h1>Welcome to NORTHERN_AI</h1>
+        <form id="chat-form">
+            <input type="text" id="user-input" placeholder="Ask NORTHERN_AI something...">
+            <button type="submit">Send</button>
+        </form>
+        <div id="chat-history"></div>
+
+        <script>
+        document.getElementById('chat-form').addEventListener('submit', async (e) => {
+            e.preventDefault();
+            const input = document.getElementById('user-input');
+            const message = input.value;
+            input.value = '';
+
+            // Display user message
+            const chatHistory = document.getElementById('chat-history');
+            chatHistory.innerHTML += `<p><strong>You:</strong> ${message}</p>`;
+
+            // Get AI response
+            const response = await fetch('/api/chat', {
+                method: 'POST',
+                headers: {'Content-Type': 'application/json'},
+                body: JSON.stringify({message})
+            });
+
+            const data = await response.json();
+            chatHistory.innerHTML += `<p><strong>NORTHERN_AI:</strong> ${data.response}</p>`;
+        });
+        </script>
+    </body>
+    </html>
+    """
+
+if __name__ == '__main__':
+    # Use the PORT environment variable provided by most free hosting services
+    port = int(os.environ.get("PORT", 5000))
+    app.run(host='0.0.0.0', port=port)
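
A minimal client sketch for exercising the new /api/chat endpoint (not part of the commit itself): it assumes the app is running locally on the default port 5000 and that the requests package is installed; the URL and message text below are illustrative.

# Hypothetical smoke test for the /api/chat route added above.
import requests

resp = requests.post(
    "http://localhost:5000/api/chat",          # assumes the default local port
    json={"message": "What is NORTHERN_AI?"},  # any user message works here
    timeout=60,                                # cloud inference can be slow on cold starts
)
resp.raise_for_status()
print(resp.json()["response"])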