# lab2/app.py — multi-mode AI assistant (Gradio UI over a llama.cpp GGUF model)
# Author: Filip (commit ab546a4)
# NOTE: the Hugging Face file-viewer chrome ("raw", "history blame", "4.54 kB")
# was accidentally captured with this file and has been folded into this header.
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import json
import re
def load_model():
    """Fetch the GGUF model from the Hugging Face Hub and load it via llama.cpp.

    Returns:
        Llama: a model instance ready for chat completions.
    """
    # Download (or reuse the locally cached copy of) the quantized weights.
    gguf_path = hf_hub_download(
        repo_id="forestav/gguf_lora_model",
        filename="unsloth.F16.gguf",
    )
    print(f"Loading model from: {gguf_path}")
    # 2048-token context window; 8 CPU threads for inference.
    return Llama(model_path=gguf_path, n_ctx=2048, n_threads=8)
# Enhanced generation with multiple modes
def generate_response(message, history, mode='chat'):
    """Generate one chat completion for `message` given the prior `history`.

    Args:
        message: The latest user message (str).
        history: Prior turns. Both Gradio formats are accepted:
            a list of ``[user, assistant]`` pairs (legacy "tuples" format),
            or a list of ``{"role": ..., "content": ...}`` dicts
            ("messages" format).
        mode: Persona selector — 'chat', 'code', 'creative' or 'analytical';
            unknown values fall back to the generic assistant prompt.

    Returns:
        str: The assistant's reply text.
    """
    # Dispatch table instead of an if/elif chain; unknown modes get the default.
    system_prompts = {
        'code': "You are an expert coding assistant. Provide clean, efficient code solutions.",
        'creative': "You are a creative writing assistant. Generate imaginative and engaging content.",
        'analytical': "You are an analytical assistant. Provide deep, structured insights and reasoning.",
    }
    system_prompt = system_prompts.get(mode, "You are a helpful AI assistant.")

    messages = [{"role": "system", "content": system_prompt}]
    # The original `sum(history, [])` flattening was quadratic and assumed the
    # legacy pair format only; it also forwarded None assistant slots.
    for turn in history:
        if isinstance(turn, dict):
            # Gradio "messages" format: already role/content shaped.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy format: (user, assistant) pair; assistant may be None
            # for a turn that has not been answered yet — skip it.
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg is not None:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # `model` is the module-level Llama instance loaded once at import time.
    response = model.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )
    return response['choices'][0]['message']['content']
# Extract structured data from text
def extract_structured_data(text):
    """Extract structured data from free-form text.

    Strategy: first try to parse a JSON object embedded in the text; if that
    fails, fall back to collecting ``key: value`` lines.

    Args:
        text: Arbitrary input text.

    Returns:
        dict: The parsed JSON object, or key/value pairs, or
        ``{"error": <message>}`` if anything unexpected goes wrong.
    """
    try:
        # Greedy match from the first '{' to the last '}' (DOTALL spans lines).
        candidate = re.search(r'\{.*\}', text, re.DOTALL)
        if candidate is not None:
            try:
                return json.loads(candidate.group(0))
            except json.JSONDecodeError:
                pass  # Not valid JSON — fall through to line parsing.

        # Fallback: every line containing ':' becomes one key/value entry.
        pairs = {}
        for raw_line in text.split('\n'):
            if ':' not in raw_line:
                continue
            key, _, value = raw_line.partition(':')
            pairs[key.strip()] = value.strip()
        return pairs
    except Exception as exc:  # Best-effort API: never raise to the UI.
        return {"error": str(exc)}
# Create Gradio interface with multiple tabs
def create_interface():
    """Build the tabbed Gradio UI: three chat personas plus a data extractor.

    Returns:
        gr.Blocks: the assembled (not yet launched) demo.
    """
    # (tab label, generation mode, title, description) for each chat tab.
    # Data-driven instead of three copy-pasted blocks with unused locals.
    chat_tabs = [
        ("Conversational Chat", "chat", "Conversational AI",
         "General-purpose conversation mode"),
        ("Code Assistant", "code", "AI Code Generator",
         "Generate code snippets and solve programming challenges"),
        ("Creative Writing", "creative", "Creative Writing Assistant",
         "Generate stories, poems, and creative content"),
    ]

    with gr.Blocks() as demo:
        gr.Markdown("# Multi-Mode AI Assistant")
        with gr.Tabs():
            for label, mode, title, description in chat_tabs:
                with gr.TabItem(label):
                    # Bind `mode` as a default argument so each closure keeps
                    # its own value (avoids the late-binding lambda pitfall).
                    gr.ChatInterface(
                        fn=lambda message, history, m=mode: generate_response(message, history, m),
                        title=title,
                        description=description,
                    )

            # Data extraction tab: free text in, parsed JSON out.
            with gr.TabItem("Data Extractor"):
                with gr.Row():
                    text_input = gr.Textbox(label="Input Text")
                extract_btn = gr.Button("Extract Structured Data")
                json_output = gr.JSON(label="Extracted Data")
                extract_btn.click(
                    fn=extract_structured_data,
                    inputs=text_input,
                    outputs=json_output,
                )
    return demo
# Load the model once at import time so every request reuses the same instance.
print("Starting model loading...")
model = load_model()
print("Model loaded successfully!")

# Build the UI at import time; Spaces discovers the module-level `demo`.
demo = create_interface()

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # Bind all interfaces (required on Spaces).
        server_port=7860,       # Standard Gradio/Spaces port.
        # The original passed share=True while its comment said a share link
        # is not needed on Spaces; Spaces serves the app directly, so no
        # temporary gradio.live tunnel should be created.
        share=False,
    )