import yaml
import argparse
import sys
import os
# Load environment variables from .env file (for local development only)
try:
    from dotenv import load_dotenv
    # load_dotenv() returns False (rather than raising) when no .env exists
    if load_dotenv():
        print("✅ .env file loaded successfully (local development)")
    else:
        print("ℹ️ No .env file found - using environment variables directly")
except ImportError:
    print("Warning: python-dotenv not installed. Install with: pip install python-dotenv")
    print("Or set NVIDIA_API_KEY environment variable manually")
# Make the bundled "model" package importable regardless of the working directory
sys.path.append(os.path.join(os.path.dirname(__file__), 'model'))
from model.multi_task_graph_router import graph_router_prediction
import pandas as pd
from openai import OpenAI
import torch
parser = argparse.ArgumentParser()
parser.add_argument("--config_file", type=str, default="configs/config.yaml")
parser.add_argument("--query", type=str, default="What is the derivative of f(x) = x^3 + 2x^2 - x + 5?",
help="Input query to process")
args = parser.parse_args()
# Check for API key
if os.getenv("NVIDIA_API_KEY") is None:
print("❌ NVIDIA_API_KEY not found in environment variables")
print("For local development: Create a .env file with: NVIDIA_API_KEY=your_api_key_here")
print("For Hugging Face Spaces: Set NVIDIA_API_KEY in Repository Secrets")
print("⚠️ Exiting due to missing API key")
sys.exit(1)
print("✅ NVIDIA API key loaded from environment")
client = OpenAI(
base_url="https://integrate.api.nvidia.com/v1",
api_key=os.getenv("NVIDIA_API_KEY"),
timeout=60,
max_retries=2
)
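# Optional sanity check (a sketch, not part of the pipeline; assumes the
# endpoint supports the OpenAI-style /v1/models listing):
#
#     for m in client.models.list().data[:5]:
#         print(m.id)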
def model_prompting(
llm_model: str,
prompt: str,
max_token_num: int = 1024,
temperature: float = 0.2,
top_p: float = 0.7,
stream: bool = True,
) -> str:
"""
Get a response from an LLM model using the OpenAI-compatible NVIDIA API.
Args:
llm_model: Name of the model to use (e.g., "meta/llama-3.1-8b-instruct")
prompt: Input prompt text
max_token_num: Maximum number of tokens to generate
temperature: Sampling temperature
top_p: Top-p sampling parameter
stream: Whether to stream the response
Returns:
Generated text response
"""
    try:
        completion = client.chat.completions.create(
            model=llm_model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_token_num,
            temperature=temperature,
            top_p=top_p,
            stream=stream
        )
        # Non-streaming responses return the full message in a single object
        if not stream:
            return completion.choices[0].message.content
        # Streaming responses arrive as incremental delta chunks
        response_text = ""
        for chunk in completion:
            if chunk.choices[0].delta.content is not None:
                response_text += chunk.choices[0].delta.content
        return response_text
    except Exception as e:
        raise RuntimeError(f"API call failed: {e}") from e
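# Example usage (a sketch; assumes "meta/llama-3.1-8b-instruct" is available to
# your NVIDIA API key - any chat model from the catalog works the same way):
#
#     answer = model_prompting(
#         llm_model="meta/llama-3.1-8b-instruct",
#         prompt="State the quadratic formula.",
#         max_token_num=128,
#     )
#     print(answer)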
def generate_task_description(query: str) -> str:
"""
Generate a concise task description using LLM API.
Args:
query: The user's input query
Returns:
A concise task description
"""
prompt = f"""Analyze the following query and provide a concise task description that identifies the type of task and domain it belongs to. Focus on the core problem type and relevant domain areas.
Query: {query}
Please provide a brief, focused task description that captures:
1. The primary task type (e.g., mathematical calculation, text analysis, coding, reasoning, etc.)
2. The relevant domain or subject area
3. The complexity level or approach needed
Keep the description concise and informative. Respond with just the task description, no additional formatting."""
try:
task_description = model_prompting(
llm_model="meta/llama-3.1-8b-instruct",
prompt=prompt,
max_token_num=256,
temperature=0.1,
top_p=0.9,
stream=True
)
        return task_description.strip()
except Exception as e:
print(f"Warning: Failed to generate task description via API: {str(e)}")
# Fallback to a generic description
return "General query processing task requiring analysis and response generation."
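# Example (a sketch): for the default derivative query, this typically yields
# something like "Mathematical calculation task in calculus: differentiate a
# polynomial using the power rule; low complexity." Exact wording varies with
# the model and temperature.
#
#     desc = generate_task_description("What is the derivative of x^2?")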
from functools import lru_cache
from transformers import LongformerTokenizer, LongformerModel

@lru_cache(maxsize=1)
def _load_longformer(model_name: str, device: str):
    """Load and cache the Longformer tokenizer and model so repeated calls reuse them."""
    tokenizer = LongformerTokenizer.from_pretrained(model_name)
    model = LongformerModel.from_pretrained(model_name).to(device)
    model.eval()
    return tokenizer, model

def get_cls_embedding(text, model_name="allenai/longformer-base-4096", device="cpu"):
    """
    Extracts the [CLS] embedding from a given text using Longformer.
    Args:
        text (str): Input text
        model_name (str): Hugging Face model name
        device (str): "cpu" or "cuda"
    Returns:
        torch.Tensor: CLS embedding of shape (1, hidden_size)
    """
    tokenizer, model = _load_longformer(model_name, device)
    # Tokenize input (Longformer accepts sequences up to 4096 tokens)
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=4096).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    # The first token (<s>, Longformer's [CLS] equivalent) summarizes the sequence
    cls_embedding = outputs.last_hidden_state[:, 0, :]  # (1, hidden_size)
    return cls_embedding
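# Example usage (a sketch): longformer-base-4096 has hidden_size 768, so the
# returned tensor has shape (1, 768); the .squeeze(0) calls in main() yield (768,).
#
#     emb = get_cls_embedding("A short test sentence.")
#     print(emb.shape)  # torch.Size([1, 768])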
def main():
"""Main execution function with error handling."""
try:
with open(args.config_file, 'r', encoding='utf-8') as file:
config = yaml.safe_load(file)
print("Loading training data...")
train_df = pd.read_csv(config['train_data_path'])
train_df = train_df[train_df["task_name"] != 'quac']
print(f"Loaded {len(train_df)} training samples")
input_user_query = args.query
print(f"Input Query: {input_user_query}")
# Generate embeddings for the query
print("Generating query embedding...")
user_query_embedding = get_cls_embedding(input_user_query).squeeze(0)
# Call LLM to generate user_task_description
print("Generating task description using LLM API...")
user_task_description = generate_task_description(input_user_query)
print(f"Generated Task Description: {user_task_description}")
# Generate embeddings for the task description
print("Generating task description embedding...")
user_task_embedding = get_cls_embedding(user_task_description).squeeze(0)
        # Prepare the test dataframe: reuse llm_num training rows as a template,
        # then overwrite them with the user's query and task description
        test_df = train_df.head(config['llm_num']).copy()
        test_df['query'] = input_user_query
        test_df['task_description'] = user_task_description
        test_df.loc[0, 'query_embedding'] = str(user_query_embedding)
        # Store the task embedding in its own column (the name is assumed to match
        # the training CSV) so it does not overwrite the description text set above
        test_df.loc[0, 'task_description_embedding'] = str(user_task_embedding)
print("Running graph router prediction...")
        graph_router_prediction(
            router_data_train=train_df,
            router_data_test=test_df,
            llm_path=config['llm_description_path'],
            llm_embedding_path=config['llm_embedding_path'],
            config=config,
        )
print("Pipeline completed successfully!")
    except FileNotFoundError as e:
        print(f"Error: Required file not found - {e}")
sys.exit(1)
except Exception as e:
print(f"Error during execution: {e}")
sys.exit(1)
if __name__ == "__main__":
main()
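# CLI examples (a sketch; the filename "run_router.py" is hypothetical - use
# whatever name this script has in the Space):
#
#     python run_router.py
#     python run_router.py --config_file configs/config.yaml \
#         --query "Write a Python function that reverses a string."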