Spaces:

UcsTurkey
/

flare

Building

App Files Community

ciyidogan committed on May 27

Commit

c74c7e0

verified ·

1 Parent(s): 2c98904

Update intent_utils.py

Browse files

Files changed (1) hide show

intent_utils.py +0 -110

intent_utils.py CHANGED Viewed

@@ -1,124 +1,14 @@
-import os
-import torch
-import json
-import shutil
 import re
-import traceback
-from datasets import Dataset
-from transformers import (
-    AutoTokenizer,
-    AutoModelForSequenceClassification,
-    Trainer,
-    TrainingArguments,
-    default_data_collator,
-    AutoConfig,
-)
-from log import log
-from core import llm_models
-async def detect_intent(text, project_name):
-    llm_model_instance = llm_models.get(project_name)
-    if not llm_model_instance or not llm_model_instance.intent_model:
-        raise Exception(f"'{project_name}' için intent modeli yüklenmemiş.")
-    tokenizer = llm_model_instance.intent_tokenizer
-    model = llm_model_instance.intent_model
-    label2id = llm_model_instance.intent_label2id
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-    outputs = model(**inputs)
-    predicted_id = outputs.logits.argmax(dim=-1).item()
-    detected_intent = [k for k, v in label2id.items() if v == predicted_id][0]
-    confidence = outputs.logits.softmax(dim=-1).max().item()
-    return detected_intent, confidence
-def background_training(project_name, intents, model_id, output_path, confidence_threshold):
-    try:
-        log(f"🔧 Intent eğitimi başlatıldı (proje: {project_name})")
-        texts, labels, label2id = [], [], {}
-        for idx, intent in enumerate(intents):
-            label2id[intent["name"]] = idx
-            for ex in intent["examples"]:
-                texts.append(ex)
-                labels.append(idx)
-        dataset = Dataset.from_dict({"text": texts, "label": labels})
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        config = AutoConfig.from_pretrained(model_id)
-        config.problem_type = "single_label_classification"
-        config.num_labels = len(label2id)
-        model = AutoModelForSequenceClassification.from_pretrained(model_id, config=config)
-        tokenized_data = {"input_ids": [], "attention_mask": [], "label": []}
-        for row in dataset:
-            out = tokenizer(row["text"], truncation=True, padding="max_length", max_length=128)
-            tokenized_data["input_ids"].append(out["input_ids"])
-            tokenized_data["attention_mask"].append(out["attention_mask"])
-            tokenized_data["label"].append(row["label"])
-        tokenized = Dataset.from_dict(tokenized_data)
-        tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
-        if os.path.exists(output_path):
-            shutil.rmtree(output_path)
-        os.makedirs(output_path, exist_ok=True)
-        trainer = Trainer(
-            model=model,
-            args=TrainingArguments(output_path, per_device_train_batch_size=4, num_train_epochs=3, logging_steps=10, save_strategy="no", report_to=[]),
-            train_dataset=tokenized,
-            data_collator=default_data_collator,
-        )
-        trainer.train()
-        log("🔧 Başarı raporu üretiliyor...")
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        model.to(device)
-        input_ids_tensor = torch.tensor(tokenized["input_ids"]).to(device)
-        attention_mask_tensor = torch.tensor(tokenized["attention_mask"]).to(device)
-        with torch.no_grad():
-            outputs = model(input_ids=input_ids_tensor, attention_mask=attention_mask_tensor)
-            predictions = outputs.logits.argmax(dim=-1).tolist()
-        actuals = tokenized["label"]
-        counts, correct = {}, {}
-        for pred, actual in zip(predictions, actuals):
-            intent_name = list(label2id.keys())[list(label2id.values()).index(actual)]
-            counts[intent_name] = counts.get(intent_name, 0) + 1
-            if pred == actual:
-                correct[intent_name] = correct.get(intent_name, 0) + 1
-        for intent_name, total in counts.items():
-            accuracy = correct.get(intent_name, 0) / total
-            log(f"📊 Intent '{intent_name}' doğruluk: {accuracy:.2f} — {total} örnek")
-            if accuracy < confidence_threshold or total < 5:
-                log(f"⚠️ Yetersiz performanslı intent: '{intent_name}' — Doğruluk: {accuracy:.2f}, Örnek: {total}")
-        # Eğitim sonrası model ve tokenizer'ı diske kaydet
-        model.save_pretrained(output_path)
-        tokenizer.save_pretrained(output_path)
-        with open(os.path.join(output_path, "label2id.json"), "w") as f:
-            json.dump(label2id, f)
-        log(f"✅ Intent eğitimi tamamlandı ve '{project_name}' için model disk üzerinde hazır.")
-    except Exception as e:
-        log(f"❌ Intent eğitimi hatası: {e}")
-        traceback.print_exc()
 def extract_parameters(variables_list, user_input):
     extracted_params = []
     for pattern in variables_list:
-        # Örneğin: from_location:{Ankara} to_location:{İstanbul}
         regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
         match = re.match(regex, user_input)
         if match:
             extracted_params = [{"key": k, "value": v} for k, v in match.groupdict().items()]
             break
-    # Ek özel basit yakalama: iki şehir birden yazılırsa → sırayla atama
     if not extracted_params:
         city_pattern = r"(\bAnkara\b|\bİstanbul\b|\bİzmir\b)"
         cities = re.findall(city_pattern, user_input)

 import re
 def extract_parameters(variables_list, user_input):
     extracted_params = []
     for pattern in variables_list:
         regex = re.sub(r"(\w+):\{(.+?)\}", r"(?P<\1>.+?)", pattern)
         match = re.match(regex, user_input)
         if match:
             extracted_params = [{"key": k, "value": v} for k, v in match.groupdict().items()]
             break
     if not extracted_params:
         city_pattern = r"(\bAnkara\b|\bİstanbul\b|\bİzmir\b)"
         cities = re.findall(city_pattern, user_input)