Spaces:
Running
Running
File size: 4,853 Bytes
bd97f47 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import logging
# Configure the root logger once, at import time: records at INFO and above
# are emitted, each formatted as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
class ModelManager:
    """Load and hold the models used for toxicity and text-quality checks.

    Each ``load_*`` method tries the requested checkpoint first and then a
    fixed, ordered list of fallback checkpoints, logging every attempt.
    Loading failures are never raised to the caller; they are logged and
    reported through the boolean return value.

    Attributes:
        toxicity_model: Sequence-classification model, ``None`` until loaded.
        toxicity_tokenizer: Tokenizer loaded from the same checkpoint as
            ``toxicity_model``, ``None`` until loaded.
        quality_pipeline: ``transformers`` pipeline object, ``None`` until
            loaded.
        quality_model: Never assigned by this class; kept (as ``None``) only
            so existing attribute reads keep working.
        quality_tokenizer: Same as ``quality_model``.
        device: ``"cuda"`` if ``torch.cuda.is_available()`` else ``"cpu"``.
            Only used to pick the device index of the quality pipeline; the
            toxicity model is left wherever ``from_pretrained`` puts it.
    """

    # Each attempt is ``(loader_args, start_log_format, success_log_message,
    # failure_log_format)``; ``loader_args[0]`` is always the checkpoint id.

    # Tried in order after the requested toxicity checkpoint fails.
    # NOTE(review): "dbmdz/bert-base-turkish-cased" looks like a base (not
    # fine-tuned) checkpoint, in which case its classification head would be
    # freshly initialised -- confirm it is an acceptable fallback.
    _TOXICITY_FALLBACKS = (
        (
            ("dbmdz/bert-base-turkish-cased",),
            "Trying backup Turkish model: %s",
            "Backup Turkish model loaded successfully",
            "Error loading backup Turkish model: %s",
        ),
        (
            ("distilbert/distilbert-base-uncased-finetuned-sst-2-english",),
            "Trying English sentiment model: %s",
            "English sentiment model loaded successfully",
            "Error loading English model: %s",
        ),
    )

    # Tried in order after the requested quality checkpoint fails; the second
    # element of ``loader_args`` is the pipeline task name.
    _QUALITY_FALLBACKS = (
        (
            ("Helsinki-NLP/opus-mt-tc-big-tr-en", "translation"),
            "Trying Turkish translation model for quality: %s",
            "Turkish translation model loaded successfully",
            "Error loading Turkish translation model: %s",
        ),
        (
            ("sshleifer/distilbart-xsum-12-6", "text2text-generation"),
            "Trying lighter quality model: %s",
            "Lighter quality model loaded successfully",
            "Error loading lighter quality model: %s",
        ),
    )

    def __init__(self):
        """Create an empty manager and record which device is available."""
        self.toxicity_model = None
        self.toxicity_tokenizer = None
        self.quality_model = None
        self.quality_tokenizer = None
        # Initialised here so the attribute always exists, even before
        # load_quality_model() has been called.
        self.quality_pipeline = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logging.info("Using device: %s", self.device)

    @staticmethod
    def _first_successful(attempts, load):
        """Run ``load`` for each attempt in order and return the first result.

        Args:
            attempts: Iterable of ``(loader_args, start_log_format,
                success_log_message, failure_log_format)`` tuples.
            load: Callable invoked as ``load(*loader_args)``.

        Returns:
            Whatever ``load`` returned for the first attempt that did not
            raise, or ``None`` if every attempt raised.
        """
        for loader_args, start_msg, ok_msg, err_msg in attempts:
            logging.info(start_msg, loader_args[0])
            try:
                loaded = load(*loader_args)
            except Exception as exc:
                # Deliberately broad: any loading failure is treated as a
                # signal to move on to the next candidate.
                logging.error(err_msg, exc)
                continue
            logging.info(ok_msg)
            return loaded
        return None

    @staticmethod
    def _load_classifier(model_id):
        """Return ``(tokenizer, model)`` for a sequence-classification checkpoint."""
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSequenceClassification.from_pretrained(model_id)
        return tokenizer, model

    def _build_pipeline(self, model_id, task):
        """Return a ``transformers`` pipeline for ``task`` backed by ``model_id``."""
        return pipeline(
            task,
            model=model_id,
            tokenizer=model_id,
            # Pipelines take a device index: 0 is the first GPU, -1 is the CPU.
            device=0 if self.device == "cuda" else -1,
        )

    def load_toxicity_model(self, model_name="savasy/bert-base-turkish-sentiment"):
        """Load the model and tokenizer used for toxicity detection.

        Tries ``model_name`` first, then each entry of
        ``_TOXICITY_FALLBACKS`` in order.

        Args:
            model_name: Checkpoint id to try first.

        Returns:
            ``True`` if some checkpoint loaded, ``False`` if all failed.
            ``toxicity_tokenizer`` and ``toxicity_model`` are only replaced,
            together, when a checkpoint loads completely; on failure both
            keep their previous values.
        """
        attempts = (
            (
                (model_name,),
                "Loading toxicity model: %s",
                "Toxicity model loaded successfully",
                "Error loading toxicity model: %s",
            ),
        ) + self._TOXICITY_FALLBACKS

        loaded = self._first_successful(attempts, self._load_classifier)
        if loaded is None:
            return False
        # Assign both at once so the tokenizer can never come from a
        # different checkpoint than the model.
        self.toxicity_tokenizer, self.toxicity_model = loaded
        return True

    def load_quality_model(self, model_name="sshleifer/distilbart-cnn-6-6"):
        """Load the pipeline used for text-quality evaluation.

        Tries ``model_name`` as a ``"text2text-generation"`` pipeline first,
        then each entry of ``_QUALITY_FALLBACKS`` in order (note that the
        first fallback is a ``"translation"`` pipeline).

        Args:
            model_name: Checkpoint id to try first.

        Returns:
            ``True`` if some pipeline was built and stored in
            ``quality_pipeline``, ``False`` if all attempts failed (in which
            case ``quality_pipeline`` keeps its previous value).
        """
        attempts = (
            (
                (model_name, "text2text-generation"),
                "Loading quality model: %s",
                "Quality model loaded successfully",
                "Error loading quality model: %s",
            ),
        ) + self._QUALITY_FALLBACKS

        built = self._first_successful(attempts, self._build_pipeline)
        if built is None:
            return False
        self.quality_pipeline = built
        return True

    def get_models(self):
        """Return the currently loaded objects.

        Returns:
            A dict with the keys ``"toxicity_model"``,
            ``"toxicity_tokenizer"`` and ``"quality_pipeline"``; a value is
            ``None`` when the corresponding object has not been loaded.
        """
        return {
            "toxicity_model": self.toxicity_model,
            "toxicity_tokenizer": self.toxicity_tokenizer,
            "quality_pipeline": self.quality_pipeline,
        }