MagicMeWizard committed on
Commit ccc5d44 · verified · 1 Parent(s): ed05d05

Update config.py

Files changed (1):
  1. config.py +374 -141

config.py CHANGED
@@ -1,204 +1,437 @@
 """
-Configuration settings for AI Web Scraper
-Centralized configuration management for security, performance, and features
 """

 import os
-from typing import Dict, List, Optional
 from dataclasses import dataclass

 @dataclass
-class SecurityConfig:
-    """Security-related configuration"""
-    # URL validation settings
-    allowed_schemes: List[str] = None
-    blocked_domains: List[str] = None
-    max_url_length: int = 2048

-    # Rate limiting
     requests_per_minute: int = 30
-    requests_per_hour: int = 500

-    # Content safety
-    max_content_size: int = 10 * 1024 * 1024  # 10MB
-    max_processing_time: int = 60  # seconds

     def __post_init__(self):
-        if self.allowed_schemes is None:
-            self.allowed_schemes = ['http', 'https']

         if self.blocked_domains is None:
             self.blocked_domains = [
-                'localhost', '127.0.0.1', '0.0.0.0',
-                '192.168.', '10.', '172.16.', '172.17.',
-                '172.18.', '172.19.', '172.20.', '172.21.',
-                '172.22.', '172.23.', '172.24.', '172.25.',
-                '172.26.', '172.27.', '172.28.', '172.29.',
-                '172.30.', '172.31.'
             ]

 @dataclass
 class ModelConfig:
-    """AI model configuration"""
-    # Primary summarization model
-    primary_model: str = "facebook/bart-large-cnn"

-    # Fallback model for faster processing
-    fallback_model: str = "sshleifer/distilbart-cnn-12-6"

-    # Model parameters
-    max_input_length: int = 1024
-    max_summary_length: int = 500
-    min_summary_length: int = 30

-    # Performance settings
     device: str = "auto"  # auto, cpu, cuda
-    batch_size: int = 1
-    use_fast_tokenizer: bool = True

 @dataclass
-class ScrapingConfig:
-    """Web scraping configuration"""
-    # Request settings
-    timeout: int = 15
-    max_retries: int = 3
-    retry_delay: int = 1

-    # User agent string
-    user_agent: str = "Mozilla/5.0 (compatible; AI-WebScraper/1.0; Research Tool)"

-    # Content extraction
-    min_content_length: int = 100
-    max_content_length: int = 100000

-    # Robots.txt settings
     respect_robots_txt: bool = True
-    robots_cache_duration: int = 3600  # seconds

 @dataclass
 class UIConfig:
     """User interface configuration"""
-    # Default values
-    default_summary_length: int = 300
-    max_summary_length: int = 500
-    min_summary_length: int = 100

-    # Interface settings
-    enable_batch_processing: bool = True
-    max_batch_size: int = 10
-    show_advanced_options: bool = False

-    # Export settings
-    supported_export_formats: List[str] = None

-    def __post_init__(self):
-        if self.supported_export_formats is None:
-            self.supported_export_formats = ["CSV", "JSON"]

 class Config:
-    """Main configuration class"""

     def __init__(self):
-        self.security = SecurityConfig()
-        self.models = ModelConfig()
         self.scraping = ScrapingConfig()
         self.ui = UIConfig()

-        # Load from environment variables if available
-        self._load_from_env()
-
-    def _load_from_env(self):
-        """Load configuration from environment variables"""
-        # Security settings
-        if os.getenv('MAX_REQUESTS_PER_MINUTE'):
-            self.security.requests_per_minute = int(os.getenv('MAX_REQUESTS_PER_MINUTE'))
-
-        if os.getenv('MAX_CONTENT_SIZE'):
-            self.security.max_content_size = int(os.getenv('MAX_CONTENT_SIZE'))
-
-        # Model settings
-        if os.getenv('PRIMARY_MODEL'):
-            self.models.primary_model = os.getenv('PRIMARY_MODEL')
-
-        if os.getenv('FALLBACK_MODEL'):
-            self.models.fallback_model = os.getenv('FALLBACK_MODEL')
-
-        if os.getenv('DEVICE'):
-            self.models.device = os.getenv('DEVICE')

-        # Scraping settings
-        if os.getenv('REQUEST_TIMEOUT'):
-            self.scraping.timeout = int(os.getenv('REQUEST_TIMEOUT'))
-
-        if os.getenv('USER_AGENT'):
-            self.scraping.user_agent = os.getenv('USER_AGENT')
-
-        if os.getenv('RESPECT_ROBOTS_TXT'):
-            self.scraping.respect_robots_txt = os.getenv('RESPECT_ROBOTS_TXT').lower() == 'true'
-
-    def get_model_device(self) -> str:
-        """Get the appropriate device for model inference"""
-        if self.models.device == "auto":
-            try:
-                import torch
-                return "cuda" if torch.cuda.is_available() else "cpu"
-            except ImportError:
-                return "cpu"
-        return self.models.device
-
-    def is_url_allowed(self, url: str) -> bool:
-        """Check if URL is allowed based on security settings"""
         from urllib.parse import urlparse

         try:
             parsed = urlparse(url)

             # Check scheme
-            if parsed.scheme not in self.security.allowed_schemes:
                 return False

-            # Check blocked domains
-            hostname = parsed.hostname or ''
             for blocked in self.security.blocked_domains:
-                if blocked in hostname:
                     return False

-            # Check URL length
-            if len(url) > self.security.max_url_length:
-                return False

             return True

         except Exception:
             return False
-
-    def get_request_headers(self) -> Dict[str, str]:
-        """Get standard request headers"""
-        return {
-            'User-Agent': self.scraping.user_agent,
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-            'Accept-Language': 'en-US,en;q=0.5',
-            'Accept-Encoding': 'gzip, deflate',
-            'Connection': 'keep-alive',
-            'Upgrade-Insecure-Requests': '1',
-        }

-# Global configuration instance
 config = Config()

-# Environment-specific overrides for Hugging Face Spaces
-if os.getenv('SPACE_ID'):
-    # Running on Hugging Face Spaces
-    config.models.device = "auto"
-    config.security.requests_per_minute = 20  # More conservative on shared infrastructure
-    config.scraping.timeout = 10  # Shorter timeout on shared infrastructure
-
-    # Enable GPU if available
-    if os.getenv('CUDA_VISIBLE_DEVICES'):
-        config.models.device = "cuda"
-
-# Development mode overrides
-if os.getenv('ENVIRONMENT') == 'development':
-    config.security.requests_per_minute = 100
-    config.scraping.timeout = 30
-    config.ui.show_advanced_options = True

 """
+⚙️ Configuration settings for AI Dataset Studio with Perplexity integration
 """

 import os
 from dataclasses import dataclass
+from typing import List, Dict, Optional

 @dataclass
+class PerplexityConfig:
+    """Configuration for Perplexity AI integration"""

+    # API Configuration
+    api_key: Optional[str] = os.getenv('PERPLEXITY_API_KEY')
+    base_url: str = "https://api.perplexity.ai"
+    model: str = "llama-3.1-sonar-large-128k-online"
+
+    # Rate Limiting
     requests_per_minute: int = 30
+    request_timeout: int = 30
+    max_retries: int = 3
+    min_request_interval: float = 1.0  # seconds
+
+    # Search Configuration
+    default_max_sources: int = 20
+    max_sources_limit: int = 50
+    min_sources: int = 5

+    # Quality Thresholds
+    min_relevance_score: float = 3.0
+    min_content_length: int = 100
+    max_content_length: int = 10_000_000  # 10MB
+
+    # Search Templates
+    search_templates: Dict[str, str] = None

     def __post_init__(self):
+        """Initialize search templates after creation"""
+        if self.search_templates is None:
+            self.search_templates = {
+                "sentiment_analysis": """
+Find {max_sources} high-quality sources containing text with clear emotional sentiment for machine learning training:
+
+PROJECT: {project_description}
+
+REQUIREMENTS:
+- Sources with clear positive, negative, or neutral sentiment
+- Text suitable for sentiment classification training
+- Diverse content types (reviews, social media, news, forums)
+- Avoid heavily biased or extreme content
+- Include metadata when possible (ratings, timestamps, etc.)
+
+SEARCH FOCUS:
+- Product reviews and customer feedback
+- Social media posts and comments
+- News articles with opinion content
+- Blog posts with clear sentiment
+- Forum discussions and community posts
+
+OUTPUT FORMAT:
+For each source provide:
+1. **URL**: Direct link to content
+2. **Title**: Clear, descriptive title
+3. **Description**: Why this source is good for sentiment analysis
+4. **Content Type**: [review/social/news/blog/forum]
+5. **Expected Sentiment Distribution**: Estimate of positive/negative/neutral content
+6. **Quality Score**: 1-10 rating for ML training suitability
+""",
+
+                "text_classification": """
+Find {max_sources} diverse, well-categorized sources for text classification training:
+
+PROJECT: {project_description}
+
+REQUIREMENTS:
+- Sources with clear, distinct categories or topics
+- Consistent content structure within categories
+- Sufficient variety within each category
+- Professional or semi-professional content quality
+- Avoid overly niche or specialized content
+
+SEARCH FOCUS:
+- News articles with clear sections (politics, sports, technology, etc.)
+- Academic papers with subject classifications
+- E-commerce product descriptions with categories
+- Blog posts with clear topical focus
+- Government documents with departmental classifications
+
+OUTPUT FORMAT:
+For each source provide:
+1. **URL**: Direct link to content
+2. **Title**: Clear, descriptive title
+3. **Description**: Content type and classification scheme
+4. **Categories Available**: List of categories/classes present
+5. **Content Volume**: Estimated amount of data per category
+6. **Quality Score**: 1-10 rating for classification training
+""",
+
+                "named_entity_recognition": """
+Find {max_sources} text-rich sources with clear named entities for NER training:
+
+PROJECT: {project_description}
+
+REQUIREMENTS:
+- Rich in named entities (people, places, organizations, dates, etc.)
+- Clear, well-written text (not fragmented or poorly formatted)
+- Diverse entity types and contexts
+- Professional writing quality
+- Entities are clearly identifiable in context
+
+SEARCH FOCUS:
+- News articles and press releases
+- Biographical content and profiles
+- Business and financial reports
+- Historical documents and articles
+- Academic papers and research
+- Government publications
+
+OUTPUT FORMAT:
+For each source provide:
+1. **URL**: Direct link to content
+2. **Title**: Clear, descriptive title
+3. **Description**: Types of entities commonly found
+4. **Entity Density**: Expected frequency of named entities
+5. **Text Quality**: Assessment of writing clarity
+6. **Quality Score**: 1-10 rating for NER training
+""",
+
+                "question_answering": """
+Find {max_sources} sources with clear question-answer patterns for QA training:
+
+PROJECT: {project_description}
+
+REQUIREMENTS:
+- Explicit Q&A format OR clear factual content suitable for QA generation
+- Questions and answers are clearly delineated
+- Factual, verifiable information
+- Diverse question types (factual, definitional, procedural, etc.)
+- Professional quality content
+
+SEARCH FOCUS:
+- FAQ pages and help documentation
+- Interview transcripts and Q&A sessions
+- Educational content with questions
+- Technical documentation with examples
+- Customer support knowledge bases
+- Stack Overflow and similar Q&A platforms
+
+OUTPUT FORMAT:
+For each source provide:
+1. **URL**: Direct link to content
+2. **Title**: Clear, descriptive title
+3. **Description**: Q&A format type and subject matter
+4. **Question Types**: Types of questions typically found
+5. **Answer Quality**: Assessment of answer completeness
+6. **Quality Score**: 1-10 rating for QA training
+""",
+
+                "text_summarization": """
+Find {max_sources} sources with substantial, well-structured content for summarization training:
+
+PROJECT: {project_description}
+
+REQUIREMENTS:
+- Long-form content (articles, reports, papers)
+- Clear structure with main points
+- Professional writing quality
+- Self-contained content (doesn't rely heavily on external references)
+- Diverse content types and subjects
+
+SEARCH FOCUS:
+- News articles and investigative reports
+- Research papers and academic articles
+- Long-form blog posts and essays
+- Government reports and white papers
+- Industry analysis and market reports
+- Review articles and meta-analyses
+
+OUTPUT FORMAT:
+For each source provide:
+1. **URL**: Direct link to content
+2. **Title**: Clear, descriptive title
+3. **Description**: Content length and structure
+4. **Main Topics**: Key subjects covered
+5. **Summarization Potential**: How well-suited for summary generation
+6. **Quality Score**: 1-10 rating for summarization training
+""",
+
+                "translation": """
+Find {max_sources} parallel or multilingual content for translation training:
+
+PROJECT: {project_description}
+
+REQUIREMENTS:
+- Content available in multiple languages
+- High translation quality (professional or native-level)
+- Parallel content alignment when possible
+- Diverse domains and text types
+- Clear source and target language identification
+
+SEARCH FOCUS:
+- Multilingual news websites
+- International organization publications
+- Government documents in multiple languages
+- Educational content with translations
+- Software documentation with localization
+- Cultural and literary translations
+
+OUTPUT FORMAT:
+For each source provide:
+1. **URL**: Direct link to content
+2. **Title**: Clear, descriptive title
+3. **Description**: Languages available and content type
+4. **Language Pairs**: Specific language combinations
+5. **Translation Quality**: Assessment of translation accuracy
+6. **Quality Score**: 1-10 rating for translation training
+"""
+            }
+
+@dataclass
+class ScrapingConfig:
+    """Configuration for web scraping"""
+
+    # Request settings
+    timeout: int = 15
+    max_retries: int = 3
+    retry_delay: float = 1.0
+
+    # Rate limiting
+    requests_per_second: float = 0.5  # Conservative rate limiting
+    burst_requests: int = 5
+
+    # Content filtering
+    min_content_length: int = 100
+    max_content_length: int = 1_000_000  # 1MB per page
+
+    # User agent rotation
+    user_agents: List[str] = None
+
+    # Blocked domains (respect robots.txt)
+    blocked_domains: List[str] = None
+
+    # Content extraction settings
+    extract_metadata: bool = True
+    clean_html: bool = True
+    preserve_structure: bool = False
+
+    def __post_init__(self):
+        """Initialize default values"""
+        if self.user_agents is None:
+            self.user_agents = [
+                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+                'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+            ]

         if self.blocked_domains is None:
             self.blocked_domains = [
+                'localhost',
+                '127.0.0.1',
+                '0.0.0.0',
+                '10.',
+                '172.',
+                '192.168.',
+                'internal.',
+                'staging.',
+                'test.',
+                'dev.'
             ]

 @dataclass
 class ModelConfig:
+    """Configuration for AI models"""

+    # Model selection
+    sentiment_model: str = "cardiffnlp/twitter-roberta-base-sentiment-latest"
+    summarization_model: str = "facebook/bart-large-cnn"
+    ner_model: str = "dbmdz/bert-large-cased-finetuned-conll03-english"

+    # Fallback models (lighter/faster)
+    sentiment_fallback: str = "distilbert-base-uncased-finetuned-sst-2-english"
+    summarization_fallback: str = "sshleifer/distilbart-cnn-12-6"
+    ner_fallback: str = "distilbert-base-cased"

+    # Device configuration
     device: str = "auto"  # auto, cpu, cuda
+    use_gpu: bool = True
+    max_memory_mb: int = 4000
+
+    # Processing settings
+    max_sequence_length: int = 512
+    batch_size: int = 8
+    confidence_threshold: float = 0.7
+
+    # Cache settings
+    cache_models: bool = True
+    model_cache_dir: str = "./model_cache"

 @dataclass
+class ExportConfig:
+    """Configuration for dataset export"""

+    # File settings
+    max_file_size_mb: int = 100
+    compression: bool = True
+    encoding: str = "utf-8"

+    # Format-specific settings
+    json_indent: int = 2
+    csv_delimiter: str = ","
+    csv_quoting: int = 1  # csv.QUOTE_ALL
+
+    # HuggingFace dataset settings
+    hf_dataset_name_template: str = "ai-dataset-studio-{timestamp}"
+    hf_private: bool = True
+    hf_token: Optional[str] = os.getenv('HF_TOKEN')

+    # Metadata inclusion
+    include_source_urls: bool = True
+    include_timestamps: bool = True
+    include_processing_info: bool = True
+    include_confidence_scores: bool = True
+
+@dataclass
+class SecurityConfig:
+    """Security and safety configuration"""
+
+    # URL validation
+    allow_local_urls: bool = False
+    allow_private_ips: bool = False
+    max_redirects: int = 5
+
+    # Content filtering
+    filter_adult_content: bool = True
+    filter_spam: bool = True
+    max_duplicate_content: float = 0.8  # Similarity threshold
+
+    # Rate limiting enforcement
+    enforce_rate_limits: bool = True
     respect_robots_txt: bool = True
+
+    # Safety checks
+    scan_for_malware: bool = False  # Requires additional dependencies
+    validate_ssl: bool = True

 @dataclass
 class UIConfig:
     """User interface configuration"""

+    # Theme settings
+    theme: str = "soft"
+    custom_css: bool = True
+    dark_mode: bool = False

+    # Interface settings
+    max_preview_items: int = 10
+    preview_text_length: int = 200
+    show_progress_bars: bool = True

+    # Advanced features
+    enable_debug_mode: bool = False
+    show_model_info: bool = True
+    enable_export_preview: bool = True

+# Global configuration instance
 class Config:
+    """Main configuration class combining all settings"""

     def __init__(self):
+        self.perplexity = PerplexityConfig()
         self.scraping = ScrapingConfig()
+        self.models = ModelConfig()
+        self.export = ExportConfig()
+        self.security = SecurityConfig()
         self.ui = UIConfig()

+        # Application settings
+        self.app_name = "AI Dataset Studio"
+        self.version = "2.0.0"
+        self.debug = os.getenv('DEBUG', 'false').lower() == 'true'

+        # Logging
+        self.log_level = os.getenv('LOG_LEVEL', 'INFO')
+        self.log_format = '%(asctime)s - %(levelname)s - %(message)s'
+
+    def is_perplexity_enabled(self) -> bool:
+        """Check if Perplexity AI is properly configured"""
+        return bool(self.perplexity.api_key)
+
+    def get_search_template(self, template_type: str, **kwargs) -> str:
+        """Get formatted search template for Perplexity"""
+        template = self.perplexity.search_templates.get(template_type, "")
+        if template:
+            return template.format(**kwargs)
+        return ""
+
+    def validate_url(self, url: str) -> bool:
+        """Validate URL against security settings"""
         from urllib.parse import urlparse

         try:
             parsed = urlparse(url)

             # Check scheme
+            if parsed.scheme not in ['http', 'https']:
                 return False

+            # Check for blocked domains
+            netloc = parsed.netloc.lower()
             for blocked in self.security.blocked_domains:
+                if blocked in netloc:
                     return False

+            # Check for local/private IPs if not allowed
+            if not self.security.allow_local_urls:
+                if any(local in netloc for local in ['localhost', '127.0.0.1', '0.0.0.0']):
+                    return False
+
+            if not self.security.allow_private_ips:
+                if any(private in netloc for private in ['10.', '172.', '192.168.']):
+                    return False

             return True

         except Exception:
             return False

+# Create global config instance
 config = Config()

+# Export commonly used configurations
+PERPLEXITY_CONFIG = config.perplexity
+SCRAPING_CONFIG = config.scraping
+MODEL_CONFIG = config.models
+EXPORT_CONFIG = config.export
+SECURITY_CONFIG = config.security
+UI_CONFIG = config.ui
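
For a quick sanity check of the new surface, here is a minimal usage sketch against the updated config.py; the project description and the printed line are illustrative assumptions, not part of the commit:

# Minimal usage sketch (illustrative; assumes config.py is importable on PYTHONPATH).
from config import config, PERPLEXITY_CONFIG

if config.is_perplexity_enabled():  # True only if PERPLEXITY_API_KEY was set before import
    # "sentiment_analysis" is one of the keys in PerplexityConfig.search_templates;
    # the templates use only the {max_sources} and {project_description} placeholders.
    prompt = config.get_search_template(
        "sentiment_analysis",
        max_sources=PERPLEXITY_CONFIG.default_max_sources,
        project_description="Customer review sentiment dataset",  # assumed example value
    )
    print(prompt.strip().splitlines()[0])  # "Find 20 high-quality sources ..."
else:
    print("PERPLEXITY_API_KEY not set; source discovery disabled")

Unknown template names fall through to an empty string rather than raising, so callers should treat "" as "no template available".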
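
One spot worth double-checking: the new SecurityConfig drops the blocked_domains field (the list now lives on ScrapingConfig), but Config.validate_url still iterates self.security.blocked_domains. The resulting AttributeError is swallowed by the broad except Exception, so as committed validate_url appears to return False for every URL. A minimal sketch of the suspected one-line fix, assuming the ScrapingConfig list is the intended source:

# Inside Config.validate_url (sketch of a possible fix, not part of the commit):
# the blocked-domain list moved from SecurityConfig to ScrapingConfig.
netloc = parsed.netloc.lower()
for blocked in self.scraping.blocked_domains:  # was: self.security.blocked_domains
    if blocked in netloc:
        return False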
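
Also note that api_key and hf_token are dataclass defaults, and self.debug / self.log_level are assigned when the module-level config = Config() runs, so every environment variable here is read once at import time. A small sketch of that timing, using only the standard library:

# Sketch of the import-time evaluation caveat (not part of the commit).
import os

os.environ.pop('PERPLEXITY_API_KEY', None)
import config  # defaults are captured during this import

os.environ['PERPLEXITY_API_KEY'] = 'pplx-example-key'  # hypothetical value, set too late
print(config.config.is_perplexity_enabled())  # False: the default was frozen at import
print(os.getenv('PERPLEXITY_API_KEY'))        # the variable itself is visible

Variables exported before the Python process starts (for example in a Space's settings) behave as expected; only in-process changes made after import are ignored.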