RedSparkie committed on
Commit
43d1339
·
verified ·
1 Parent(s): a981dce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -24
app.py CHANGED
@@ -1,41 +1,48 @@
1
  import gradio as gr
2
  import random
3
  import re
 
4
  from pathlib import Path
5
  from huggingface_hub import InferenceClient, snapshot_download
6
- from datasets import load_dataset
7
  import shutil
8
 
9
- # Descargar y preparar dataset de tarot
10
  def prepare_tarot_dataset():
 
11
  dataset_path = Path("tarot_dataset")
12
  image_dir = dataset_path / "images"
13
  image_dir.mkdir(parents=True, exist_ok=True)
14
 
15
- # Descargar dataset
16
- dataset = load_dataset("multimodalart/1920-raider-waite-tarot-public-domain", split="train")
17
 
18
- # Procesar metadatos y organizar imΓ‘genes
 
19
  card_map = {}
20
- pattern = re.compile(r'"([^"]+)"$') # Para extraer el nombre de la carta
21
-
22
- for item in dataset:
23
- text = item["text"]
24
- match = pattern.search(text)
25
- if match:
26
- card_name = match.group(1).lower()
27
- src_path = Path(item["file_name"])
28
- dest_path = image_dir / f"{card_name.replace(' ', '_')}.jpg"
29
-
30
- if not dest_path.exists():
31
- try:
32
- # Copiar imagen desde el dataset descargado
33
- shutil.copy(src_path, dest_path)
34
- except:
35
- continue
36
-
37
- card_map[card_name] = str(dest_path)
38
-
 
 
 
 
 
 
39
  return card_map
40
 
41
  # Preparar dataset y obtener mapeo de cartas
 
1
  import gradio as gr
2
  import random
3
  import re
4
+ import json
5
  from pathlib import Path
6
  from huggingface_hub import InferenceClient, snapshot_download
 
7
  import shutil
8
 
 
9
  def prepare_tarot_dataset():
10
+ repo_id = "multimodalart/1920-raider-waite-tarot-public-domain"
11
  dataset_path = Path("tarot_dataset")
12
  image_dir = dataset_path / "images"
13
  image_dir.mkdir(parents=True, exist_ok=True)
14
 
15
+ # Descargar repositorio completo
16
+ repo_path = snapshot_download(repo_id=repo_id, repo_type="dataset")
17
 
18
+ # Procesar archivo JSONL manualmente
19
+ jsonl_path = Path(repo_path) / "output_file.jsonl"
20
  card_map = {}
21
+ pattern = re.compile(r'"([^"]+)"$')
22
+
23
+ with open(jsonl_path, "r") as f:
24
+ for line in f:
25
+ try:
26
+ data = json.loads(line)
27
+ text = data["text"]
28
+ file_name = data["file_name"]
29
+
30
+ # Extraer nombre de la carta
31
+ match = pattern.search(text)
32
+ if match:
33
+ card_name = match.group(1).lower().replace('"', '')
34
+ src_image = Path(repo_path) / file_name
35
+ dest_image = image_dir / f"{card_name.replace(' ', '_')}.jpg"
36
+
37
+ if not dest_image.exists():
38
+ shutil.copy(src_image, dest_image)
39
+
40
+ card_map[card_name] = str(dest_image)
41
+
42
+ except (KeyError, json.JSONDecodeError) as e:
43
+ print(f"Error procesando lΓ­nea: {e}")
44
+ continue
45
+
46
  return card_map
47
 
48
  # Preparar dataset y obtener mapeo de cartas