GMARTINEZMILLA committed on
Commit
cc75240
·
verified ·
1 Parent(s): 426fa6d

Update utils.py

Browse files

Volvemos a versión que funciona visualmente, aunque no se pueda editar dentro del propio repositorio.

Files changed (1) hide show
  1. utils.py +43 -51
utils.py CHANGED
@@ -1,5 +1,5 @@
 
1
  import os
2
- import streamlit as st
3
  import pandas as pd
4
  import numpy as np
5
  import warnings
@@ -9,56 +9,57 @@ from sklearn.metrics.pairwise import cosine_similarity
9
  from joblib import dump, load
10
  from sklearn.preprocessing import normalize
11
  import re
12
- from datasets import load_dataset, Dataset
13
 
14
- # Load the dataset from Hugging Face Datasets
15
- def load_files_from_huggingface():
16
- dataset = load_dataset("GMARTINEZMILLA/deepsinisghtz_dataset", split="train")
 
 
 
 
 
17
 
18
- # Load CSV file
19
- cestas_file = dataset['cestas_final.csv']
20
- cestas = pd.read_csv(cestas_file)
21
 
22
- # Load joblib files
23
- count_matrix_file = dataset['count_matrix_0001.joblib']
24
- count_vectorizer_file = dataset['count_vectorizer_0001.joblib']
25
- tf_matrix = load(count_matrix_file)
26
- count_vectorizer = load(count_vectorizer_file)
27
 
28
- return cestas, tf_matrix, count_vectorizer
29
-
30
- # Save updated files back to Hugging Face Datasets
31
- def save_files_to_huggingface(cestas, tf_matrix, count_vectorizer):
32
- # Save updated CSV file
33
- cestas.to_csv('cestas_final.csv', index=False)
34
-
35
- # Create new dataset and push to Hugging Face
36
- dataset = Dataset.from_pandas(cestas)
37
- dataset.push_to_hub("GMARTINEZMILLA/deepsinisghtz_dataset")
38
 
39
- # Save updated joblib files
40
- dump(tf_matrix, 'count_matrix_0002.joblib') # Increment version
41
- dump(count_vectorizer, 'count_vectorizer_0002.joblib') # Increment version
 
 
 
 
 
42
 
43
- # Optionally, push joblib files back to Hugging Face Datasets (if supported)
44
- # You can manually add these files to the dataset in the Hugging Face interface if needed
 
 
 
 
 
 
45
 
46
- def get_next_version(file_prefix):
47
- """Return the next version number for joblib files."""
48
- # You can hardcode or generate a new version name (e.g., 0002, 0003, etc.)
49
- return f"{file_prefix}_0002.joblib"
50
 
51
  def recomienda_tf(new_basket, cestas, productos):
52
- # Load the latest versions of the matrix and vectorizer
53
- tf_matrix_file = 'count_matrix_0001.joblib'
54
- count_vectorizer_file = 'count_vectorizer_0001.joblib'
 
55
 
 
56
  tf_matrix = load(tf_matrix_file)
57
- count_vectorizer = load(count_vectorizer_file)
58
 
59
  # Convert the new basket into TF (Term Frequency) format
60
  new_basket_str = ' '.join(new_basket)
61
- new_basket_vector = count_vectorizer.transform([new_basket_str])
62
  new_basket_tf = normalize(new_basket_vector, norm='l1') # Normalize the count matrix for the current basket
63
 
64
  # Compare the new basket with previous ones
@@ -113,20 +114,15 @@ def retroalimentacion(cestas, cesta_nueva):
113
  # Convert basket from list to string
114
  cesta_unida = ' '.join(cesta_nueva)
115
 
116
- # Debugging message
117
- st.write(f"DEBUG: La nueva cesta es {cesta_unida}")
118
-
119
  # Add the new basket to the historical baskets if it doesn't already exist
120
  if not cestas['Cestas'].isin([cesta_unida]).any():
121
  cestas.loc[len(cestas)] = cesta_unida
122
- st.success("Cesta añadida al DataFrame.")
123
 
124
- # Save the updated DataFrame and joblib files back to Hugging Face Datasets
125
- save_files_to_huggingface(cestas, tf_matrix, count_vectorizer)
126
-
127
- st.write("DEBUG: Los archivos se han guardado en Hugging Face Datasets.")
128
  else:
129
- st.warning("⚠️ La cesta ya existe en el DataFrame.")
130
 
131
  # Re-vectorize the basket DataFrame
132
  count_vectorizer = CountVectorizer()
@@ -136,13 +132,9 @@ def retroalimentacion(cestas, cesta_nueva):
136
 
137
  # Save new versions of the vectorizer and matrix
138
  count_vectorizer_file = get_next_version('count_vectorizer')
139
- tf_matrix_file = get_next_version('count_matrix')
140
 
141
  dump(count_vectorizer, count_vectorizer_file)
142
  dump(tf_matrix, tf_matrix_file)
143
-
144
- # Debugging messages
145
- st.write(f"DEBUG: Se ha generado la nueva versión del count_vectorizer: {count_vectorizer_file}")
146
- st.write(f"DEBUG: Se ha generado la nueva versión del tf_matrix: {tf_matrix_file}")
147
 
148
  return None
 
1
+ parte de utils.py
2
  import os
 
3
  import pandas as pd
4
  import numpy as np
5
  import warnings
 
9
  from joblib import dump, load
10
  from sklearn.preprocessing import normalize
11
  import re
 
12
 
13
def get_next_version(file_prefix, folder='RecommendationFiles/'):
    """Return the filename for the next version of a versioned joblib file.

    Scans *folder* for files named '<file_prefix>_NNNN.joblib', takes the
    highest version number found, and returns the path for NNNN+1
    (version 0001 when no versioned file exists yet).

    Args:
        file_prefix: Base name of the file family (e.g. 'count_vectorizer').
        folder: Directory holding the versioned files; created if missing.

    Returns:
        os.path.join(folder, '<file_prefix>_NNNN.joblib') for the next
        version, zero-padded to 4 digits.
    """
    if not os.path.exists(folder):
        os.makedirs(folder)  # Ensure the folder exists

    # fullmatch anchors the whole filename (plain match() also accepted
    # e.g. 'foo_0001.joblib.bak'); re.escape guards against regex
    # metacharacters in the prefix. The walrus binds the match once per
    # file instead of matching twice.
    pattern = re.compile(rf"{re.escape(file_prefix)}_(\d+)\.joblib")
    versions = [
        int(m.group(1))
        for f in os.listdir(folder)
        if (m := pattern.fullmatch(f))
    ]

    # Determine the next version number (start at 1 when none exist)
    next_version = max(versions) + 1 if versions else 1

    # Return the next version filename with the folder path
    return os.path.join(folder, f"{file_prefix}_{next_version:04d}.joblib")
 
 
 
 
 
 
 
 
30
 
31
def get_latest_version(file_prefix, folder='RecommendationFiles/'):
    """Return the path of the most recent version of a versioned joblib file.

    Args:
        file_prefix: Base name of the file family (e.g. 'count_matrix').
        folder: Directory holding the versioned files.

    Returns:
        os.path.join(folder, '<file_prefix>_NNNN.joblib') with the highest
        NNNN found in *folder*.

    Raises:
        FileNotFoundError: If *folder* does not exist, or no versioned file
            for *file_prefix* is present in it.
    """
    if not os.path.exists(folder):
        raise FileNotFoundError(f"Folder '{folder}' does not exist")

    # fullmatch anchors the whole filename; re.escape guards against regex
    # metacharacters in the prefix. The walrus binds each match once.
    pattern = re.compile(rf"{re.escape(file_prefix)}_(\d+)\.joblib")
    versions = [
        int(m.group(1))
        for f in os.listdir(folder)
        if (m := pattern.fullmatch(f))
    ]

    if not versions:
        raise FileNotFoundError(f"No versions found for {file_prefix} in folder '{folder}'")

    latest_version = max(versions)
    return os.path.join(folder, f"{file_prefix}_{latest_version:04d}.joblib")
48
 
 
 
 
 
49
 
50
  def recomienda_tf(new_basket, cestas, productos):
51
+
52
+ # Get the latest versions of the matrix and vectorizer from the folder
53
+ tf_matrix_file = get_latest_version('count_matrix')
54
+ count_vectorizer_file = get_latest_version('count_vectorizer')
55
 
56
+ # Load the matrix TF and the vectorizer
57
  tf_matrix = load(tf_matrix_file)
58
+ count = load(count_vectorizer_file)
59
 
60
  # Convert the new basket into TF (Term Frequency) format
61
  new_basket_str = ' '.join(new_basket)
62
+ new_basket_vector = count.transform([new_basket_str])
63
  new_basket_tf = normalize(new_basket_vector, norm='l1') # Normalize the count matrix for the current basket
64
 
65
  # Compare the new basket with previous ones
 
114
  # Convert basket from list to string
115
  cesta_unida = ' '.join(cesta_nueva)
116
 
 
 
 
117
  # Add the new basket to the historical baskets if it doesn't already exist
118
  if not cestas['Cestas'].isin([cesta_unida]).any():
119
  cestas.loc[len(cestas)] = cesta_unida
120
+ print("Cesta añadida.")
121
 
122
+ # Re-save the updated baskets DataFrame
123
+ cestas.to_csv('RecommendationFiles/cestas_final.csv', index=False)
 
 
124
  else:
125
+ print("La cesta ya existe en el DataFrame.")
126
 
127
  # Re-vectorize the basket DataFrame
128
  count_vectorizer = CountVectorizer()
 
132
 
133
  # Save new versions of the vectorizer and matrix
134
  count_vectorizer_file = get_next_version('count_vectorizer')
135
+ tf_matrix_file = get_next_version('tf_matrix')
136
 
137
  dump(count_vectorizer, count_vectorizer_file)
138
  dump(tf_matrix, tf_matrix_file)
 
 
 
 
139
 
140
  return None