Spaces:

peterkros
/

COFOG-Bert-AutoClassifier

Sleeping

peterkros committed on Dec 20, 2023

Commit

f0b48bb

1 Parent(s): a3c22a3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -171,15 +171,30 @@ def predict(text):
 def classify_csv(file_obj):
     # Read the CSV file
     df = pd.read_csv(file_obj)
-    # Assuming you have a column 'text' in your CSV that you want to classify
-    predictions = []
-    for _, row in df.iterrows():
-        prediction = predict(row['text'])
-        predictions.append(prediction)
-    # Convert the predictions to a DataFrame
-    results_df = pd.DataFrame(predictions, columns=["Prediction"])
     return results_df
 # Define the markdown text with bullet points

 def classify_csv(file_obj):
     # Read the CSV file
     df = pd.read_csv(file_obj)
+    # Check if the 'text' column is in the CSV file
+    if 'text' not in df.columns:
+        return "There is no column named 'text' in the file."
+    # Process the file if the 'text' column exists
+    results = []
+    for i in range(len(df)):
+        # Combine the current line with the 5 preceding lines for context
+        context_start = max(0, i - 5)
+        context = " ".join(df['text'][context_start:i+1])
+        # Truncate the context to fit within the model's max length
+        inputs = tokenizer_level1(context, truncation=True, max_length=512, return_tensors="pt")
+        # Extract the truncated text for prediction
+        truncated_context = tokenizer_level1.decode(inputs['input_ids'][0])
+        # Make a prediction using the truncated context
+        prediction = predict(truncated_context)
+        results.append((df['text'][i], prediction))
+    # Convert the results to a DataFrame with columns 'Line' and 'Prediction'
+    results_df = pd.DataFrame(results, columns=["Budget Line", "Prediction"])
     return results_df
 # Define the markdown text with bullet points