peterkros committed on
Commit
f0b48bb
·
1 Parent(s): a3c22a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -8
app.py CHANGED
@@ -171,15 +171,30 @@ def predict(text):
171
  def classify_csv(file_obj):
172
  # Read the CSV file
173
  df = pd.read_csv(file_obj)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- # Assuming you have a column 'text' in your CSV that you want to classify
176
- predictions = []
177
- for _, row in df.iterrows():
178
- prediction = predict(row['text'])
179
- predictions.append(prediction)
180
-
181
- # Convert the predictions to a DataFrame
182
- results_df = pd.DataFrame(predictions, columns=["Prediction"])
183
  return results_df
184
 
185
  # Define the markdown text with bullet points
 
171
  def classify_csv(file_obj):
172
  # Read the CSV file
173
  df = pd.read_csv(file_obj)
174
+
175
+ # Check if the 'text' column is in the CSV file
176
+ if 'text' not in df.columns:
177
+ return "There is no column named 'text' in the file."
178
+
179
+ # Process the file if the 'text' column exists
180
+ results = []
181
+ for i in range(len(df)):
182
+ # Combine the current line with the 5 preceding lines for context
183
+ context_start = max(0, i - 5)
184
+ context = " ".join(df['text'][context_start:i+1])
185
+
186
+ # Truncate the context to fit within the model's max length
187
+ inputs = tokenizer_level1(context, truncation=True, max_length=512, return_tensors="pt")
188
+
189
+ # Extract the truncated text for prediction
190
+ truncated_context = tokenizer_level1.decode(inputs['input_ids'][0])
191
+
192
+ # Make a prediction using the truncated context
193
+ prediction = predict(truncated_context)
194
+ results.append((df['text'][i], prediction))
195
 
196
+ # Convert the results to a DataFrame with columns 'Budget Line' and 'Prediction'
197
+ results_df = pd.DataFrame(results, columns=["Budget Line", "Prediction"])
 
 
 
 
 
 
198
  return results_df
199
 
200
  # Define the markdown text with bullet points