Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -171,15 +171,30 @@ def predict(text):
|
|
171 |
def classify_csv(file_obj):
|
172 |
# Read the CSV file
|
173 |
df = pd.read_csv(file_obj)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
-
#
|
176 |
-
|
177 |
-
for _, row in df.iterrows():
|
178 |
-
prediction = predict(row['text'])
|
179 |
-
predictions.append(prediction)
|
180 |
-
|
181 |
-
# Convert the predictions to a DataFrame
|
182 |
-
results_df = pd.DataFrame(predictions, columns=["Prediction"])
|
183 |
return results_df
|
184 |
|
185 |
# Define the markdown text with bullet points
|
|
|
171 |
def classify_csv(file_obj):
    """Classify every row of an uploaded CSV using a sliding context window.

    Each row of the ``text`` column is predicted together with up to five
    preceding rows, so the model sees the lines leading up to it.

    Args:
        file_obj: Whatever ``pd.read_csv`` accepts as its first argument.

    Returns:
        A DataFrame with the columns ``"Budget Line"`` (the original cell
        value) and ``"Prediction"`` (whatever ``predict`` returns for that
        row), or an error-message string when the CSV has no ``text`` column.
    """
    context_rows = 5  # number of preceding rows joined to the current row
    max_tokens = 512  # token budget passed to the tokenizer

    df = pd.read_csv(file_obj)

    # Without a 'text' column there is nothing to classify; report it as text.
    if 'text' not in df.columns:
        return "There is no column named 'text' in the file."

    # Keep the untouched cells for the output column, and a string-only copy
    # for building the context: blank CSV cells are read as NaN (a float) and
    # numeric cells as numbers, either of which would make str.join raise.
    original_cells = df['text'].tolist()
    texts = ["" if pd.isna(cell) else str(cell) for cell in original_cells]

    results = []
    for i, cell in enumerate(original_cells):
        # Current row plus up to `context_rows` rows before it.
        window = texts[max(0, i - context_rows):i + 1]
        context = " ".join(window)

        # Tokenize only to cut the context down to the token budget.
        # NOTE(review): with default tokenizer settings truncation presumably
        # drops tokens from the END of the context, i.e. from the current row
        # first -- confirm whether left-side truncation is wanted here.
        inputs = tokenizer_level1(
            context, truncation=True, max_length=max_tokens, return_tensors="pt"
        )

        # Decode back to text for predict(). Special tokens are skipped so
        # their literal markers are not fed back in and tokenized again
        # (assumes tokenizer_level1 follows the Hugging Face decode API).
        truncated_context = tokenizer_level1.decode(
            inputs['input_ids'][0], skip_special_tokens=True
        )

        results.append((cell, predict(truncated_context)))

    # One output row per input row, under 'Budget Line' and 'Prediction'.
    return pd.DataFrame(results, columns=["Budget Line", "Prediction"])
|
199 |
|
200 |
# Define the markdown text with bullet points
|