Spaces:
Runtime error
Runtime error
updates
Browse files
- .gitignore +1 -0
- data.dvc +4 -4
- src/data/process_data.py +3 -3
.gitignore
CHANGED
|
@@ -91,3 +91,4 @@ coverage.xml
|
|
| 91 |
|
| 92 |
.idea
|
| 93 |
.vscode
|
|
|
|
|
|
| 91 |
|
| 92 |
.idea
|
| 93 |
.vscode
|
| 94 |
+
/data
|
data.dvc
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
outs:
|
| 2 |
-
- md5:
|
| 3 |
-
path: data
|
| 4 |
-
size:
|
| 5 |
-
nfiles:
|
|
|
|
| 1 |
outs:
|
| 2 |
+
- md5: 4088e0a288132d141c28bd020548d107.dir
|
| 3 |
+
path: data
|
| 4 |
+
size: 2720315628
|
| 5 |
+
nfiles: 6
|
src/data/process_data.py
CHANGED
|
@@ -3,12 +3,12 @@ import pandas as pd
|
|
| 3 |
|
| 4 |
def process_data(split='train'):
|
| 5 |
df = pd.read_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/raw/{}.csv'.format(split))
|
| 6 |
-
df.
|
| 7 |
-
print(df.
|
| 8 |
df.to_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/processed/{}.csv'.format(split))
|
| 9 |
|
| 10 |
|
| 11 |
-
if __name__ == '
|
| 12 |
process_data(split='train')
|
| 13 |
process_data(split='test')
|
| 14 |
process_data(split='validation')
|
|
|
|
| 3 |
|
| 4 |
def process_data(split='train'):
|
| 5 |
df = pd.read_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/raw/{}.csv'.format(split))
|
| 6 |
+
df.columns = ['Unnamed: 0', 'input_text', 'output_text']
|
| 7 |
+
print(df.columns)
|
| 8 |
df.to_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/processed/{}.csv'.format(split))
|
| 9 |
|
| 10 |
|
| 11 |
+
if __name__ == '__main__':
|
| 12 |
process_data(split='train')
|
| 13 |
process_data(split='test')
|
| 14 |
process_data(split='validation')
|