File size: 2,311 Bytes
123bbaa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# -*- coding: utf-8 -*-
"""08 - BagOfWords.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/16K9eNawK7Oli4ZnUm0r1nLcTiWRuTYW_
"""

# Commented out IPython magic to ensure Python compatibility.
# %%writefile 08-BagOfWords.py
# import csv
# 
# class BagOfWords:
#     """Builds a Bag of Words (BoW) count table and writes it to a CSV file."""
#
#     def transform(self, processed_data):
#         """
#         Create a Bag of Words (BoW) representation of the data and save it as CSV.
#
#         Steps:
#         1. Read the vocabulary (one word per line) from "05 - unique words.txt".
#         2. Write the vocabulary as the header row of "08 - BagOfWords.csv".
#         3. For every sentence in processed_data, count how often each
#            vocabulary word occurs and write the counts as one CSV row.
#
#         Args:
#             processed_data: iterable of sentences, each itself an iterable of
#                 words (presumably lists of tokens produced by an earlier
#                 preprocessing step -- confirm against the caller).
#
#         Returns:
#             None. The result is written to "08 - BagOfWords.csv" in the
#             current working directory.
#         """
#
#         # Step 1: Read the vocabulary from "05 - unique words.txt".
#         # Each line is stripped of surrounding whitespace and kept in file order,
#         # which also fixes the column order of the output.
#         with open("05 - unique words.txt", "r") as in_file:
#             unique_words = [line.strip() for line in in_file]
#
#         print(f"Unique words: {len(unique_words)}")  # Print the count of unique words
#
#         # Step 2: Write the header (the vocabulary) to "08 - BagOfWords.csv".
#         with open("08 - BagOfWords.csv", mode="w", newline='') as out_file:
#             writer = csv.writer(out_file)
#             writer.writerow(unique_words)
#
#             # Step 3: Write one row of counts per sentence.
#             # enumerate() supplies the running sentence number directly;
#             # looking it up with processed_data.index(data) reported the wrong
#             # number for repeated sentences and rescanned the list on every row.
#             for sentence_number, data in enumerate(processed_data, start=1):
#                 word_count = {}  # Occurrences of each word in the current sentence
#                 for word in data:
#                     word_count[word] = word_count.get(word, 0) + 1
#
#                 # Words missing from the sentence get a count of 0; words in
#                 # the sentence but not in the vocabulary are ignored.
#                 writer.writerow([word_count.get(word, 0) for word in unique_words])
#
#                 print(f"Processed sentence {sentence_number}")
# 
#

# NOTE: the line below is an IPython/Colab shell escape, not Python syntax; it
# only works inside a notebook. It runs the script that the %%writefile cell
# above writes out (assumes the notebook's working directory is /content --
# confirm). That script only defines BagOfWords; nothing in it calls transform().
!python /content/08-BagOfWords.py