# -*- coding: utf-8 -*-
"""08 - BagOfWords.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/16K9eNawK7Oli4ZnUm0r1nLcTiWRuTYW_
"""
# Commented out IPython magic to ensure Python compatibility.
# %%writefile 08-BagOfWords.py
# import csv
#
# class BagOfWords:
#     def transform(self, processed_data):
#         """
#         This function creates a Bag of Words (BoW) representation of the data.
#
#         Steps:
#         1. Read the unique words (the vocabulary) from a file.
#         2. Count the occurrences of each vocabulary word in every sentence of processed_data.
#         3. Save the BoW representation to a CSV file.
#         """
#
#         # Step 1: Read the unique words from "05 - unique words.txt"
#         unique_words = []  # List to store the vocabulary
#         with open("05 - unique words.txt", "r") as in_file:
#             for line in in_file:
#                 unique_words.append(line.strip())  # Add each word to the vocabulary
#
#         print(f"Unique words: {len(unique_words)}")  # Print the vocabulary size
#
#         # Step 2: Write the header row (one column per unique word) to "08 - BagOfWords.csv"
#         with open("08 - BagOfWords.csv", mode="w", newline='') as out_file:
#             writer = csv.writer(out_file)
#
#             # Write the header (the vocabulary)
#             writer.writerow(unique_words)
#
#             # Step 3: Write one BoW row per processed sentence
#             for index, data in enumerate(processed_data):
#                 word_count = {}  # Word counts for the current sentence
#
#                 # Count the occurrences of words in the current sentence
#                 for word in data:
#                     word_count[word] = word_count.get(word, 0) + 1
#
#                 # For each vocabulary word, record its count (0 if absent), in vocabulary order
#                 row = []
#                 for word in unique_words:
#                     row.append(word_count.get(word, 0))
#
#                 writer.writerow(row)  # Write the row to the CSV file
#
#                 print(f"Processed sentence {index + 1}")
#
!python /content/08-BagOfWords.py
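
# -----------------------------------------------------------------------------
# A minimal usage sketch (not part of the original notebook): how the class
# written above could be loaded and applied in a follow-up cell. The tokenized
# sentences below are made-up examples; in the real pipeline they would come
# from the earlier preprocessing steps, and "05 - unique words.txt" must already
# exist in the working directory for transform() to run.
import importlib.util

# The script name contains a dash, so a plain "import" statement cannot be used;
# load the module from its file path instead.
spec = importlib.util.spec_from_file_location("bagofwords", "/content/08-BagOfWords.py")
bagofwords = importlib.util.module_from_spec(spec)
spec.loader.exec_module(bagofwords)

# Hypothetical tokenized sentences standing in for the real preprocessed emails.
processed_data = [
    ["free", "prize", "claim", "prize"],
    ["meeting", "tomorrow", "at", "noon"],
]

# Each sentence becomes one row in "08 - BagOfWords.csv": the count of every
# vocabulary word in that sentence, with 0 for words that do not appear.
bow = bagofwords.BagOfWords()
bow.transform(processed_data)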