Spaces:

dura-garage
/

nep-spell

Sleeping

nep-spell / src /pages /📊DatasetsPreparation.py

Update src/pages/📊DatasetsPreparation.py

89f34cc verified over 1 year ago

1.26 kB

	import streamlit as st
	import pandas as pd

	st.set_page_config(page_title="Datasets Preparation", page_icon="👋", layout="wide")

	##########################################

	# Read lines from the text file
	with open("datafiles/sample_nep_corpus.txt") as file:
	items = file.readlines()

	# Split each line into separate columns
	datacorpus = pd.DataFrame(items, columns=["Content"])
	# datacorpus.columns =["Content"]


	# st.write(f"{datacorpus}")

	datasentences = pd.read_csv("datafiles/sample_nep_sentences.csv")

	data100k = pd.read_csv(
	r"datafiles/sample_nep_spell_100k.csv",
	nrows=50,
	)


	###########################################


	st.title("Dataset Preparation")

	st.write("---")
	st.header(
	"""
	A Large Scale Nepali Text Corpus
	"""
	)

	st.caption("Table 1. A Large Scale Nepali Text Corpus")

	st.dataframe(datacorpus, use_container_width=True)

	st.write("---")
	st.header(
	"""
	Sentence extracted from A Large Scale Nepali Text Corpus
	"""
	)
	st.caption("Table 2. Extracted sentences")
	st.dataframe(datasentences, use_container_width=True)

	st.write("---")
	st.header(
	"""
	Parallel dataset using extracted sentences
	"""
	)
	st.caption("Table 3. 100k Dataset used for training")
	st.dataframe(data100k, use_container_width=True)