import os
import re
import pandas as pd
from urllib.parse import urlparse
import logging
logger = logging.getLogger(__name__)
logging.basicConfig(
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S"
)
class UTILS:
    def __init__(self):
        pass

    def split_text(
        self,
        text
    ):
        # Split a comma-separated string into a list of trimmed values
        text = text.split(',')
        text = [t.strip() for t in text]
        return text
    def replace_newlines_and_spaces(
        self,
        text
    ):
        # Replace all newline characters with spaces
        text = text.replace("\n", " ")
        # Collapse runs of whitespace into a single space
        text = re.sub(r'\s+', ' ', text)
        return text
    def clean_df(
        self,
        df,
        dropna=True,
        fillna=False
    ):
        # Basic DataFrame cleanup: handle missing values, then drop duplicate rows
        if fillna:
            df.fillna('', inplace=True)
        if dropna:
            df.dropna(inplace=True)
        df = df.drop_duplicates().reset_index(drop=True)
        return df
    def validate_url_format(
        self,
        urls,
        url_type='urls'
    ):
        # Keep only well-formed URLs; for online PDFs, additionally require a .pdf extension
        valid_urls = []
        for url in urls:
            result = urlparse(url)
            # A valid URL must have both a scheme and a network location
            if all([result.scheme, result.netloc]):
                # Online PDF URLs should end with the .pdf extension
                if url_type == 'online_pdf' and not url.endswith('.pdf'):
                    continue
                valid_urls.append(url)
        logger.info(f'Valid URLs are: {valid_urls}')
        return valid_urls
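

# Minimal usage sketch (illustrative only, not part of the original module):
# it exercises each UTILS helper with small hypothetical inputs
# (sample_df and sample_urls below are made-up example data).
if __name__ == "__main__":
    utils = UTILS()

    # Comma splitting and whitespace normalization
    print(utils.split_text("wheat, rice , maize"))        # ['wheat', 'rice', 'maize']
    print(utils.replace_newlines_and_spaces("a\nb   c"))  # 'a b c'

    # DataFrame cleanup: drops the NaN row and the duplicate row
    sample_df = pd.DataFrame({"crop": ["wheat", "wheat", None]})
    print(utils.clean_df(sample_df))

    # URL validation: only the well-formed .pdf URL survives
    sample_urls = ["https://example.com/doc.pdf", "not-a-url"]
    print(utils.validate_url_format(sample_urls, url_type='online_pdf'))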