"""Gradio app: turn a PDF/TXT document into a one-sentence-per-row CSV.

The document comes either from an uploaded file or from a URL.  Non-English
text is translated to English on a best-effort basis before it is split
into sentences and written to ``dataset.csv``.
"""

import io
import os
import re
from urllib.parse import urlparse

import pandas as pd

# NOTE: requests, PyPDF2, googletrans and gradio are imported inside the
# functions that use them.  This keeps the text-processing path importable
# (and testable) on machines where the network/UI stack is not installed.

OUTPUT_CSV = 'dataset.csv'
REQUEST_TIMEOUT = 30  # seconds; without a timeout a dead server hangs the UI
_SENTENCE_END = re.compile(r'[.!?]+')


def _extension(name):
    """Return the lower-cased extension of *name*, e.g. ``'.pdf'``."""
    return os.path.splitext(name)[1].lower()


def _pdf_text(stream):
    """Return the text of every page of the PDF in *stream*, space-joined."""
    import PyPDF2

    if hasattr(PyPDF2, 'PdfReader'):
        # Current API; the camelCase names below were removed in PyPDF2 3.x.
        pages = PyPDF2.PdfReader(stream).pages
        # extract_text() may yield nothing for image-only pages.
        return ' '.join(page.extract_text() or '' for page in pages)
    # Legacy releases only expose the camelCase API.
    reader = PyPDF2.PdfFileReader(stream)
    return ' '.join(
        reader.getPage(index).extractText()
        for index in range(reader.getNumPages())
    )


def _read_local(path):
    """Return the text of the file at *path*, or None if unsupported."""
    extension = _extension(path)
    if extension == '.pdf':
        with open(path, 'rb') as handle:
            return _pdf_text(handle)
    if extension == '.txt':
        with open(path, encoding='utf-8') as handle:
            return handle.read()
    return None


def _read_remote(url):
    """Download *url* and return its text, or None if unsupported.

    Raises:
        requests.RequestException: on connection failure, timeout or a
            non-2xx HTTP status.
    """
    # Look at the URL *path* only, so "doc.pdf?download=1" is still a PDF.
    extension = _extension(urlparse(url).path)
    if extension not in ('.pdf', '.txt'):
        return None  # reject before spending a network round-trip

    import requests

    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing an HTML error page as the document.
    response.raise_for_status()
    if extension == '.pdf':
        return _pdf_text(io.BytesIO(response.content))
    return response.text


def _to_english(text):
    """Return *text* translated to English; on any failure return it as is."""
    if not text.strip():
        return text
    try:
        import googletrans

        # NOTE(review): assumes the synchronous googletrans Translator API;
        # confirm against the installed release.
        translator = googletrans.Translator()
        code = translator.detect(text).lang.lower()
        if code == 'en':
            return text
        translated = translator.translate(text, dest='en').text
        # LANGUAGES maps codes ('fr') to names ('french'); used for display.
        language = googletrans.LANGUAGES.get(code, code)
    except Exception as err:
        # Translation is deliberately best-effort: a missing package or a
        # network failure must not prevent the dataset from being created.
        print('Error:', err)
        return text
    print('The text has been translated from {} to English'.format(language))
    return translated


def _split_sentences(text):
    """Split *text* on runs of ``.``, ``!`` or ``?``; drop empty pieces."""
    pieces = (piece.strip() for piece in _SENTENCE_END.split(text))
    return [piece for piece in pieces if piece]


def upload(url=None, file=None):
    """Build ``dataset.csv`` from an uploaded file or a document URL.

    Args:
        url: Address of a ``.pdf`` or ``.txt`` document.  Used only when no
            file is given.
        file: Uploaded document, either a filesystem path or an object
            whose ``name`` attribute is that path.  Takes precedence over
            *url*.

    Returns:
        A status message for the UI: ``'Dataset created successfully!'``,
        ``'Invalid file format'`` or ``'No file or URL found'``.

    Raises:
        requests.RequestException: if the URL cannot be downloaded.
    """
    if file:
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name
        text = _read_local(path)
    elif url and url.strip():
        text = _read_remote(url.strip())
    else:
        return 'No file or URL found'

    if text is None:
        return 'Invalid file format'

    text = _to_english(text)
    dataset = pd.DataFrame({'sentence': _split_sentences(text)})
    dataset.to_csv(OUTPUT_CSV, index=False)
    return 'Dataset created successfully!'


def main():
    """Launch the Gradio interface around :func:`upload`."""
    import gradio as gr

    # Input order must match the positional parameters of upload().
    # NOTE(review): gr.File hands the callback a path or a temp-file object
    # depending on the Gradio release; upload() accepts both.
    gr.Interface(
        fn=upload,
        inputs=[gr.Textbox(label='url'), gr.File(label='file')],
        outputs='text',
    ).launch()


if __name__ == '__main__':
    main()