CamiloVega committed on
Commit a6f5353 · verified · 1 Parent(s): 22888c3

Update app.py

Files changed (1):
  1. app.py +169 -157
app.py CHANGED
@@ -4,31 +4,27 @@ import whisper
 import tempfile
 import gradio as gr
 from pydub import AudioSegment
-import fitz # PyMuPDF para manejar PDFs
-import docx # Para manejar archivos .docx
-import pandas as pd # Para manejar archivos .xlsx y .csv
-#from google.colab import userdata # Importa userdata de google.colab
+import fitz # PyMuPDF for handling PDFs
+import docx # For handling .docx files
+import pandas as pd # For handling .xlsx and .csv files
 import requests
 from bs4 import BeautifulSoup
 from moviepy.editor import VideoFileClip
 import yt_dlp
 import logging
 
-# Configurar logging
+# Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-# Configura tu clave API de OpenAI usando Google Colab userdata
-#openai.api_key = userdata.get('OPENAI_API_KEY')
-
-# Cargar las variables de entorno desde el entorno de Hugging Face
+# Configure your OpenAI API key
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
-# Cargar el modelo Whisper de mayor calidad una vez
+# Load the highest quality Whisper model once
 model = whisper.load_model("large")
 
 def download_social_media_video(url):
-    """Descarga un video de redes sociales."""
+    """Downloads a video from social media."""
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
@@ -42,256 +38,272 @@ def download_social_media_video(url):
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             info_dict = ydl.extract_info(url, download=True)
             audio_file = f"{info_dict['id']}.mp3"
-            logger.info(f"Video descargado exitosamente: {audio_file}")
+            logger.info(f"Video successfully downloaded: {audio_file}")
             return audio_file
     except Exception as e:
-        logger.error(f"Error al descargar el video: {str(e)}")
+        logger.error(f"Error downloading video: {str(e)}")
         raise
 
 def convert_video_to_audio(video_file):
-    """Convierte un archivo de video a audio."""
+    """Converts a video file to audio."""
     try:
         video = VideoFileClip(video_file)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
             video.audio.write_audiofile(temp_file.name)
-            logger.info(f"Video convertido a audio: {temp_file.name}")
+            logger.info(f"Video converted to audio: {temp_file.name}")
             return temp_file.name
     except Exception as e:
-        logger.error(f"Error al convertir el video a audio: {str(e)}")
+        logger.error(f"Error converting video to audio: {str(e)}")
         raise
 
 def preprocess_audio(audio_file):
-    """Preprocesa el archivo de audio para mejorar la calidad."""
+    """Preprocesses the audio file to improve quality."""
     try:
         audio = AudioSegment.from_file(audio_file)
         audio = audio.apply_gain(-audio.dBFS + (-20))
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
             audio.export(temp_file.name, format="mp3")
-            logger.info(f"Audio preprocesado: {temp_file.name}")
+            logger.info(f"Audio preprocessed: {temp_file.name}")
             return temp_file.name
     except Exception as e:
-        logger.error(f"Error al preprocesar el archivo de audio: {str(e)}")
+        logger.error(f"Error preprocessing audio file: {str(e)}")
         raise
 
-def transcribir_audio(file):
-    """Transcribe un archivo de audio o video."""
+def transcribe_audio(file):
+    """Transcribes an audio or video file."""
     try:
         if isinstance(file, str) and file.startswith('http'):
-            logger.info(f"Descargando video de red social: {file}")
-            archivo_path = download_social_media_video(file)
+            logger.info(f"Downloading social media video: {file}")
+            file_path = download_social_media_video(file)
         elif isinstance(file, str) and file.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
-            logger.info(f"Convirtiendo video local a audio: {file}")
-            archivo_path = convert_video_to_audio(file)
+            logger.info(f"Converting local video to audio: {file}")
+            file_path = convert_video_to_audio(file)
         else:
-            logger.info(f"Preprocesando archivo de audio: {file}")
-            archivo_path = preprocess_audio(file)
-
-        logger.info(f"Transcribiendo audio: {archivo_path}")
-        resultado = model.transcribe(archivo_path)
-        transcripcion = resultado.get("text", "Error en la transcripción")
-        logger.info(f"Transcripción completada: {transcripcion[:50]}...")
-        return transcripcion
+            logger.info(f"Preprocessing audio file: {file}")
+            file_path = preprocess_audio(file)
+
+        logger.info(f"Transcribing audio: {file_path}")
+        result = model.transcribe(file_path)
+        transcription = result.get("text", "Error in transcription")
+        logger.info(f"Transcription completed: {transcription[:50]}...")
+        return transcription
     except Exception as e:
-        logger.error(f"Error al procesar el archivo: {str(e)}")
-        return f"Error al procesar el archivo: {str(e)}"
+        logger.error(f"Error processing file: {str(e)}")
+        return f"Error processing file: {str(e)}"
 
-def leer_documento(documento_path):
-    """Lee el contenido de un documento PDF, DOCX, XLSX o CSV."""
+def read_document(document_path):
+    """Reads content from PDF, DOCX, XLSX or CSV documents."""
     try:
-        if documento_path.endswith(".pdf"):
-            doc = fitz.open(documento_path)
-            return "\n".join([pagina.get_text() for pagina in doc])
-        elif documento_path.endswith(".docx"):
-            doc = docx.Document(documento_path)
-            return "\n".join([parrafo.text for parrafo in doc.paragraphs])
-        elif documento_path.endswith(".xlsx"):
-            return pd.read_excel(documento_path).to_string()
-        elif documento_path.endswith(".csv"):
-            return pd.read_csv(documento_path).to_string()
+        if document_path.endswith(".pdf"):
+            doc = fitz.open(document_path)
+            return "\n".join([page.get_text() for page in doc])
+        elif document_path.endswith(".docx"):
+            doc = docx.Document(document_path)
+            return "\n".join([paragraph.text for paragraph in doc.paragraphs])
+        elif document_path.endswith(".xlsx"):
+            return pd.read_excel(document_path).to_string()
+        elif document_path.endswith(".csv"):
+            return pd.read_csv(document_path).to_string()
         else:
-            return "Tipo de archivo no soportado. Por favor suba un documento PDF, DOCX, XLSX o CSV."
+            return "Unsupported file type. Please upload a PDF, DOCX, XLSX or CSV document."
     except Exception as e:
-        return f"Error al leer el documento: {str(e)}"
+        return f"Error reading document: {str(e)}"
 
-def leer_url(url):
-    """Lee el contenido de una URL."""
+def read_url(url):
+    """Reads content from a URL."""
     try:
         response = requests.get(url)
         response.raise_for_status()
         soup = BeautifulSoup(response.content, 'html.parser')
         return soup.get_text()
     except Exception as e:
-        return f"Error al leer la URL: {str(e)}"
+        return f"Error reading URL: {str(e)}"
 
-def procesar_contenido_social(url):
-    """Procesa el contenido de una URL de red social, manejando tanto texto como video."""
+def process_social_content(url):
+    """Processes content from a social media URL, handling both text and video."""
     try:
-        # Primero, intentamos leer el contenido como texto
-        contenido_texto = leer_url(url)
+        # First, try to read content as text
+        text_content = read_url(url)
 
-        # Luego, intentamos procesar como video
+        # Then, try to process as video
         try:
-            contenido_video = transcribir_audio(url)
+            video_content = transcribe_audio(url)
         except Exception:
-            contenido_video = None
+            video_content = None
 
         return {
-            "texto": contenido_texto,
-            "video": contenido_video
+            "text": text_content,
+            "video": video_content
         }
     except Exception as e:
-        logger.error(f"Error al procesar contenido social: {str(e)}")
+        logger.error(f"Error processing social content: {str(e)}")
         return None
 
-def generar_noticia(instrucciones, hechos, tamaño, tono, *args):
-    """Genera una noticia a partir de instrucciones, hechos, URLs, documentos, transcripciones y contenido de redes sociales."""
-    base_de_conocimiento = {
-        "instrucciones": instrucciones,
-        "hechos": hechos,
-        "contenido_documentos": [],
+def generate_news(instructions, facts, size, tone, *args):
+    """Generates a news article from instructions, facts, URLs, documents, transcriptions, and social media content."""
+    knowledge_base = {
+        "instructions": instructions,
+        "facts": facts,
+        "document_content": [],
         "audio_data": [],
-        "contenido_urls": [],
-        "contenido_social": []
+        "url_content": [],
+        "social_content": []
     }
-    num_audios = 5 * 3 # 5 audios/videos * 3 campos (archivo, nombre, cargo)
-    num_social_urls = 3 * 3 # 3 URLs de redes sociales * 3 campos (URL, nombre, contexto)
-    num_urls = 5 # 5 URLs generales
+    num_audios = 5 * 3 # 5 audios/videos * 3 fields (file, name, position)
+    num_social_urls = 3 * 3 # 3 social media URLs * 3 fields (URL, name, context)
+    num_urls = 5 # 5 general URLs
     audios = args[:num_audios]
     social_urls = args[num_audios:num_audios+num_social_urls]
     urls = args[num_audios+num_social_urls:num_audios+num_social_urls+num_urls]
-    documentos = args[num_audios+num_social_urls+num_urls:]
+    documents = args[num_audios+num_social_urls+num_urls:]
 
     for url in urls:
         if url:
-            base_de_conocimiento["contenido_urls"].append(leer_url(url))
+            knowledge_base["url_content"].append(read_url(url))
 
-    for documento in documentos:
-        if documento is not None:
-            base_de_conocimiento["contenido_documentos"].append(leer_documento(documento.name))
+    for document in documents:
+        if document is not None:
+            knowledge_base["document_content"].append(read_document(document.name))
 
     for i in range(0, len(audios), 3):
-        audio_file, nombre, cargo = audios[i:i+3]
+        audio_file, name, position = audios[i:i+3]
         if audio_file is not None:
-            base_de_conocimiento["audio_data"].append({"audio": audio_file, "nombre": nombre, "cargo": cargo})
+            knowledge_base["audio_data"].append({"audio": audio_file, "name": name, "position": position})
 
     for i in range(0, len(social_urls), 3):
-        social_url, social_nombre, social_contexto = social_urls[i:i+3]
+        social_url, social_name, social_context = social_urls[i:i+3]
         if social_url:
-            contenido_social = procesar_contenido_social(social_url)
-            if contenido_social:
-                base_de_conocimiento["contenido_social"].append({
+            social_content = process_social_content(social_url)
+            if social_content:
+                knowledge_base["social_content"].append({
                     "url": social_url,
-                    "nombre": social_nombre,
-                    "contexto": social_contexto,
-                    "texto": contenido_social["texto"],
-                    "video": contenido_social["video"]
+                    "name": social_name,
+                    "context": social_context,
+                    "text": social_content["text"],
+                    "video": social_content["video"]
                 })
-                logger.info(f"Contenido de red social procesado: {social_url}")
+                logger.info(f"Social media content processed: {social_url}")
 
-    transcripciones_texto, transcripciones_brutas = "", ""
+    transcriptions_text, raw_transcriptions = "", ""
 
-    for idx, data in enumerate(base_de_conocimiento["audio_data"]):
+    for idx, data in enumerate(knowledge_base["audio_data"]):
         if data["audio"] is not None:
-            transcripcion = transcribir_audio(data["audio"])
-            transcripcion_texto = f'"{transcripcion}" - {data["nombre"]}, {data["cargo"]}'
-            transcripcion_bruta = f'[Audio/Video {idx + 1}]: "{transcripcion}" - {data["nombre"]}, {data["cargo"]}'
-            transcripciones_texto += transcripcion_texto + "\n"
-            transcripciones_brutas += transcripcion_bruta + "\n\n"
-
-    for data in base_de_conocimiento["contenido_social"]:
-        if data["texto"]:
-            transcripcion_texto = f'[Texto de red social]: "{data["texto"][:200]}..." - {data["nombre"]}, {data["contexto"]}'
-            transcripciones_texto += transcripcion_texto + "\n"
-            transcripciones_brutas += transcripcion_texto + "\n\n"
+            transcription = transcribe_audio(data["audio"])
+            transcription_text = f'"{transcription}" - {data["name"]}, {data["position"]}'
+            raw_transcription = f'[Audio/Video {idx + 1}]: "{transcription}" - {data["name"]}, {data["position"]}'
+            transcriptions_text += transcription_text + "\n"
+            raw_transcriptions += raw_transcription + "\n\n"
+
+    for data in knowledge_base["social_content"]:
+        if data["text"]:
+            transcription_text = f'[Social media text]: "{data["text"][:200]}..." - {data["name"]}, {data["context"]}'
+            transcriptions_text += transcription_text + "\n"
+            raw_transcriptions += transcription_text + "\n\n"
         if data["video"]:
-            transcripcion_video = f'[Video de red social]: "{data["video"]}" - {data["nombre"]}, {data["contexto"]}'
-            transcripciones_texto += transcripcion_video + "\n"
-            transcripciones_brutas += transcripcion_video + "\n\n"
-
-    contenido_documentos = "\n\n".join(base_de_conocimiento["contenido_documentos"])
-    contenido_urls = "\n\n".join(base_de_conocimiento["contenido_urls"])
-
-    prompt_interno = """
-    Instrucciones para el modelo:
-    - Debes seguir los principios de una noticia: es decir, procura siempre responder las 5 W de una noticia en el primer párrafo (Who?, What?, When?, Where?, Why?).
-    - Asegúrate de que al menos el 80% de las citas sean directas y estén entrecomilladas.
-    - El 20% restante puede ser citas indirectas.
-    - No inventes información nueva.
-    - riguroso con los hechos proporcionados.
-    - Al procesar los documentos cargados, extrae y resalta citas importantes y testimonios textuales de las fuentes.
-    - Al procesar los documentos cargados, extrae y resalta cifras clave.
-    - Evita usar la fecha al comienzo del cuerpo de la noticia. Empieza directamente con las 5W.
-    - Incluye el contenido de las redes sociales de manera relevante, citando la fuente y proporcionando el contexto adecuado.
-    - Asegúrate de relacionar el contexto proporcionado para el contenido de red social con su transcripción o texto correspondiente.
+            transcription_video = f'[Social media video]: "{data["video"]}" - {data["name"]}, {data["context"]}'
+            transcriptions_text += transcription_video + "\n"
+            raw_transcriptions += transcription_video + "\n\n"
+
+    document_content = "\n\n".join(knowledge_base["document_content"])
+    url_content = "\n\n".join(knowledge_base["url_content"])
+
+    internal_prompt = """
+    Instructions for the model:
+    - Follow news article principles: answer the 5 Ws in the first paragraph (Who?, What?, When?, Where?, Why?).
+    - Ensure at least 80% of quotes are direct and in quotation marks.
+    - The remaining 20% can be indirect quotes.
+    - Don't invent new information.
+    - Be rigorous with provided facts.
+    - When processing uploaded documents, extract and highlight important quotes and testimonials from sources.
+    - When processing uploaded documents, extract and highlight key figures.
+    - Avoid using the date at the beginning of the news body. Start directly with the 5Ws.
+    - Include social media content relevantly, citing the source and providing proper context.
+    - Make sure to relate the provided context for social media content with its corresponding transcription or text.
     """
 
     prompt = f"""
-    {prompt_interno}
-    Escribe una noticia con la siguiente información, incluyendo un título, un gancho de 15 palabras (el gancho es lo que se conoce en inglés como hook, información adicional que complementa el título), y el cuerpo del contenido cuyo tamaño es {tamaño} palabras. El tono debe ser {tono}.
-    Instrucciones: {base_de_conocimiento["instrucciones"]}
-    Hechos: {base_de_conocimiento["hechos"]}
-    Contenido adicional de los documentos: {contenido_documentos}
-    Contenido adicional de las URLs: {contenido_urls}
-    Utiliza las siguientes transcripciones como citas directas e indirectas (sin cambiar ni inventar contenido):
-    {transcripciones_texto}
+    {internal_prompt}
+    Write a news article with the following information, including a title, a 15-word hook (additional information that complements the title), and the content body with {size} words. The tone should be {tone}.
+    Instructions: {knowledge_base["instructions"]}
+    Facts: {knowledge_base["facts"]}
+    Additional content from documents: {document_content}
+    Additional content from URLs: {url_content}
+    Use the following transcriptions as direct and indirect quotes (without changing or inventing content):
+    {transcriptions_text}
     """
 
     try:
-        respuesta = openai.ChatCompletion.create(
+        response = openai.ChatCompletion.create(
             model="gpt-4o-mini",
             messages=[{"role": "user", "content": prompt}],
             temperature=0.1
         )
-        noticia = respuesta['choices'][0]['message']['content']
-        return noticia, transcripciones_brutas
+        news = response['choices'][0]['message']['content']
+        return news, raw_transcriptions
     except Exception as e:
-        logger.error(f"Error al generar la noticia: {str(e)}")
-        return f"Error al generar la noticia: {str(e)}", ""
+        logger.error(f"Error generating news article: {str(e)}")
+        return f"Error generating news article: {str(e)}", ""
 
 with gr.Blocks() as demo:
-    gr.Markdown("## Generador de noticias todo en uno")
+    gr.Markdown("## All-in-One News Generator")
+
+    # Add tool description and attribution
+    gr.Markdown("""
+    ### About this tool
+
+    This AI-powered news generator helps journalists and content creators produce news articles by processing multiple types of input:
+    - Audio and video files with automatic transcription
+    - Social media content
+    - Documents (PDF, DOCX, XLSX, CSV)
+    - Web URLs
+
+    The tool uses advanced AI to generate well-structured news articles following journalistic principles and maintaining the integrity of source quotes.
+
+    Created by [Camilo Vega](https://www.linkedin.com/in/camilo-vega-169084b1/), AI Consultant
+    """)
+
     with gr.Row():
         with gr.Column(scale=2):
-            instrucciones = gr.Textbox(label="Instrucciones para la noticia", lines=2)
-            hechos = gr.Textbox(label="Describe los hechos de la noticia", lines=4)
-            tamaño = gr.Number(label="Tamaño del cuerpo de la noticia (en palabras)", value=100)
-            tono = gr.Dropdown(label="Tono de la noticia", choices=["serio", "neutral", "divertido"], value="neutral")
+            instructions = gr.Textbox(label="News article instructions", lines=2)
+            facts = gr.Textbox(label="Describe the news facts", lines=4)
+            size = gr.Number(label="Content body size (in words)", value=100)
+            tone = gr.Dropdown(label="News tone", choices=["serious", "neutral", "lighthearted"], value="neutral")
         with gr.Column(scale=3):
-            inputs_list = [instrucciones, hechos, tamaño, tono]
+            inputs_list = [instructions, facts, size, tone]
             with gr.Tabs():
                 for i in range(1, 6):
                     with gr.TabItem(f"Audio/Video {i}"):
                         file = gr.File(label=f"Audio/Video {i}", type="filepath", file_types=["audio", "video"])
-                        nombre = gr.Textbox(label="Nombre", scale=1)
-                        cargo = gr.Textbox(label="Cargo", scale=1)
-                        inputs_list.extend([file, nombre, cargo])
+                        name = gr.Textbox(label="Name", scale=1)
+                        position = gr.Textbox(label="Position", scale=1)
+                        inputs_list.extend([file, name, position])
                 for i in range(1, 4):
-                    with gr.TabItem(f"Red Social {i}"):
-                        social_url = gr.Textbox(label=f"URL de red social {i}", lines=1)
-                        social_nombre = gr.Textbox(label=f"Nombre de persona/cuenta {i}", scale=1)
-                        social_contexto = gr.Textbox(label=f"Contexto del contenido {i}", lines=2)
-                        inputs_list.extend([social_url, social_nombre, social_contexto])
+                    with gr.TabItem(f"Social Media {i}"):
+                        social_url = gr.Textbox(label=f"Social media URL {i}", lines=1)
+                        social_name = gr.Textbox(label=f"Person/account name {i}", scale=1)
+                        social_context = gr.Textbox(label=f"Content context {i}", lines=2)
+                        inputs_list.extend([social_url, social_name, social_context])
                 for i in range(1, 6):
                     with gr.TabItem(f"URL {i}"):
                         url = gr.Textbox(label=f"URL {i}", lines=1)
                         inputs_list.append(url)
                 for i in range(1, 6):
-                    with gr.TabItem(f"Documento {i}"):
-                        documento = gr.File(label=f"Documento {i}", type="filepath", file_count="single")
-                        inputs_list.append(documento)
+                    with gr.TabItem(f"Document {i}"):
+                        document = gr.File(label=f"Document {i}", type="filepath", file_count="single")
+                        inputs_list.append(document)
 
-    gr.Markdown("---") # Separador visual
+    gr.Markdown("---") # Visual separator
 
     with gr.Row():
-        transcripciones_output = gr.Textbox(label="Transcripciones", lines=10)
+        transcriptions_output = gr.Textbox(label="Transcriptions", lines=10)
 
-    gr.Markdown("---") # Separador visual
+    gr.Markdown("---") # Visual separator
 
     with gr.Row():
-        generar = gr.Button("Generar borrador")
+        generate = gr.Button("Generate Draft")
     with gr.Row():
-        noticia_output = gr.Textbox(label="Borrador generado", lines=20)
+        news_output = gr.Textbox(label="Generated Draft", lines=20)
 
-    generar.click(fn=generar_noticia, inputs=inputs_list, outputs=[noticia_output, transcripciones_output])
+    generate.click(fn=generate_news, inputs=inputs_list, outputs=[news_output, transcriptions_output])
 
 demo.launch(share=True)
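
For readers tracing the new generate_news signature, the flat inputs_list built in the Gradio block has to line up exactly with the slices taken from *args. Below is a minimal standalone sketch of that mapping, using hypothetical placeholder values rather than real Gradio components; the field counts (5 audio/video triplets, 3 social media triplets, 5 general URLs, then the documents) come from the constants in the code above.

# Standalone sketch (not part of app.py): how generate_news slices *args.
# The counts mirror the constants defined inside the function.
num_audios = 5 * 3        # 5 Audio/Video tabs x (file, name, position)
num_social_urls = 3 * 3   # 3 Social Media tabs x (URL, name, context)
num_urls = 5              # 5 general URL tabs

# Hypothetical flat argument list, in the same order the UI appends components:
args = (
    [None, "", ""] * 5    # audio/video triplets
    + ["", "", ""] * 3    # social media triplets
    + [""] * num_urls     # general URLs
    + [None] * 5          # document files
)

audios = args[:num_audios]
social_urls = args[num_audios:num_audios + num_social_urls]
urls = args[num_audios + num_social_urls:num_audios + num_social_urls + num_urls]
documents = args[num_audios + num_social_urls + num_urls:]

assert len(audios) == 15 and len(social_urls) == 9
assert len(urls) == 5 and len(documents) == 5

If components were ever added to inputs_list out of this order, the slices above would silently mis-assign fields, so the tab construction order and these constants need to stay in sync.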