Miguel Castro committed
Commit ecff187 · 1 Parent(s): 9ecbafc

Explicitly add [PAD] token, add Geist icons and styles

Files changed (1)
  1. script_analyzer.py +17 -24
script_analyzer.py CHANGED
@@ -2,27 +2,30 @@ import gradio as gr
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, AutoModelForCausalLM
 import matplotlib.pyplot as plt

-# Load advanced sentiment analysis model
+# Load sentiment and generation models as previously set up
 tokenizer_sentiment = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
 model_sentiment = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
 sentiment_classifier = pipeline("sentiment-analysis", model=model_sentiment, tokenizer=tokenizer_sentiment)

-# Load open-source language model for description and music cue generation (GPT-J or GPT-Neo)
 tokenizer_gpt = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
 model_gpt = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B")

-# Updated generate_text function with attention_mask and pad_token_id
+# Updated generate_text function with padding token fix
 def generate_text(prompt):
+    # Add '[PAD]' as a padding token if not set
+    if tokenizer_gpt.pad_token is None:
+        tokenizer_gpt.add_special_tokens({'pad_token': '[PAD]'})
+
     # Prepare the input tensors with attention_mask and padding
     inputs = tokenizer_gpt(prompt, return_tensors="pt", padding=True, truncation=True, max_length=50)
     # Generate text using max_new_tokens instead of max_length
     outputs = model_gpt.generate(
         inputs["input_ids"],
         attention_mask=inputs["attention_mask"],
-        max_new_tokens=50, # Controls the new tokens generated beyond input length
+        max_new_tokens=50,
         num_return_sequences=1,
         no_repeat_ngram_size=2,
-        pad_token_id=tokenizer_gpt.eos_token_id # Sets padding to eos_token_id to prevent issues
+        pad_token_id=tokenizer_gpt.pad_token_id # Ensure pad_token_id is set
     )
     generated_text = tokenizer_gpt.decode(outputs[0], skip_special_tokens=True)
     return generated_text.strip()
@@ -37,25 +40,20 @@ def analyze_script(script):
     analysis_results = []

     for i, line in enumerate(lines):
-        # Perform sentiment analysis with advanced model
         result = sentiment_classifier(line)[0]
         sentiment = result['label']
         score = result['score']

-        # Generate a detailed scene description based on sentiment and line content
         description_prompt = f"Describe a scene with the sentiment '{sentiment}' for the line: '{line}'"
         description = generate_text(description_prompt)

-        # Generate a specific music cue suggestion based on sentiment
         music_cue_prompt = f"Suggest music elements (like tempo, key, and instrumentation) that would fit a scene with the sentiment '{sentiment}': '{line}'"
         music_cue = generate_text(music_cue_prompt)

-        # Append data for display and graph
         all_scores.append(score)
         descriptions.append(description)
         music_cues.append(music_cue)

-        # Format analysis results
         analysis_results.append(
             {
                 "Line": f"Line {i + 1}: {line}",
@@ -65,7 +63,6 @@ def analyze_script(script):
             }
         )

-    # Generate the emotional arc graph for the entire script
     graph_path = generate_script_graph()
     return analysis_results, graph_path

@@ -74,7 +71,6 @@ def generate_script_graph():
     plt.figure(figsize=(12, 6))
     plt.plot(all_scores, marker='o', linestyle='-', color='b', label='Sentiment Intensity')

-    # Add text labels for music cues along the graph
     for i, score in enumerate(all_scores):
         plt.text(i, score, music_cues[i], fontsize=8, ha='right', rotation=45)

@@ -84,7 +80,6 @@ def generate_script_graph():
     plt.legend()
     plt.tight_layout()

-    # Save plot as image file
     plot_path = "script_emotional_arc.png"
     plt.savefig(plot_path)
     plt.close()
@@ -95,19 +90,19 @@ def format_dashboard(results):
     formatted_results = ""
     for result in results:
         formatted_results += f"""
-        <div style="border:1px solid #ddd; padding:10px; margin-bottom:10px; border-radius:5px;">
-            <p>🎬 <strong>{result['Line']}</strong></p>
-            <p>📊 <strong>Sentiment:</strong> {result['Sentiment']}</p>
-            <p>💡 <strong>Description Suggestion:</strong> {result['Description Suggestion']}</p>
-            <p>🎶 <strong>Music Cue:</strong> {result['Music Cue']}</p>
+        <div class="dashboard-box">
+            <p><img src="https://geist-ui.dev/icons/activity.svg" class="dashboard-icon" alt="Line icon"> <strong>{result['Line']}</strong></p>
+            <p><img src="https://geist-ui.dev/icons/bar-chart.svg" class="dashboard-icon" alt="Sentiment icon"> <strong>Sentiment:</strong> {result['Sentiment']}</p>
+            <p><img src="https://geist-ui.dev/icons/eye.svg" class="dashboard-icon" alt="Description icon"> <strong>Description Suggestion:</strong> {result['Description Suggestion']}</p>
+            <p><img src="https://geist-ui.dev/icons/music.svg" class="dashboard-icon" alt="Music Cue icon"> <strong>Music Cue:</strong> {result['Music Cue']}</p>
         </div>
         """
     return formatted_results

 # Gradio interface to analyze script and display the dashboard
-with gr.Blocks() as interface:
-    gr.Markdown("## Script Sentiment and Music Cue Analyzer")
-    gr.Markdown("Enter your script line-by-line, and this tool will analyze sentiment, generate scene descriptions, suggest music cues, and show an emotional and musical arc.")
+with gr.Blocks(css="custom.css") as interface:
+    gr.Markdown("## Script Sentiment and Music Cue Analyzer", elem_id="title")
+    gr.Markdown("Enter your script line-by-line, and this tool will analyze sentiment, generate scene descriptions, suggest music cues, and show an emotional and musical arc.", elem_id="description")

     script_input = gr.Textbox(lines=10, placeholder="Enter your script here, one line per thought or dialogue.", label="Script")
     display_dashboard_button = gr.Button("Analyze Script")
@@ -115,7 +110,6 @@ with gr.Blocks() as interface:
     output_dashboard = gr.HTML(label="Dashboard Results")
     output_graph = gr.Image(label="Emotional and Musical Arc for Entire Script")

-    # Display dashboard functionality
     def display_dashboard(script):
         analysis_results, graph_path = analyze_script(script)
         dashboard_content = format_dashboard(analysis_results)
@@ -123,5 +117,4 @@ with gr.Blocks() as interface:

     display_dashboard_button.click(display_dashboard, inputs=script_input, outputs=[output_dashboard, output_graph])

-    # Launch the Gradio app (no need for share=True in Hugging Face Spaces)
-    interface.launch()
+interface.launch()
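
One follow-up worth noting on the [PAD] change: add_special_tokens() appends the new token with an id one past GPT-Neo's original vocabulary, and the model's embedding matrix is not grown automatically. The usual companion step is resize_token_embeddings(); a minimal sketch is below, not part of this commit, reusing the tokenizer_gpt and model_gpt names from the diff:

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer_gpt = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
model_gpt = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B")

# Register '[PAD]' once at load time rather than inside every generate_text() call
if tokenizer_gpt.pad_token is None:
    tokenizer_gpt.add_special_tokens({'pad_token': '[PAD]'})
    # Grow the embedding table so the new pad_token_id is a valid index whenever
    # it is actually embedded (for example in padded batches)
    model_gpt.resize_token_embeddings(len(tokenizer_gpt))

Doing this once at load time also keeps generate_text() free of tokenizer mutation.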
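
With a single prompt per call, the padding path is mostly dormant; it only does real work when several prompts are tokenized as one batch. The sketch below is an assumption, not something this commit does: it batches the two per-line prompts that analyze_script() builds (description_prompt and music_cue_prompt are the names from the diff). Decoder-only models such as GPT-Neo should be left-padded for batched generation.

# Sketch only: batch the two per-line prompts so padding and attention_mask matter
tokenizer_gpt.padding_side = "left"  # left-pad for decoder-only generation

prompts = [description_prompt, music_cue_prompt]
batch = tokenizer_gpt(prompts, return_tensors="pt", padding=True, truncation=True, max_length=50)
outputs = model_gpt.generate(
    batch["input_ids"],
    attention_mask=batch["attention_mask"],
    max_new_tokens=50,
    no_repeat_ngram_size=2,
    pad_token_id=tokenizer_gpt.pad_token_id,
)
# skip_special_tokens drops the '[PAD]' tokens introduced by batching
description, music_cue = (tokenizer_gpt.decode(o, skip_special_tokens=True).strip() for o in outputs)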
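
For a quick check of the new Geist markup without launching the app, format_dashboard() can be called directly. The dict keys match the ones analyze_script() builds; the sample values below are placeholders invented for illustration. The emitted dashboard-box and dashboard-icon classes, plus the title and description element ids, are what the referenced custom.css (not included in this diff) is expected to style.

# Sketch only: exercise format_dashboard() with placeholder data
sample_results = [
    {
        "Line": "Line 1: The rain would not stop.",        # placeholder text
        "Sentiment": "LABEL_0",                             # placeholder label
        "Description Suggestion": "A dim room, rain on glass.",
        "Music Cue": "Slow tempo, minor key, solo cello.",
    }
]
print(format_dashboard(sample_results))  # prints one <div class="dashboard-box"> block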