Spaces:
Sleeping
Sleeping
Miguel Castro
committed on
Commit
·
ecff187
1
Parent(s):
9ecbafc
Explicitly add [PAD] token, add Geist icons and styles
Browse files- script_analyzer.py +17 -24
script_analyzer.py
CHANGED
@@ -2,27 +2,30 @@ import gradio as gr
|
|
2 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, AutoModelForCausalLM
|
3 |
import matplotlib.pyplot as plt
|
4 |
|
5 |
-
# Load
|
6 |
tokenizer_sentiment = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
|
7 |
model_sentiment = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
|
8 |
sentiment_classifier = pipeline("sentiment-analysis", model=model_sentiment, tokenizer=tokenizer_sentiment)
|
9 |
|
10 |
-
# Load open-source language model for description and music cue generation (GPT-J or GPT-Neo)
|
11 |
tokenizer_gpt = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
|
12 |
model_gpt = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B")
|
13 |
|
14 |
-
# Updated generate_text function with
|
15 |
def generate_text(prompt):
|
|
|
|
|
|
|
|
|
16 |
# Prepare the input tensors with attention_mask and padding
|
17 |
inputs = tokenizer_gpt(prompt, return_tensors="pt", padding=True, truncation=True, max_length=50)
|
18 |
# Generate text using max_new_tokens instead of max_length
|
19 |
outputs = model_gpt.generate(
|
20 |
inputs["input_ids"],
|
21 |
attention_mask=inputs["attention_mask"],
|
22 |
-
max_new_tokens=50,
|
23 |
num_return_sequences=1,
|
24 |
no_repeat_ngram_size=2,
|
25 |
-
pad_token_id=tokenizer_gpt.
|
26 |
)
|
27 |
generated_text = tokenizer_gpt.decode(outputs[0], skip_special_tokens=True)
|
28 |
return generated_text.strip()
|
@@ -37,25 +40,20 @@ def analyze_script(script):
|
|
37 |
analysis_results = []
|
38 |
|
39 |
for i, line in enumerate(lines):
|
40 |
-
# Perform sentiment analysis with advanced model
|
41 |
result = sentiment_classifier(line)[0]
|
42 |
sentiment = result['label']
|
43 |
score = result['score']
|
44 |
|
45 |
-
# Generate a detailed scene description based on sentiment and line content
|
46 |
description_prompt = f"Describe a scene with the sentiment '{sentiment}' for the line: '{line}'"
|
47 |
description = generate_text(description_prompt)
|
48 |
|
49 |
-
# Generate a specific music cue suggestion based on sentiment
|
50 |
music_cue_prompt = f"Suggest music elements (like tempo, key, and instrumentation) that would fit a scene with the sentiment '{sentiment}': '{line}'"
|
51 |
music_cue = generate_text(music_cue_prompt)
|
52 |
|
53 |
-
# Append data for display and graph
|
54 |
all_scores.append(score)
|
55 |
descriptions.append(description)
|
56 |
music_cues.append(music_cue)
|
57 |
|
58 |
-
# Format analysis results
|
59 |
analysis_results.append(
|
60 |
{
|
61 |
"Line": f"Line {i + 1}: {line}",
|
@@ -65,7 +63,6 @@ def analyze_script(script):
|
|
65 |
}
|
66 |
)
|
67 |
|
68 |
-
# Generate the emotional arc graph for the entire script
|
69 |
graph_path = generate_script_graph()
|
70 |
return analysis_results, graph_path
|
71 |
|
@@ -74,7 +71,6 @@ def generate_script_graph():
|
|
74 |
plt.figure(figsize=(12, 6))
|
75 |
plt.plot(all_scores, marker='o', linestyle='-', color='b', label='Sentiment Intensity')
|
76 |
|
77 |
-
# Add text labels for music cues along the graph
|
78 |
for i, score in enumerate(all_scores):
|
79 |
plt.text(i, score, music_cues[i], fontsize=8, ha='right', rotation=45)
|
80 |
|
@@ -84,7 +80,6 @@ def generate_script_graph():
|
|
84 |
plt.legend()
|
85 |
plt.tight_layout()
|
86 |
|
87 |
-
# Save plot as image file
|
88 |
plot_path = "script_emotional_arc.png"
|
89 |
plt.savefig(plot_path)
|
90 |
plt.close()
|
@@ -95,19 +90,19 @@ def format_dashboard(results):
|
|
95 |
formatted_results = ""
|
96 |
for result in results:
|
97 |
formatted_results += f"""
|
98 |
-
<div
|
99 |
-
<p
|
100 |
-
<p
|
101 |
-
<p
|
102 |
-
<p
|
103 |
</div>
|
104 |
"""
|
105 |
return formatted_results
|
106 |
|
107 |
# Gradio interface to analyze script and display the dashboard
|
108 |
-
with gr.Blocks() as interface:
|
109 |
-
gr.Markdown("## Script Sentiment and Music Cue Analyzer")
|
110 |
-
gr.Markdown("Enter your script line-by-line, and this tool will analyze sentiment, generate scene descriptions, suggest music cues, and show an emotional and musical arc.")
|
111 |
|
112 |
script_input = gr.Textbox(lines=10, placeholder="Enter your script here, one line per thought or dialogue.", label="Script")
|
113 |
display_dashboard_button = gr.Button("Analyze Script")
|
@@ -115,7 +110,6 @@ with gr.Blocks() as interface:
|
|
115 |
output_dashboard = gr.HTML(label="Dashboard Results")
|
116 |
output_graph = gr.Image(label="Emotional and Musical Arc for Entire Script")
|
117 |
|
118 |
-
# Display dashboard functionality
|
119 |
def display_dashboard(script):
|
120 |
analysis_results, graph_path = analyze_script(script)
|
121 |
dashboard_content = format_dashboard(analysis_results)
|
@@ -123,5 +117,4 @@ with gr.Blocks() as interface:
|
|
123 |
|
124 |
display_dashboard_button.click(display_dashboard, inputs=script_input, outputs=[output_dashboard, output_graph])
|
125 |
|
126 |
-
|
127 |
-
interface.launch()
|
|
|
2 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, AutoModelForCausalLM
|
3 |
import matplotlib.pyplot as plt
|
4 |
|
5 |
+
# Load sentiment and generation models as previously set up
|
6 |
tokenizer_sentiment = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
|
7 |
model_sentiment = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
|
8 |
sentiment_classifier = pipeline("sentiment-analysis", model=model_sentiment, tokenizer=tokenizer_sentiment)
|
9 |
|
|
|
10 |
tokenizer_gpt = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
|
11 |
model_gpt = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B")
|
12 |
|
13 |
+
# Updated generate_text function with padding token fix
|
14 |
def generate_text(prompt):
    """Generate a short continuation of *prompt* with the GPT-Neo model.

    Uses the module-level ``tokenizer_gpt`` and ``model_gpt``.

    Args:
        prompt: Text to condition the generation on. It is truncated to at
            most 50 tokens before being passed to the model.

    Returns:
        The decoded first generated sequence with special tokens removed and
        surrounding whitespace stripped.
        NOTE(review): for causal LMs the decoded sequence is expected to
        start with the prompt itself -- confirm callers want that.
    """
    # The tokenizer may ship without a pad token. Reuse the existing EOS
    # token instead of registering a new '[PAD]' token: a newly added token
    # gets an id beyond the model's embedding table unless the embeddings
    # are resized, which this file never does.
    if tokenizer_gpt.pad_token is None:
        tokenizer_gpt.pad_token = tokenizer_gpt.eos_token

    # Build input_ids and attention_mask; the explicit mask is passed to
    # generate() below.
    inputs = tokenizer_gpt(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=50,
    )

    # max_new_tokens bounds only the generated part, independent of the
    # prompt length (unlike max_length).
    outputs = model_gpt.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=50,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer_gpt.pad_token_id,
    )

    generated_text = tokenizer_gpt.decode(outputs[0], skip_special_tokens=True)
    return generated_text.strip()
|
|
|
40 |
analysis_results = []
|
41 |
|
42 |
for i, line in enumerate(lines):
|
|
|
43 |
result = sentiment_classifier(line)[0]
|
44 |
sentiment = result['label']
|
45 |
score = result['score']
|
46 |
|
|
|
47 |
description_prompt = f"Describe a scene with the sentiment '{sentiment}' for the line: '{line}'"
|
48 |
description = generate_text(description_prompt)
|
49 |
|
|
|
50 |
music_cue_prompt = f"Suggest music elements (like tempo, key, and instrumentation) that would fit a scene with the sentiment '{sentiment}': '{line}'"
|
51 |
music_cue = generate_text(music_cue_prompt)
|
52 |
|
|
|
53 |
all_scores.append(score)
|
54 |
descriptions.append(description)
|
55 |
music_cues.append(music_cue)
|
56 |
|
|
|
57 |
analysis_results.append(
|
58 |
{
|
59 |
"Line": f"Line {i + 1}: {line}",
|
|
|
63 |
}
|
64 |
)
|
65 |
|
|
|
66 |
graph_path = generate_script_graph()
|
67 |
return analysis_results, graph_path
|
68 |
|
|
|
71 |
plt.figure(figsize=(12, 6))
|
72 |
plt.plot(all_scores, marker='o', linestyle='-', color='b', label='Sentiment Intensity')
|
73 |
|
|
|
74 |
for i, score in enumerate(all_scores):
|
75 |
plt.text(i, score, music_cues[i], fontsize=8, ha='right', rotation=45)
|
76 |
|
|
|
80 |
plt.legend()
|
81 |
plt.tight_layout()
|
82 |
|
|
|
83 |
plot_path = "script_emotional_arc.png"
|
84 |
plt.savefig(plot_path)
|
85 |
plt.close()
|
|
|
90 |
formatted_results = ""
|
91 |
for result in results:
|
92 |
formatted_results += f"""
|
93 |
+
<div class="dashboard-box">
|
94 |
+
<p><img src="https://geist-ui.dev/icons/activity.svg" class="dashboard-icon" alt="Line icon"> <strong>{result['Line']}</strong></p>
|
95 |
+
<p><img src="https://geist-ui.dev/icons/bar-chart.svg" class="dashboard-icon" alt="Sentiment icon"> <strong>Sentiment:</strong> {result['Sentiment']}</p>
|
96 |
+
<p><img src="https://geist-ui.dev/icons/eye.svg" class="dashboard-icon" alt="Description icon"> <strong>Description Suggestion:</strong> {result['Description Suggestion']}</p>
|
97 |
+
<p><img src="https://geist-ui.dev/icons/music.svg" class="dashboard-icon" alt="Music Cue icon"> <strong>Music Cue:</strong> {result['Music Cue']}</p>
|
98 |
</div>
|
99 |
"""
|
100 |
return formatted_results
|
101 |
|
102 |
# Gradio interface to analyze script and display the dashboard
|
103 |
+
with gr.Blocks(css="custom.css") as interface:
|
104 |
+
gr.Markdown("## Script Sentiment and Music Cue Analyzer", elem_id="title")
|
105 |
+
gr.Markdown("Enter your script line-by-line, and this tool will analyze sentiment, generate scene descriptions, suggest music cues, and show an emotional and musical arc.", elem_id="description")
|
106 |
|
107 |
script_input = gr.Textbox(lines=10, placeholder="Enter your script here, one line per thought or dialogue.", label="Script")
|
108 |
display_dashboard_button = gr.Button("Analyze Script")
|
|
|
110 |
output_dashboard = gr.HTML(label="Dashboard Results")
|
111 |
output_graph = gr.Image(label="Emotional and Musical Arc for Entire Script")
|
112 |
|
|
|
113 |
def display_dashboard(script):
|
114 |
analysis_results, graph_path = analyze_script(script)
|
115 |
dashboard_content = format_dashboard(analysis_results)
|
|
|
117 |
|
118 |
display_dashboard_button.click(display_dashboard, inputs=script_input, outputs=[output_dashboard, output_graph])
|
119 |
|
120 |
+
interface.launch()
|
|