Spaces:
Sleeping
Sleeping
File size: 5,658 Bytes
da890b5 fcfe145 da890b5 fcfe145 da890b5 fcfe145 da890b5 fcfe145 da890b5 fcfe145 da890b5 fcfe145 da890b5 fcfe145 a1ee826 fcfe145 a1ee826 fcfe145 a1ee826 fcfe145 a1ee826 fcfe145 da890b5 a1ee826 fcfe145 da890b5 a1ee826 fcfe145 da890b5 a1ee826 da890b5 a1ee826 da890b5 a1ee826 da890b5 fcfe145 5660b08 da890b5 fcfe145 a1ee826 fcfe145 da890b5 fcfe145 da890b5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import json
import re

import gradio as gr
from transformers import pipeline
# Build the three Hugging Face pipelines once at import time; analyze_session
# reuses these module-level objects on every call.
summarizer = pipeline(
    task="text2text-generation",
    model="google/flan-t5-small",
    tokenizer="google/flan-t5-small",
)
sentiment_analyzer = pipeline(
    task="sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
)
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
)
def convert_to_json(transcript_text):
    """Split a plain-text transcript into a list of speaker turns.

    A line starting with ``Therapist:``/``T:`` or ``Client:``/``C:``
    (case-insensitive, leading whitespace allowed) opens a new turn. Any
    other non-blank line is appended to the current turn; text that appears
    before the first label is attributed to ``"Unknown"``. Blank lines are
    ignored so they do not leave double spaces inside a turn's text.

    Args:
        transcript_text: The raw transcript as a single string.

    Returns:
        ``{"session": [{"speaker": <label>, "text": <turn text>}, ...]}``.
        If no turn could be extracted (e.g. empty input), the session holds
        a single ``"Unknown"`` turn containing the stripped input.
    """
    # Checked in this order; the two patterns cannot match the same line.
    speaker_patterns = (
        ("Therapist", re.compile(r"^\s*(Therapist|T):", re.IGNORECASE)),
        ("Client", re.compile(r"^\s*(Client|C):", re.IGNORECASE)),
    )

    session_data = []
    current_speaker = None
    current_text = []

    for raw_line in transcript_text.strip().split("\n"):
        line = raw_line.strip()
        if not line:
            # Blank separator lines carry no content.
            continue

        for speaker, pattern in speaker_patterns:
            if pattern.match(line):
                # A new label closes the turn collected so far.
                if current_speaker and current_text:
                    session_data.append(
                        {"speaker": current_speaker, "text": " ".join(current_text).strip()}
                    )
                current_speaker = speaker
                current_text = [pattern.sub("", line).strip()]
                break
        else:
            # Unlabelled line: continuation of the current turn.
            if current_speaker is None:
                current_speaker = "Unknown"
            current_text.append(line)

    # Flush the final turn.
    if current_speaker and current_text:
        session_data.append(
            {"speaker": current_speaker, "text": " ".join(current_text).strip()}
        )

    if not session_data:
        session_data = [{"speaker": "Unknown", "text": transcript_text.strip()}]

    return {"session": session_data}
def analyze_session(transcript, custom_instruction, audio):
    """Summarize a therapy session supplied as text or audio.

    Args:
        transcript: Transcript text. May be empty or None when audio is given.
        custom_instruction: Optional extra instruction appended to the
            summarization prompt. May be empty or None.
        audio: Value handed to the speech-recognition pipeline (a file path
            when the input component uses ``type="filepath"``), or None.
            When present it takes precedence over ``transcript``.

    Returns:
        A Markdown string containing the generated summary, the overall
        sentiment label, the recurring concerns found in the transcript and
        suggested follow-up topics. If no usable input is provided, a short
        message asking for a transcript or audio file is returned instead.
    """
    if audio is not None:
        # NOTE(review): long recordings may need chunked transcription
        # (e.g. a chunk length option) -- confirm against the ASR pipeline docs.
        transcript_text = asr_pipeline(audio)["text"]
    else:
        transcript_text = transcript

    # Treat a missing value like empty text instead of failing on .strip().
    transcript_text = transcript_text or ""
    custom_instruction = (custom_instruction or "").strip()

    if not transcript_text.strip():
        return "Please provide a transcript or an audio file."

    json_data = convert_to_json(transcript_text)

    prompt = (
        "You are a helpful assistant that summarizes psychotherapy sessions. "
        "The session is provided in JSON format with speaker turns. "
        "Summarize the key themes, emotional shifts, and patterns from this session. "
    )
    if custom_instruction:
        prompt += f" Additionally, {custom_instruction}"
    # The prompt promises JSON, so serialize as JSON rather than a Python repr.
    prompt += "\n\nJSON data:\n" + json.dumps(json_data, ensure_ascii=False)

    summary_output = summarizer(prompt, max_length=200, do_sample=False)
    summary = summary_output[0]["generated_text"].strip()

    # Truncate so a long transcript cannot overflow the classifier's input.
    # 512 tokens is assumed to be the model's limit -- TODO confirm on the model card.
    sentiment_results = sentiment_analyzer(transcript_text, truncation=True, max_length=512)
    main_sentiment = sentiment_results[0]["label"]

    keywords_of_interest = [
        "anxiety", "depression", "relationship", "stress", "fear",
        "goals", "progress", "cognitive", "behavior",
    ]
    # Tokenize on letters only so "anxiety," or "stress." still count, and
    # report matches in the fixed keyword order for a stable output.
    spoken_words = set(re.findall(r"[a-z]+", transcript_text.lower()))
    recurring_concerns = [kw for kw in keywords_of_interest if kw in spoken_words]

    if recurring_concerns:
        recurring_concerns_str = "Recurring concerns include: " + ", ".join(recurring_concerns)
    else:
        recurring_concerns_str = "No specific recurring concerns identified from the predefined list."

    summary_lower = summary.lower()
    follow_up_suggestions = []
    if "progress" in summary_lower:
        follow_up_suggestions.append("Explore client's perception of progress in more detail.")
    if "relationship" in summary_lower:
        follow_up_suggestions.append("Discuss client's relationship dynamics further.")
    if not follow_up_suggestions:
        follow_up_suggestions.append("Consider following up on the emotional themes identified in the summary.")
    follow_up_suggestions_str = " ".join(follow_up_suggestions)

    final_output = f"**Summary of Session:**\n{summary}\n\n**Overall Sentiment:** {main_sentiment}\n\n**{recurring_concerns_str}**\n\n**Suggested Follow-Up Topics:** {follow_up_suggestions_str}"
    return final_output
# Markdown shown at the top of the page. Blank lines separate the sections so
# the headings, lists and disclaimer render as distinct blocks.
description = """# Psychotherapy Session Summarizer

This tool summarizes psychotherapy session transcripts (text or audio) into key themes, emotional shifts, and patterns.

**How to Use:**
- You may upload an audio file of the session or paste the text transcript.
- Optionally provide a custom focus or instruction (e.g., "Focus on how the client talks about their anxiety.").
- Click 'Summarize' to generate a summary along with identified concerns and suggested follow-ups.

**Important:**
- This tool is for educational or research purposes only and should not be taken as professional advice. The author accepts no responsibility for any misuse.
"""
# Page layout: intro text, the two session inputs side by side, then the
# optional instruction, the trigger button and the rendered result.
with gr.Blocks() as demo:
    gr.Markdown(description)

    with gr.Row():
        transcript_input = gr.Textbox(
            label="Session Transcript (Text)",
            lines=10,
            placeholder="Paste the session transcript here...",
        )
        audio_input = gr.Audio(
            type="filepath",
            label="Session Audio (Optional)",
        )

    custom_instruction_input = gr.Textbox(
        label="Custom Instruction (Optional)",
        placeholder="e.g., Focus on anxiety and coping strategies.",
    )
    summarize_button = gr.Button("Summarize")
    output_box = gr.Markdown()

    # Input order must match analyze_session(transcript, custom_instruction, audio).
    summarize_button.click(
        fn=analyze_session,
        inputs=[transcript_input, custom_instruction_input, audio_input],
        outputs=output_box,
    )

if __name__ == "__main__":
    demo.launch()
|