fracapuano committed on
Commit
15253f2
·
verified ·
1 Parent(s): 0330963

add files via upload

Browse files
Files changed (5) hide show
  1. .Dockerfile +34 -0
  2. app.py +26 -0
  3. meeting_notes.py +202 -0
  4. poetry.lock +0 -0
  5. pyproject.toml +18 -0
.Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a base image
FROM python:3.12

# Set the HOME environment variable and make /home directory world-writable
ENV HOME=/home
RUN mkdir -p $HOME && chmod 777 $HOME

# Set the working directory in the container
WORKDIR /usr/src/app

# Copy only the dependency manifests first, so the dependency layers below are
# reused from cache until pyproject.toml or poetry.lock change
COPY pyproject.toml poetry.lock* /usr/src/app/

# Install Poetry
RUN pip install -U pip
RUN pip install poetry

# Configure Poetry: Do not create a virtual environment
RUN poetry config virtualenvs.create false

# Install project dependencies only. --no-root skips installing the project
# itself: its sources are not copied yet at this point, and the app is run
# directly from app.py rather than imported as an installed package.
RUN poetry install --no-root --no-interaction

# Copy the rest of your app's source code from your host to your image filesystem.
COPY . /usr/src/app

# This is the port exposed by the container
EXPOSE 7860

# Checking the container is still working
HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health

# The command to run the app
ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit front end: upload a meeting transcript and download generated notes."""
import streamlit as st
from meeting_notes import transcript_to_notes

st.title('Meeting Transcript to Notes Converter')

uploaded_file = st.file_uploader("Choose a file", type=["txt"])
if uploaded_file is not None:
    transcript = str(uploaded_file.read(), "utf-8")  # Read and decode file
    speakers_input = st.text_input("Enter the list of speakers, separated by commas (optional)")

    if st.button("Generate Notes"):
        # Drop blank entries (e.g. from a trailing or doubled comma) so no empty
        # speaker name is passed on; fall back to None when nothing usable is left.
        speakers_list = [
            speaker.strip()
            for speaker in speakers_input.split(',')
            if speaker.strip()
        ] or None
        notes = transcript_to_notes(transcript, speakers_list)

        # Write notes to a temporary file. The encoding is explicit so that
        # non-ASCII characters in the notes do not depend on the host locale.
        # NOTE(review): this path is shared by every session of the app and the
        # file is not read back below - confirm it is still needed.
        with open("meeting_notes.txt", "w", encoding="utf-8") as file:
            file.write(notes)

        st.success("Meeting notes generated successfully!")
        st.download_button(
            label="Download Meeting Notes",
            data=notes,
            file_name="meeting_notes.txt",
            mime="text/plain"
        )
26
+
meeting_notes.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ from dotenv import load_dotenv
3
+ from typing import List, Optional
4
+ from tqdm import tqdm
5
+
6
+ load_dotenv()
7
+ client = OpenAI()
8
+
9
def extract_topics(meeting_transcript):
    """Ask the model for the main topics of a meeting and return them as a list.

    The model is instructed to answer with a single bracketed, comma-separated
    list; the reply is then parsed into individual topic names.

    Args:
        meeting_transcript: Full text of the meeting transcript.

    Returns:
        A list of topic names, in the order the model produced them. The list
        is empty when the model returns no usable text.
    """
    prompt_text = f"""
    ## Transcript
    <transcript>
    {meeting_transcript}
    </transcript>

    You are a topic extractor whose main task is to identify and list the top 5 most important topics discussed
    in a meeting, to whom you have access thanks to the provided meeting transcript.
    Provided the transcript available under the <transcript> tags, analyze it and extract the main topics discussed.
    Format your output as a list of topics I could iterate on.

    An ideal output for a meeting covering budgeting budget concers, project handling and staffing looks like
    [Budgeting, Project Deadlines, Staffing, New Policies, Client Feedback]
    """

    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt_text}],
        max_tokens=100,  # short list of topics discussed
        temperature=0.0,
        stop=["\n", "]"]  # Stops to help ensure the list format is respected
    )

    # The message content can be None; treat that the same as an empty reply.
    raw_topics = response.choices[0].message.content or ""
    return _parse_topic_list(raw_topics)


def _parse_topic_list(raw_topics):
    """Turn a bracketed, comma-separated model reply into a clean list of topic names."""
    inner = raw_topics.strip().strip("][")
    # Tolerate missing spaces after commas and quoted items, and skip blanks so
    # an empty reply yields [] instead of [''].
    candidates = (item.strip().strip("'\"").strip() for item in inner.split(","))
    return [topic for topic in candidates if topic]
35
+
36
def generate_bullet_point_summary(topic, meeting_transcript):
    """Summarize, as bullet points, what the transcript says about one topic.

    Args:
        topic: The topic the summary should focus on.
        meeting_transcript: Full text of the meeting transcript.

    Returns:
        The model's reply with surrounding whitespace removed. Generation is
        capped at 400 tokens and stops at the first blank line.
    """
    instructions = f"""
    <topic>
    {topic}
    </topic>
    <transcript>
    {meeting_transcript}
    </transcript>

    You are an AI assistant tasked with assisting in summarizing meeting discussions.
    Below is the transcript of a meeting, and a specific topic to focus on.
    Please provide a summary of all the discussions related to this topic in bullet points. Be very
    concise and to the point. Each bullet point must contain one concept only.
    """
    chat_messages = [{"role": "user", "content": instructions}]

    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=chat_messages,
        max_tokens=400,  # raise this for more detailed summaries
        stop=["\n\n"],  # a blank line marks the end of the bullet list
    )
    return completion.choices[0].message.content.strip()
59
+
60
def summarize_topics(topics, meeting_transcript):
    """Build a mapping from each topic to its bullet-point summary.

    Args:
        topics: Iterable of topic names.
        meeting_transcript: Full text of the meeting transcript.

    Returns:
        A dict keyed by topic whose values are the summaries returned by
        ``generate_bullet_point_summary`` for that topic.
    """
    return {
        topic_name: generate_bullet_point_summary(topic_name, meeting_transcript)
        for topic_name in topics
    }
67
+
68
def extract_actionable_items(meeting_transcript):
    """Extract the action items assigned to each speaker in a meeting.

    The model is asked to list every speaker together with their action items
    and to wrap the list in ``<result>`` tags; only the text inside those tags
    is returned when they are present.

    Args:
        meeting_transcript: Full text of the meeting transcript.

    Returns:
        The per-speaker action item list as a string, with surrounding
        whitespace removed. Empty when the model returns no text.
    """
    prompt_text = f"""
    Please carefully analyze the following meeting transcript, which will be provided between XML tags:

    <meeting_transcript>
    {meeting_transcript}
    </meeting_transcript>

    First, identify each unique speaker who participated in the meeting.

    Then, for each speaker you identified, carefully extract any concrete action items, tasks, or next
    steps that were assigned to them during the meeting. Use the full context of the meeting to
    determine what the key next steps are for each person.

    Format your response as a bulleted list, with each speaker's full name followed by a sublist of the
    specific action items you identified for them. Here is an example of the desired format:

    - John Smith:
      - Follow up with the client by next Wednesday.
      - Prepare a detailed budget proposal for the next meeting.
    - Jane Doe:
      - Coordinate with the marketing team to draft the new campaign outline.
      - Send updated staffing requirements to HR by Friday.

    Omit any speakers for whom no clear action items or next steps were specified in the meeting. Focus
    on extracting the most concrete and actionable items for each speaker.

    Write your full list of speakers and action items inside <result> tags.
    If you are unable to identify the speakers' names, please write "Speaker 1", "Speaker 2", etc.
    """
    # No stop sequence here: stopping at the first blank line would cut the
    # reply off as soon as the model separates two speakers (or the opening
    # tag from the list) with an empty line.
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt_text}],
    )
    # The message content can be None; treat that the same as an empty reply.
    action_items = (response.choices[0].message.content or "").strip()

    # Keep only what is inside the <result> wrapper the prompt asks for; if the
    # model ignored that instruction, fall back to the whole reply.
    _, opening_tag, after_opening = action_items.partition("<result>")
    if opening_tag:
        action_items = after_opening.partition("</result>")[0].strip()
    return action_items
105
+
106
def cleanup_meeting_notes(meeting_notes, speakers_list=None):
    """Ask the model to reformat draft meeting notes into the final markdown layout.

    Args:
        meeting_notes: Draft notes to reformat.
        speakers_list: Optional list of speaker names, in speaker order. When
            it is missing or empty, the prompt states that no list was provided.

    Returns:
        The content of the model's reply, unmodified.
    """
    # Text shown to the model inside the <speakers_list> tags.
    speakers_block = speakers_list or "No speakers list provided"

    formatting_prompt = f"""
    <meeting_notes_draft>
    {meeting_notes}
    </meeting_notes_draft>
    <speakers_list>
    {speakers_block}
    </speakers_list>

    You are a meeting notes editor who has been tasked with cleaning up the draft of a meeting notes document.
    You must not modify the content you receive in any way or form, your task is simply to reformat the text to make it adhere to
    the following guidelines:
    - Production-ready meeting notes are always formatted in markdown. Ensure that the text is properly formatted in markdown.
    - Production-ready meeting notes always have 3 sections: "Speakers", "Meeting Summary", "Action Items". These sections are always H1 in markdown (#Speakers, #Meeting Summary, #Action Items).
    - Production-ready meeting notes always have a horizontal rule (---) between each section.
    - Production-ready meeting notes always present the topics discussed in the #Meeting Summary section, with each topic being a toggle subheading (> ##Topic).
    - Production-ready meeting notes always present the bullet points under each topic as markdown bullet points points.
    - Production-ready meeting notes always have each speaker's name in bold.
    - Production-ready meeting notes always have the action items in a bulleted list.
    - Production-ready meeting notes always have the action items grouped by the speaker who is responsible for them.
    - Production-ready meetings always presents speakers mapped to the name in the <speakers_list> tag, if available, in the same order. This means that for ["Francesco", "Carlo", "Antonio"]
    you would have that "Francesco" is the "Speaker 0", "Carlo" is the "Speaker 1", and "Antonio" is the "Speaker 2".

    Your output must exactly match the format described above. You must not modify the content of the meeting notes in any way, only the formatting. You will be
    penalized if you change the content of the meeting notes.
    An example template for the meeting notes is as follows:
    # Speakers
    - **Speaker 0**
    - **Speaker 1**
    ...

    ---
    # Meeting Summary
    > ## Topic 1
    - Bullet point 1
    - Bullet point 2
    ...
    > ## Topic 2
    - Bullet point 1
    - Bullet point 2
    ...

    ---
    # Action Items
    ## <Speaker 0's name> to own
    - Action item 1
    - Action item 2
    ## <Speaker 1's name> to own
    - Action item 1
    - Action item 2
    """

    chat_messages = [{"role": "user", "content": formatting_prompt}]
    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=chat_messages,
    )
    return completion.choices[0].message.content
163
+
164
def transcript_to_notes(meeting_transcript: str, speakers_list:Optional[List[str]]=None) -> str:
    """Converts a meeting transcript into formatted meeting notes.

    Runs three extraction steps (topics, per-topic summaries, action items),
    assembles their results into a plain-text draft, and hands that draft to
    ``cleanup_meeting_notes`` for final formatting.

    Args:
        meeting_transcript (str): The text of the meeting transcript
        speakers_list (Optional[List[str]]): A list of speakers in the meeting

    Returns:
        str: The formatted meeting notes
    """
    # Using the progress bar as a context manager guarantees it is closed,
    # even if one of the extraction steps raises.
    with tqdm(total=3) as pbar:
        topics = extract_topics(meeting_transcript)
        pbar.update(1)
        by_topic_summaries = summarize_topics(topics, meeting_transcript)
        pbar.update(1)
        actions_by_speaker = extract_actionable_items(meeting_transcript)
        pbar.update(1)

    draft_notes = f"""
    topics: {topics}
    summaries: {by_topic_summaries}
    actions: {actions_by_speaker}
    """

    meeting_notes = cleanup_meeting_notes(draft_notes, speakers_list)
    return meeting_notes
190
+
191
+ # Example usage
192
if __name__ == "__main__":
    # Read the sample transcript. The encoding is explicit so the result does
    # not depend on the platform's default locale.
    with open("tanguy-off-boarding-meeting.txt", "r", encoding="utf-8") as file:
        meeting_transcript = file.read()

    speakers_list = ["Tanguy", "Francesco"]

    notes = transcript_to_notes(meeting_transcript, speakers_list)
    # The generated notes may contain non-ASCII characters; write them as UTF-8.
    with open("meeting_notes.md", "w", encoding="utf-8") as file:
        file.write(notes)

    print("Meeting notes generated successfully!")
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "transcript2notes"
3
+ version = "0.1.0"
4
+ description = "An LLM-based chain of agents to make my life at Alan easier."
5
+ authors = ["fracapuano <[email protected]>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.12"
10
+ openai = "^1.30.3"
11
+ anthropic = "^0.26.1"
12
+ python-dotenv = "^1.0.1"
13
+ streamlit = "^1.35.0"
14
+
15
+
16
+ [build-system]
17
+ requires = ["poetry-core"]
18
+ build-backend = "poetry.core.masonry.api"