Spaces:

fracapuano
/

transcript2notes

Sleeping

App Files Files Community

fracapuano committed on May 29, 2024

Commit

15253f2

verified ·

1 Parent(s): 0330963

add files via upload

Browse files

Files changed (5) hide show

.Dockerfile +34 -0
app.py +26 -0
meeting_notes.py +202 -0
poetry.lock +0 -0
pyproject.toml +18 -0

.Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+# Use an official Python runtime as a base image
+FROM python:3.12
+# Set the HOME environment variable and make /home directory world-writable
+ENV HOME=/home
+RUN mkdir -p $HOME && chmod 777 $HOME
+# Set the working directory in the container
+WORKDIR /usr/src/app
+# Copy only the dependency manifests first, so the dependency layers below are
+# rebuilt only when pyproject.toml or poetry.lock change
+COPY pyproject.toml poetry.lock* /usr/src/app/
+# Install Poetry
+RUN pip install -U pip
+RUN pip install poetry
+# Configure Poetry: Do not create a virtual environment
+RUN poetry config virtualenvs.create false
+# Install project dependencies only. --no-root skips installing the project itself:
+# at this point only the manifests have been copied, so there is no package to install.
+RUN poetry install --no-root --no-interaction
+# Copy the rest of your app's source code from your host to your image filesystem.
+COPY . /usr/src/app
+# This is the port exposed by the container
+EXPOSE 7860
+# Checking the container is still working
+HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
+# The command to run the app
+ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]

app.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import streamlit as st
+from meeting_notes import transcript_to_notes
+st.title('Meeting Transcript to Notes Converter')
+uploaded_file = st.file_uploader("Choose a file", type=["txt"])
+if uploaded_file is not None:
+    transcript = str(uploaded_file.read(), "utf-8")  # Read and decode file
+    speakers_input = st.text_input("Enter the list of speakers, separated by commas (optional)")
+    if st.button("Generate Notes"):
+        speakers_list = [speaker.strip() for speaker in speakers_input.split(',')] if speakers_input else None
+        notes = transcript_to_notes(transcript, speakers_list)
+        # Write notes to a temporary file
+        with open("meeting_notes.txt", "w") as file:
+            file.write(notes)
+        st.success("Meeting notes generated successfully!")
+        st.download_button(
+            label="Download Meeting Notes",
+            data=notes,
+            file_name="meeting_notes.txt",
+            mime="text/plain"
+        )

meeting_notes.py ADDED Viewed

	@@ -0,0 +1,202 @@

+from openai import OpenAI
+from dotenv import load_dotenv
+from typing import List, Optional
+from tqdm import tqdm
+load_dotenv()
+client = OpenAI()
+def extract_topics(meeting_transcript):
+    prompt_text = f"""
+    ## Transcript
+    <transcript>
+    {meeting_transcript}
+    </transcript>
+    You are a topic extractor whose main task is to identify and list the top 5 most important topics discussed
+    in a meeting, to whom you have access thanks to the provided meeting transcript.
+    Provided the transcript available under the <transcript> tags, analyze it and extract the main topics discussed.
+    Format your output as a list of skills I could iterate on.
+    An ideal output for a meeting covering budgeting budget concers, project handling and staffing looks like
+    [Budgeting, Project Deadlines, Staffing, New Policies, Client Feedback]
+    """
+    response = client.chat.completions.create(
+        model="gpt-4-turbo",
+        messages=[{"role": "user", "content": prompt_text}],
+        max_tokens=100,  # short list of topics discussed
+        temperature=0.0,
+        stop=["\n", "]"]  # Stops to help ensure the list format is respected
+    )
+    topics = response.choices[0].message.content.strip('][').split(', ')  # Processing the string output into a Python list
+    return topics
+def generate_bullet_point_summary(topic, meeting_transcript):
+    prompt_text = f"""
+    <topic>
+    {topic}
+    </topic>
+    <transcript>
+    {meeting_transcript}
+    </transcript>
+    You are an AI assistant tasked with assisting in summarizing meeting discussions.
+    Below is the transcript of a meeting, and a specific topic to focus on.
+    Please provide a summary of all the discussions related to this topic in bullet points. Be very
+    concise and to the point. Each bullet point must contain one concept only.
+    """
+    response = client.chat.completions.create(
+        model="gpt-4-turbo",
+        messages=[{"role": "user", "content": prompt_text}],
+        max_tokens=400,  # Increase if more detailed summaries are needed
+        stop=["\n\n"]  # A double newline to signify the end of the summary list
+    )
+    summary = response.choices[0].message.content.strip()
+    return summary
+def summarize_topics(topics, meeting_transcript):
+    # Generate summaries for each topic
+    summaries = {}
+    for topic in topics:
+        summaries[topic] = generate_bullet_point_summary(topic, meeting_transcript)
+    return summaries
+def extract_actionable_items(meeting_transcript):
+    prompt_text = f"""
+    Please carefully analyze the following meeting transcript, which will be provided between XML tags:
+    <meeting_transcript>
+    {meeting_transcript}
+    </meeting_transcript>
+    First, identify each unique speaker who participated in the meeting.
+    Then, for each speaker you identified, carefully extract any concrete action items, tasks, or next
+    steps that were assigned to them during the meeting. Use the full context of the meeting to
+    determine what the key next steps are for each person.
+    Format your response as a bulleted list, with each speaker's full name followed by a sublist of the
+    specific action items you identified for them. Here is an example of the desired format:
+    ## John Smith:
+    - Follow up with the client by next Wednesday.
+    - Prepare a detailed budget proposal for the next meeting.
+    - Jane Doe:
+    - Coordinate with the marketing team to draft the new campaign outline.
+    - Send updated staffing requirements to HR by Friday.
+    Omit any speakers for whom no clear action items or next steps were specified in the meeting. Focus
+    on extracting the most concrete and actionable items for each speaker.
+    Write your full list of speakers and action items inside <result> tags.
+    If you are unable to identify the speakers' names, please write "Speaker 1", "Speaker 2", etc.
+    """
+    response = client.chat.completions.create(
+        model="gpt-4-turbo",
+        messages=[{"role": "user", "content": prompt_text}],
+        stop=["\n\n"]  # A double newline to signify the end of the list
+    )
+    action_items = response.choices[0].message.content.strip()
+    return action_items
+def cleanup_meeting_notes(meeting_notes, speakers_list=None):
+    prompt_text = f"""
+    <meeting_notes_draft>
+    {meeting_notes}
+    </meeting_notes_draft>
+    <speakers_list>
+    {speakers_list if speakers_list else "No speakers list provided"}
+    </speakers_list>
+    You are a meeting notes editor who has been tasked with cleaning up the draft of a meeting notes document.
+    You must not modify the content you receive in any way or form, your task is simply to reformat the text to make it adhere to
+    the following guidelines:
+    - Production-ready meeting notes are always formatted in markdown. Ensure that the text is properly formatted in markdown.
+    - Production-ready meeting notes always have 3 sections: "Speakers", "Meeting Summary", "Action Items". These sections are always H1 in markdown (#Speakers, #Meeting Summary, #Action Items).
+    - Production-ready meeting notes always have a horizontal rule (---) between each section.
+    - Production-ready meeting notes always present the topics discussed in the #Meeting Summary section, with each topic being a toggle subheading (> ##Topic).
+    - Production-ready meeting notes always present the bullet points under each topic as markdown bullet points points.
+    - Production-ready meeting notes always have each speaker's name in bold.
+    - Production-ready meeting notes always have the action items in a bulleted list.
+    - Production-ready meeting notes always have the action items grouped by the speaker who is responsible for them.
+    - Production-ready meetings always presents speakers mapped to the name in the <speakers_list> tag, if available, in the same order. This means that for ["Francesco", "Carlo", "Antonio"]
+    you would have that "Francesco" is the "Speaker 0", "Carlo" is the "Speaker 1", and "Antonio" is the "Speaker 2".
+    Your output must exactly match the format described above. You must not modify the content of the meeting notes in any way, only the formatting. You will be
+    penalized if you change the content of the meeting notes.
+    An example template for the meeting notes is as follows:
+    # Speakers
+    - **Speaker 0**
+    - **Speaker 1**
+    ...
+    ---
+    # Meeting Summary
+    > ## Topic 1
+    - Bullet point 1
+    - Bullet point 2
+    ...
+    > ## Topic 2
+    - Bullet point 1
+    - Bullet point 2
+    ...
+    ---
+    # Action Items
+    ## <Speaker 0's name> to own
+    - Action item 1
+    - Action item 2
+    ## <Speaker 1's name> to own
+    - Action item 1
+    - Action item 2
+    """
+    response = client.chat.completions.create(
+        model="gpt-4-turbo",
+        messages=[{"role": "user", "content": prompt_text}]
+    )
+    return response.choices[0].message.content
+def transcript_to_notes(meeting_transcript: str, speakers_list:Optional[List[str]]=None) -> str:
+    """Converts a meeting transcript into formatted meeting notes.
+    Args:
+        meeting_transcript (str): The text of the meeting transcript
+        speakers_list (Optional[List[str]]): A list of speakers in the meeting
+    Returns:
+        str: The formatted meeting notes
+    """
+    pbar = tqdm(total=3)
+    topics = extract_topics(meeting_transcript)
+    pbar.update(1)
+    by_topic_summaries = summarize_topics(topics, meeting_transcript)
+    pbar.update(1)
+    actions_by_speaker = extract_actionable_items(meeting_transcript)
+    pbar.update(1)
+    draft_notes = f"""
+    topics: {topics}
+    summaries: {by_topic_summaries}
+    actions: {actions_by_speaker}
+    """
+    meeting_notes = cleanup_meeting_notes(draft_notes, speakers_list)
+    return meeting_notes
+# Example usage
+if __name__ == "__main__":
+    with open("tanguy-off-boarding-meeting.txt", "r") as file:
+        meeting_transcript = file.read()
+    speakers_list = ["Tanguy", "Francesco"]
+    notes = transcript_to_notes(meeting_transcript, speakers_list)
+    with open("meeting_notes.md", "w") as file:
+        file.write(notes)
+    print("Meeting notes generated successfully!")

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,18 @@

+# Poetry project metadata for the transcript2notes app
+[tool.poetry]
+name = "transcript2notes"
+version = "0.1.0"
+description = "A LLM-based chain made agents to make my life at Alan easier."
+authors = ["fracapuano <[email protected]>"]
+readme = "README.md"
+[tool.poetry.dependencies]
+python = "^3.12"
+# Client used by meeting_notes.py for its chat-completion calls
+openai = "^1.30.3"
+# NOTE(review): not imported by app.py or meeting_notes.py — confirm it is still needed
+anthropic = "^0.26.1"
+# Provides load_dotenv(), which meeting_notes.py calls at import time
+python-dotenv = "^1.0.1"
+# UI framework used by app.py
+streamlit = "^1.35.0"
+# NOTE(review): meeting_notes.py also imports tqdm, which is not declared here;
+# presumably it is installed transitively — confirm and consider declaring it.
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"