IAMTFRMZA committed on
Commit
4bd484d
·
verified ·
1 Parent(s): a01800e
Files changed (1) hide show
  1. app.py +56 -85
app.py CHANGED
@@ -1,17 +1,17 @@
 
 
 
1
  import streamlit as st
2
  from openai import OpenAI
3
  import time
4
- import os
5
- import re
6
- import pandas as pd
7
  import PyPDF2
 
8
  from datetime import datetime
9
  from pydub import AudioSegment
10
- from docx import Document
11
  from io import BytesIO
12
 
 
13
  st.set_page_config(page_title="Schlager ContractAi")
14
-
15
  st.title("Schlager ContractAi")
16
  st.caption("Chat with your contract or manage meeting minutes")
17
 
@@ -19,13 +19,62 @@ st.caption("Chat with your contract or manage meeting minutes")
19
  with st.sidebar:
20
  OPENAI_API_KEY = st.text_input("Enter your C2 Group of Technologies Access Key", type="password")
21
 
22
- # Tabs for Contract, Technical, and Minutes
23
- tab1, tab2, tab3 = st.tabs(["Contract", "Technical", "Minutes"])
24
 
25
  SUPPORTED_AUDIO_FORMATS = (".mp3", ".wav", ".m4a")
26
  SUPPORTED_TEXT_FORMATS = (".txt", ".docx", ".csv", ".xlsx", ".pdf")
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
 
29
  def contract_chat_section(tab, assistant_id, session_key, input_key):
30
  with tab:
31
  st.subheader("Chat")
@@ -87,81 +136,3 @@ contract_chat_section(tab1, ASSISTANT_CONTRACT_ID, "contract_messages", "contrac
87
  # Technical Chat Section
88
  contract_chat_section(tab2, ASSISTANT_TECHNICAL_ID, "technical_messages", "technical_input")
89
 
90
- with tab3:
91
- st.subheader("Minutes")
92
-
93
- if "generated_minutes" not in st.session_state:
94
- st.session_state["generated_minutes"] = ""
95
-
96
- uploaded_files = st.file_uploader("Upload meeting minutes (PDF/DOCX/Audio)",
97
- type=["pdf", "docx", "mp3", "wav", "m4a"],
98
- accept_multiple_files=True)
99
-
100
- if uploaded_files:
101
- st.write("### Uploaded Files:")
102
- for uploaded_file in uploaded_files:
103
- st.write(f"- {uploaded_file.name}")
104
-
105
- combined_text = ""
106
- for uploaded_file in uploaded_files:
107
- if uploaded_file.name.lower().endswith(SUPPORTED_AUDIO_FORMATS):
108
- audio = AudioSegment.from_file(uploaded_file)
109
- temp_audio_path = "temp_audio.mp3"
110
- audio.export(temp_audio_path, format="mp3")
111
- with open(temp_audio_path, "rb") as audio_file:
112
- transcription = client.audio.transcriptions.create(
113
- model="whisper-1",
114
- file=audio_file
115
- )
116
- combined_text += transcription.text + "\n"
117
- os.remove(temp_audio_path)
118
- else:
119
- if uploaded_file.name.endswith(".docx"):
120
- doc = Document(uploaded_file)
121
- combined_text += "\n".join([para.text for para in doc.paragraphs])
122
- elif uploaded_file.name.endswith(".pdf"):
123
- pdf_reader = PyPDF2.PdfReader(uploaded_file)
124
- combined_text += "\n".join([page.extract_text() for page in pdf_reader.pages if page.extract_text()])
125
-
126
- if combined_text:
127
- st.write("### Transcribed and Extracted Text:")
128
- st.text_area("Meeting Transcript", combined_text, height=300)
129
-
130
- if st.button("Generate Meeting Minutes", key="generate_minutes"):
131
- prompt = f"""
132
- Based on the following meeting transcript, generate structured meeting minutes in the format below:
133
-
134
- ---
135
- **Meeting Name:** [Enter meeting name]
136
- **Location:** [Enter location]
137
- **Date:** [Enter date]
138
- **Time:** [Enter time]
139
- **Attendees:** [List attendees]
140
- ### Agenda Items
141
- - [Agenda Item 1]
142
- - [Agenda Item 2]
143
- - [Agenda Item 3]
144
- ### Action Items
145
- | Action Item | Owner(s) | Deadline | Status |
146
- |------------|---------|----------|--------|
147
- | [Action Item 1] | [Owner(s) 1] | [Deadline 1] | [Status 1] |
148
- | [Action Item 2] | [Owner(s) 2] | [Deadline 2] | [Status 2] |
149
- | [Action Item 3] | [Owner(s) 3] | [Deadline 3] | [Status 3] |
150
- ---
151
- **Meeting Summary:**
152
- [Brief summary of key points, discussions, and decisions]
153
- **Transcript:**
154
- {combined_text}
155
- """
156
-
157
- response = client.chat.completions.create(
158
- model="gpt-4-turbo",
159
- messages=[
160
- {"role": "system", "content": "You are an AI assistant that generates professional meeting minutes in a structured template."},
161
- {"role": "user", "content": prompt}
162
- ]
163
- )
164
- st.session_state["generated_minutes"] = response.choices[0].message.content
165
-
166
- st.write("### Generated Meeting Minutes:")
167
- st.text_area("Meeting Minutes", st.session_state["generated_minutes"], height=400)
 
1
+ import os
2
+ import gradio as gr
3
+ import subprocess
4
  import streamlit as st
5
  from openai import OpenAI
6
  import time
 
 
 
7
  import PyPDF2
8
+ from docx import Document
9
  from datetime import datetime
10
  from pydub import AudioSegment
 
11
  from io import BytesIO
12
 
13
+ # Streamlit Page Config
14
  st.set_page_config(page_title="Schlager ContractAi")
 
15
  st.title("Schlager ContractAi")
16
  st.caption("Chat with your contract or manage meeting minutes")
17
 
 
19
  with st.sidebar:
20
  OPENAI_API_KEY = st.text_input("Enter your C2 Group of Technologies Access Key", type="password")
21
 
22
+ # Tabs
23
+ tab1, tab2, tab3, tab4 = st.tabs(["Contract", "Technical", "Minutes", "Document Preparation"])
24
 
25
  SUPPORTED_AUDIO_FORMATS = (".mp3", ".wav", ".m4a")
26
  SUPPORTED_TEXT_FORMATS = (".txt", ".docx", ".csv", ".xlsx", ".pdf")
27
 
28
def install_dependencies():
    """Install the system packages that the PDF OCR step shells out to.

    Installs ``poppler-utils`` (provides ``pdftoppm``) and ``tesseract-ocr``
    with its English language data via ``apt-get``.

    This function is invoked at module level, so it runs every time the
    script is executed. To avoid repeating ``apt-get update`` on each run,
    the installation is skipped when both executables are already on PATH.

    Failures are reported with ``print`` and never raised, so the rest of
    the app can still start when the packages cannot be installed.
    """
    import shutil

    # Fast path: both tools resolve on PATH, nothing to install.
    # NOTE(review): this does not verify that the English traineddata is
    # present; presumably it ships alongside tesseract-ocr -- confirm.
    if shutil.which("pdftoppm") and shutil.which("tesseract"):
        return

    packages = ("poppler-utils", "tesseract-ocr", "tesseract-ocr-eng")
    try:
        subprocess.run(['apt-get', 'update'], check=True)
        for package in packages:
            subprocess.run(['apt-get', 'install', '-y', package], check=True)
        print("Packages installed successfully!")
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers hosts where apt-get itself does not exist; the
        # original let that propagate and abort the whole script.
        print(f"An error occurred: {e}")
37
+
38
+ install_dependencies()
39
+
40
def process_pdf(file):
    """OCR every page of a PDF and write the combined text to a ``.txt`` file.

    The PDF is rasterised to one PNG per page with ``pdftoppm``; each PNG is
    run through ``tesseract``; the per-page text is concatenated in page
    order, with a newline after each page.

    Args:
        file: Object with a ``name`` attribute. If it also exposes
            ``getvalue()`` (an in-memory upload), its bytes are written to a
            temporary PDF first; otherwise ``file.name`` is treated as the
            path of a PDF that already exists on disk.

    Returns:
        Path of the text file that was written: ``file.name`` with its
        extension replaced by ``.txt``.

    Raises:
        subprocess.CalledProcessError: ``pdftoppm`` or ``tesseract`` exited
            with a non-zero status.
        OSError: One of the executables is missing, or a file could not be
            read or written.
    """
    import tempfile

    input_pdf = file.name
    # splitext rather than [:-4], so names without a 4-character extension
    # are not truncated in the middle.
    output_txt_file = f"{os.path.splitext(input_pdf)[0]}.txt"

    # All intermediate files live in a private scratch directory, so
    # concurrent runs cannot collide and cleanup can never delete unrelated
    # files in the working directory that happen to start with "img".
    with tempfile.TemporaryDirectory() as work_dir:
        if hasattr(file, "getvalue"):
            # In-memory upload: the bytes are not on disk yet, so pdftoppm
            # would have nothing to open under file.name.
            pdf_path = os.path.join(work_dir, "input.pdf")
            with open(pdf_path, "wb") as pdf_out:
                pdf_out.write(file.getvalue())
        else:
            pdf_path = os.path.abspath(input_pdf)

        # Argument lists (no shell) so a crafted file name cannot inject
        # shell commands.
        subprocess.run(
            ["pdftoppm", "-png", pdf_path, os.path.join(work_dir, "img")],
            check=True,
        )

        # os.listdir() order is arbitrary; sort so pages come out in order.
        # NOTE(review): relies on pdftoppm zero-padding page numbers to a
        # uniform width within one run -- confirm against its man page.
        page_images = sorted(
            name
            for name in os.listdir(work_dir)
            if name.startswith("img") and name.endswith(".png")
        )

        with open(output_txt_file, "w", encoding="utf-8") as output_file:
            for image in page_images:
                # tesseract appends ".txt" to the output base it is given.
                ocr_base = os.path.join(work_dir, f"ocr_{image}")
                subprocess.run(
                    ["tesseract", os.path.join(work_dir, image), ocr_base],
                    check=True,
                )
                with open(f"{ocr_base}.txt", "r", encoding="utf-8") as page_text:
                    output_file.write(page_text.read())
                output_file.write("\n")

    return output_txt_file
62
+
63
# "Document Preparation" tab: upload one PDF, OCR it with process_pdf, and
# offer the extracted text as a download.
with tab4:
    st.subheader("Document Preparation")
    uploaded_file = st.file_uploader("Upload a PDF (Max: 200MB)", type=["pdf"], accept_multiple_files=False)

    if uploaded_file:
        st.write("Processing the uploaded document...")
        result_file = process_pdf(uploaded_file)

        # Read through a context manager so the handle is closed promptly
        # instead of being left for the garbage collector.
        with open(result_file, "rb") as extracted:
            extracted_bytes = extracted.read()

        st.download_button(
            label="Download Extracted Text",
            data=extracted_bytes,
            # Offer only the bare file name, even if process_pdf returned a
            # path that includes directories.
            file_name=os.path.basename(result_file),
            mime="text/plain"
        )
76
 
77
+ # Contract Chat Section
78
  def contract_chat_section(tab, assistant_id, session_key, input_key):
79
  with tab:
80
  st.subheader("Chat")
 
136
  # Technical Chat Section
137
  contract_chat_section(tab2, ASSISTANT_TECHNICAL_ID, "technical_messages", "technical_input")
138