Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -64,7 +64,7 @@ current_request_index = -1 # Изначально указывает на по
|
|
64 |
|
65 |
|
66 |
def download_current_message_database():
|
67 |
-
#
|
68 |
repos = {
|
69 |
'Storage_1': 'Редакторы',
|
70 |
'Storage_Ira': 'Ира',
|
@@ -72,13 +72,13 @@ def download_current_message_database():
|
|
72 |
'Storage_Sveta': 'Света'
|
73 |
}
|
74 |
|
75 |
-
#
|
76 |
base_url = 'https://api.github.com/repos/fruitpicker01/{repo}/contents'
|
77 |
|
78 |
data_list = []
|
79 |
|
80 |
headers = {
|
81 |
-
"Authorization": f"token {token}",
|
82 |
"Content-Type": "application/json"
|
83 |
}
|
84 |
|
@@ -87,22 +87,22 @@ def download_current_message_database():
|
|
87 |
response = requests.get(url, headers=headers)
|
88 |
if response.status_code == 200:
|
89 |
files = response.json()
|
90 |
-
#
|
91 |
json_files = [file for file in files if file['name'].startswith("file") and file['name'].endswith('.json')]
|
92 |
for file_info in json_files:
|
93 |
file_name = file_info['name']
|
94 |
file_url = file_info['download_url']
|
95 |
|
96 |
-
#
|
97 |
try:
|
98 |
timestamp = file_name.split('_')[1].split('.')[0]
|
99 |
-
#
|
100 |
save_date = datetime.utcfromtimestamp(int(timestamp)) + timedelta(hours=3)
|
101 |
save_date = save_date.strftime('%Y-%m-%d %H:%M:%S')
|
102 |
except:
|
103 |
-
save_date = None #
|
104 |
|
105 |
-
#
|
106 |
file_response = requests.get(file_url)
|
107 |
if file_response.status_code == 200:
|
108 |
data = json.loads(file_response.text)
|
@@ -111,21 +111,21 @@ def download_current_message_database():
|
|
111 |
normalized_data['Автор'] = author
|
112 |
data_list.append(normalized_data)
|
113 |
else:
|
114 |
-
print(f"
|
115 |
else:
|
116 |
-
print(f"
|
117 |
|
118 |
if data_list:
|
119 |
df = pd.concat(data_list, ignore_index=True)
|
120 |
-
#
|
121 |
df['Дата сохранения'] = pd.to_datetime(df['Дата сохранения'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
|
122 |
|
123 |
-
#
|
124 |
df.drop_duplicates(inplace=True)
|
125 |
-
#
|
126 |
df.sort_values(by='Дата сохранения', ascending=False, inplace=True)
|
127 |
|
128 |
-
#
|
129 |
desired_columns = [
|
130 |
"Модель", "Автор", "Дата сохранения", "Персонализированное сообщение",
|
131 |
"Комментарий", "Откорректированное сообщение", "Описание предложения",
|
@@ -135,33 +135,29 @@ def download_current_message_database():
|
|
135 |
"Персонализированный промпт"
|
136 |
]
|
137 |
|
138 |
-
#
|
139 |
existing_columns = [col for col in desired_columns if col in df.columns]
|
140 |
df = df[existing_columns]
|
141 |
|
142 |
-
#
|
143 |
output = io.BytesIO()
|
144 |
with pd.ExcelWriter(output, engine='openpyxl') as writer:
|
145 |
df.to_excel(writer, index=False)
|
146 |
|
147 |
-
#
|
148 |
workbook = writer.book
|
149 |
worksheet = writer.sheets['Sheet1']
|
150 |
|
151 |
-
#
|
152 |
from openpyxl.styles import Alignment
|
153 |
|
154 |
-
#
|
155 |
columns_fit_content = ["Модель", "Автор", "Дата сохранения", "Пол", "Поколение", "Психотип", "Стадия бизнеса", "ОПФ"]
|
156 |
-
#
|
157 |
columns_wrap_text = ["Персонализированное сообщение", "Комментарий", "Откорректированное сообщение"]
|
158 |
|
159 |
for idx, col in enumerate(df.columns, 1):
|
160 |
column_letter = get_column_letter(idx)
|
161 |
-
# Установка выравнивания для заголовков
|
162 |
-
header_cell = worksheet.cell(row=1, column=idx)
|
163 |
-
header_cell.alignment = Alignment(wrap_text=True, vertical='center', horizontal='left')
|
164 |
-
|
165 |
if col in columns_fit_content:
|
166 |
max_length = 0
|
167 |
column = df[col].astype(str)
|
@@ -170,35 +166,29 @@ def download_current_message_database():
|
|
170 |
max_length = max(max_length, len(cell_value))
|
171 |
adjusted_width = (max_length + 2)
|
172 |
worksheet.column_dimensions[column_letter].width = adjusted_width
|
173 |
-
# Установка выравнивания для всех ячеек в столбце
|
174 |
-
for cell in worksheet[column_letter]:
|
175 |
-
cell.alignment = Alignment(wrap_text=True, vertical='center', horizontal='left')
|
176 |
elif col in columns_wrap_text:
|
177 |
-
worksheet.column_dimensions[column_letter].width = 50 #
|
178 |
for cell in worksheet[column_letter]:
|
179 |
-
cell.alignment = Alignment(wrap_text=True
|
180 |
else:
|
181 |
-
worksheet.column_dimensions[column_letter].width = 20 #
|
182 |
-
# Установка выравнивания для всех ячеек в столбце
|
183 |
-
for cell in worksheet[column_letter]:
|
184 |
-
cell.alignment = Alignment(wrap_text=True, vertical='center', horizontal='left')
|
185 |
|
186 |
-
output.seek(0) #
|
187 |
|
188 |
-
#
|
189 |
content = base64.b64encode(output.read()).decode('utf-8')
|
190 |
|
191 |
-
#
|
192 |
repo = "fruitpicker01/Storage_dev"
|
193 |
|
194 |
-
#
|
195 |
current_time = datetime.utcnow() + timedelta(hours=3)
|
196 |
filename = f"db_{current_time.strftime('%d.%m.%Y_%H.%M')}.xlsx"
|
197 |
|
198 |
-
path = filename #
|
199 |
url = f"https://api.github.com/repos/{repo}/contents/{path}"
|
200 |
|
201 |
-
#
|
202 |
get_response = requests.get(url, headers=headers)
|
203 |
if get_response.status_code == 200:
|
204 |
sha = get_response.json()['sha']
|
@@ -206,61 +196,70 @@ def download_current_message_database():
|
|
206 |
sha = None
|
207 |
|
208 |
data = {
|
209 |
-
"message": f"
|
210 |
"content": content,
|
211 |
}
|
212 |
if sha:
|
213 |
-
data["sha"] = sha #
|
214 |
|
215 |
-
#
|
216 |
put_response = requests.put(url, headers=headers, data=json.dumps(data))
|
217 |
if put_response.status_code in [200, 201]:
|
218 |
-
#
|
219 |
download_url = f"https://raw.githubusercontent.com/{repo}/main/{path}"
|
220 |
-
return download_url, df #
|
221 |
else:
|
222 |
-
print(f"
|
223 |
-
return "
|
224 |
else:
|
225 |
-
return "
|
226 |
|
227 |
|
228 |
def update_download_link():
|
229 |
result = download_current_message_database()
|
230 |
link, df = result if isinstance(result, tuple) else (result, None)
|
231 |
if isinstance(link, str) and link.startswith("http") and df is not None:
|
232 |
-
#
|
233 |
total_messages = len(df)
|
234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
model_counts = df['Модель'].value_counts()
|
236 |
-
#
|
237 |
-
clean_df = df[df['Комментарий'].isna() & df['Откорректированное сообщение'].isna()]
|
238 |
clean_model_counts = clean_df['Модель'].value_counts()
|
239 |
-
#
|
240 |
contributor_counts = df['Автор'].value_counts()
|
241 |
-
#
|
242 |
df['Дата сохранения'] = pd.to_datetime(df['Дата сохранения'], errors='coerce')
|
243 |
date_counts = df['Дата сохранения'].dt.date.value_counts().sort_index()
|
244 |
|
245 |
-
#
|
246 |
total_messages_display_value = f"**Общее количество сообщений:** {total_messages}"
|
|
|
247 |
|
248 |
-
#
|
249 |
model_pie_fig = px.pie(values=model_counts.values, names=model_counts.index, title='Количество сообщений по моделям')
|
250 |
-
clean_model_pie_fig = px.pie(values=clean_model_counts.values, names=clean_model_counts.index, title='Количество
|
251 |
contributor_pie_fig = px.pie(values=contributor_counts.values, names=contributor_counts.index, title='Наиболее активные контрибьюторы')
|
252 |
date_message_fig = px.bar(x=date_counts.index, y=date_counts.values, labels={'x': 'Дата', 'y': 'Количество сообщений'}, title='Количество добавленных сообщений по датам')
|
253 |
|
254 |
return (
|
255 |
f"[Скачать базу сообщений]({link})",
|
256 |
total_messages_display_value,
|
|
|
257 |
model_pie_fig,
|
258 |
clean_model_pie_fig,
|
259 |
contributor_pie_fig,
|
260 |
date_message_fig
|
261 |
)
|
262 |
else:
|
263 |
-
return link, "", None, None, None, None
|
264 |
|
265 |
|
266 |
def correct_dash_usage(text):
|
@@ -1994,10 +1993,11 @@ with gr.Blocks() as demo:
|
|
1994 |
gr.Markdown("## Аналитика")
|
1995 |
|
1996 |
total_messages_display = gr.Markdown(value="", label="Общее количество сообщений")
|
|
|
1997 |
|
1998 |
with gr.Row():
|
1999 |
model_pie_chart = gr.Plot(label="Количество сообщений по моделям")
|
2000 |
-
clean_model_pie_chart = gr.Plot(label="Количество
|
2001 |
contributor_pie_chart = gr.Plot(label="Наиболее активные контрибьюторы")
|
2002 |
|
2003 |
date_message_chart = gr.Plot(label="Количество добавленных сообщений по датам")
|
@@ -2672,6 +2672,7 @@ with gr.Blocks() as demo:
|
|
2672 |
outputs=[
|
2673 |
download_link,
|
2674 |
total_messages_display,
|
|
|
2675 |
model_pie_chart,
|
2676 |
clean_model_pie_chart,
|
2677 |
contributor_pie_chart,
|
|
|
64 |
|
65 |
|
66 |
def download_current_message_database():
|
67 |
+
# Mapping of GitHub repositories and authors
|
68 |
repos = {
|
69 |
'Storage_1': 'Редакторы',
|
70 |
'Storage_Ira': 'Ира',
|
|
|
72 |
'Storage_Sveta': 'Света'
|
73 |
}
|
74 |
|
75 |
+
# Base GitHub API URL
|
76 |
base_url = 'https://api.github.com/repos/fruitpicker01/{repo}/contents'
|
77 |
|
78 |
data_list = []
|
79 |
|
80 |
headers = {
|
81 |
+
"Authorization": f"token {token}",
|
82 |
"Content-Type": "application/json"
|
83 |
}
|
84 |
|
|
|
87 |
response = requests.get(url, headers=headers)
|
88 |
if response.status_code == 200:
|
89 |
files = response.json()
|
90 |
+
# Filter files starting with 'file' and ending with '.json'
|
91 |
json_files = [file for file in files if file['name'].startswith("file") and file['name'].endswith('.json')]
|
92 |
for file_info in json_files:
|
93 |
file_name = file_info['name']
|
94 |
file_url = file_info['download_url']
|
95 |
|
96 |
+
# Extract timestamp from filename
|
97 |
try:
|
98 |
timestamp = file_name.split('_')[1].split('.')[0]
|
99 |
+
# Add 3 hours to the time
|
100 |
save_date = datetime.utcfromtimestamp(int(timestamp)) + timedelta(hours=3)
|
101 |
save_date = save_date.strftime('%Y-%m-%d %H:%M:%S')
|
102 |
except:
|
103 |
+
save_date = None # or set default value
|
104 |
|
105 |
+
# Download and decode file content
|
106 |
file_response = requests.get(file_url)
|
107 |
if file_response.status_code == 200:
|
108 |
data = json.loads(file_response.text)
|
|
|
111 |
normalized_data['Автор'] = author
|
112 |
data_list.append(normalized_data)
|
113 |
else:
|
114 |
+
print(f"Error downloading file {file_name} from repository {repo_name}: {file_response.status_code}")
|
115 |
else:
|
116 |
+
print(f"Error accessing repository {repo_name}: {response.status_code}")
|
117 |
|
118 |
if data_list:
|
119 |
df = pd.concat(data_list, ignore_index=True)
|
120 |
+
# Parse 'Дата сохранения' as datetime; values that do not match the format become NaT
|
121 |
df['Дата сохранения'] = pd.to_datetime(df['Дата сохранения'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
|
122 |
|
123 |
+
# Remove duplicates
|
124 |
df.drop_duplicates(inplace=True)
|
125 |
+
# Sort by 'Дата сохранения' descending
|
126 |
df.sort_values(by='Дата сохранения', ascending=False, inplace=True)
|
127 |
|
128 |
+
# Set column order
|
129 |
desired_columns = [
|
130 |
"Модель", "Автор", "Дата сохранения", "Персонализированное сообщение",
|
131 |
"Комментарий", "Откорректированное сообщение", "Описание предложения",
|
|
|
135 |
"Персонализированный промпт"
|
136 |
]
|
137 |
|
138 |
+
# Keep only the desired columns that are actually present in the DataFrame
|
139 |
existing_columns = [col for col in desired_columns if col in df.columns]
|
140 |
df = df[existing_columns]
|
141 |
|
142 |
+
# Save Excel file in memory
|
143 |
output = io.BytesIO()
|
144 |
with pd.ExcelWriter(output, engine='openpyxl') as writer:
|
145 |
df.to_excel(writer, index=False)
|
146 |
|
147 |
+
# Get worksheet
|
148 |
workbook = writer.book
|
149 |
worksheet = writer.sheets['Sheet1']
|
150 |
|
151 |
+
# Adjust column widths and text wrapping
|
152 |
from openpyxl.styles import Alignment
|
153 |
|
154 |
+
# Columns to fit content width
|
155 |
columns_fit_content = ["Модель", "Автор", "Дата сохранения", "Пол", "Поколение", "Психотип", "Стадия бизнеса", "ОПФ"]
|
156 |
+
# Columns to set fixed width and wrap text
|
157 |
columns_wrap_text = ["Персонализированное сообщение", "Комментарий", "Откорректированное сообщение"]
|
158 |
|
159 |
for idx, col in enumerate(df.columns, 1):
|
160 |
column_letter = get_column_letter(idx)
|
|
|
|
|
|
|
|
|
161 |
if col in columns_fit_content:
|
162 |
max_length = 0
|
163 |
column = df[col].astype(str)
|
|
|
166 |
max_length = max(max_length, len(cell_value))
|
167 |
adjusted_width = (max_length + 2)
|
168 |
worksheet.column_dimensions[column_letter].width = adjusted_width
|
|
|
|
|
|
|
169 |
elif col in columns_wrap_text:
|
170 |
+
worksheet.column_dimensions[column_letter].width = 50 # Set fixed width
|
171 |
for cell in worksheet[column_letter]:
|
172 |
+
cell.alignment = Alignment(wrap_text=True)
|
173 |
else:
|
174 |
+
worksheet.column_dimensions[column_letter].width = 20 # Default width
|
|
|
|
|
|
|
175 |
|
176 |
+
output.seek(0) # Reset pointer
|
177 |
|
178 |
+
# Encode file content in base64 for uploading to GitHub
|
179 |
content = base64.b64encode(output.read()).decode('utf-8')
|
180 |
|
181 |
+
# Parameters for uploading file to GitHub
|
182 |
repo = "fruitpicker01/Storage_dev"
|
183 |
|
184 |
+
# Get current time and adjust by +3 hours
|
185 |
current_time = datetime.utcnow() + timedelta(hours=3)
|
186 |
filename = f"db_{current_time.strftime('%d.%m.%Y_%H.%M')}.xlsx"
|
187 |
|
188 |
+
path = filename # Use new filename
|
189 |
url = f"https://api.github.com/repos/{repo}/contents/{path}"
|
190 |
|
191 |
+
# Check if file already exists
|
192 |
get_response = requests.get(url, headers=headers)
|
193 |
if get_response.status_code == 200:
|
194 |
sha = get_response.json()['sha']
|
|
|
196 |
sha = None
|
197 |
|
198 |
data = {
|
199 |
+
"message": f"Updated file {filename}",
|
200 |
"content": content,
|
201 |
}
|
202 |
if sha:
|
203 |
+
data["sha"] = sha # Needed for updating existing file
|
204 |
|
205 |
+
# Upload (or update) file on GitHub
|
206 |
put_response = requests.put(url, headers=headers, data=json.dumps(data))
|
207 |
if put_response.status_code in [200, 201]:
|
208 |
+
# Get download link
|
209 |
download_url = f"https://raw.githubusercontent.com/{repo}/main/{path}"
|
210 |
+
return download_url, df # Return file URL and DataFrame
|
211 |
else:
|
212 |
+
print(f"Error uploading file to GitHub: {put_response.status_code}, {put_response.text}")
|
213 |
+
return "Error uploading file to GitHub.", None
|
214 |
else:
|
215 |
+
return "No data to generate file.", None
|
216 |
|
217 |
|
218 |
def update_download_link():
    """Refresh the database download link and the analytics derived from it.

    Calls ``download_current_message_database()``. When that yields an
    ``http`` link together with a DataFrame, computes the message counters
    and builds the plotly figures for the analytics section.

    Returns:
        tuple: seven values, in order - markdown download link,
        total-messages markdown, clean-messages markdown, per-model pie
        figure, per-model pie figure for clean messages, per-author pie
        figure, per-date bar figure. When no link/DataFrame is available:
        the first value produced by the download step, two empty strings
        and four ``None`` placeholders.
    """
    result = download_current_message_database()
    link, df = result if isinstance(result, tuple) else (result, None)

    # Guard clause: anything other than an http link plus a DataFrame is
    # passed through as-is with empty analytics.
    if not (isinstance(link, str) and link.startswith("http") and df is not None):
        return link, "", "", None, None, None, None

    def optional_column(name):
        # A column that no stored record provided is treated as a column
        # where every value is missing, instead of raising KeyError.
        if name in df.columns:
            return df[name]
        return pd.Series(index=df.index, dtype=object)

    total_messages = len(df)

    # "Clean" messages: a non-blank personalised message with neither a
    # comment nor a corrected version attached.
    message = optional_column('Персонализированное сообщение')
    # astype(str) keeps the blank check working when the column is not
    # string-typed (e.g. an all-NaN float column), where the .str accessor
    # would raise AttributeError; missing values are still excluded by notna().
    has_text = message.notna() & message.astype(str).str.strip().ne('')
    not_corrected = (
        optional_column('Комментарий').isna()
        & optional_column('Откорректированное сообщение').isna()
    )
    clean_df = df[has_text & not_corrected]
    total_clean_messages = len(clean_df)

    # Per-model, per-author and per-day message counts.
    model_counts = df['Модель'].value_counts()
    clean_model_counts = clean_df['Модель'].value_counts()
    contributor_counts = df['Автор'].value_counts()
    # Parsed into a local series so the DataFrame itself is left untouched.
    save_dates = pd.to_datetime(df['Дата сохранения'], errors='coerce')
    date_counts = save_dates.dt.date.value_counts().sort_index()

    # Markdown texts for the two counters.
    total_messages_display_value = f"**Общее количество сообщений:** {total_messages}"
    total_clean_messages_display_value = f"**Общее количество сообщений без необходимости корректировок:** {total_clean_messages}"

    # Charts (plotly express).
    model_pie_fig = px.pie(values=model_counts.values, names=model_counts.index, title='Количество сообщений по моделям')
    clean_model_pie_fig = px.pie(values=clean_model_counts.values, names=clean_model_counts.index, title='Количество сообщений без необходимости корректировок по моделям')
    contributor_pie_fig = px.pie(values=contributor_counts.values, names=contributor_counts.index, title='Наиболее активные контрибьюторы')
    date_message_fig = px.bar(x=date_counts.index, y=date_counts.values, labels={'x': 'Дата', 'y': 'Количество сообщений'}, title='Количество добавленных сообщений по датам')

    return (
        f"[Скачать базу сообщений]({link})",
        total_messages_display_value,
        total_clean_messages_display_value,
        model_pie_fig,
        clean_model_pie_fig,
        contributor_pie_fig,
        date_message_fig,
    )
|
263 |
|
264 |
|
265 |
def correct_dash_usage(text):
|
|
|
1993 |
gr.Markdown("## Аналитика")
|
1994 |
|
1995 |
total_messages_display = gr.Markdown(value="", label="Общее количество сообщений")
|
1996 |
+
total_clean_messages_display = gr.Markdown(value="", label="Общее количество сообщений без необходимости корректировок")
|
1997 |
|
1998 |
with gr.Row():
|
1999 |
model_pie_chart = gr.Plot(label="Количество сообщений по моделям")
|
2000 |
+
clean_model_pie_chart = gr.Plot(label="Количество сообщений без необходимости корректировок по моделям")
|
2001 |
contributor_pie_chart = gr.Plot(label="Наиболее активные контрибьюторы")
|
2002 |
|
2003 |
date_message_chart = gr.Plot(label="Количество добавленных сообщений по датам")
|
|
|
2672 |
outputs=[
|
2673 |
download_link,
|
2674 |
total_messages_display,
|
2675 |
+
total_clean_messages_display,
|
2676 |
model_pie_chart,
|
2677 |
clean_model_pie_chart,
|
2678 |
contributor_pie_chart,
|