Spaces:
Sleeping
Sleeping
acecalisto3
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -200,83 +200,3 @@ def create_network_graph(df):
|
|
200 |
node_colors.append('red')
|
201 |
elif graph.nodes[node]['type'] == 'author':
|
202 |
node_colors.append('blue')
|
203 |
-
else:
|
204 |
-
node_colors.append('green')
|
205 |
-
|
206 |
-
# Set node labels
|
207 |
-
node_labels = []
|
208 |
-
for node in graph.nodes():
|
209 |
-
node_labels.append(node)
|
210 |
-
|
211 |
-
node_trace.marker.color = node_colors
|
212 |
-
node_trace.text = node_labels
|
213 |
-
|
214 |
-
# Create the figure
|
215 |
-
fig = go.Figure(data=[edge_trace, node_trace],
|
216 |
-
layout=go.Layout(
|
217 |
-
title="GitHub Issue Network Graph",
|
218 |
-
showlegend=False,
|
219 |
-
hovermode='closest',
|
220 |
-
margin=dict(b=20, l=5, r=5, t=40),
|
221 |
-
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
222 |
-
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
|
223 |
-
)
|
224 |
-
)
|
225 |
-
|
226 |
-
# Display the figure in a Streamlit app
|
227 |
-
st.plotly_chart(fig)
|
228 |
-
|
229 |
-
# Function to build a predictive model for issue resolution time
|
230 |
-
def build_predictive_model(df):
    """Train and evaluate a classifier for issue resolution time.

    Derives calendar and categorical features from ``df``, fits a
    ``StandardScaler`` + ``LogisticRegression`` pipeline on an 80/20
    train/test split, and renders the accuracy and the classification
    report on the Streamlit page.

    Args:
        df: DataFrame of issues. The code reads the columns ``created_at``
            (through the ``.dt`` accessor), ``author``, ``assignee``,
            ``sentiment`` and ``resolution_time``. It is not modified.

    Returns:
        None. Results are written to the Streamlit page.
    """
    # NOTE(review): LogisticRegression and accuracy_score treat
    # ``resolution_time`` as class labels -- presumably it is already
    # bucketed; if it is a continuous duration a regressor is needed.
    # TODO confirm against clean_and_structure_data.
    target = 'resolution_time'
    features = [
        'created_at_day',
        'created_at_weekday',
        'created_at_hour',
        'author_encoded',
        'assignee_encoded',
        'sentiment',
    ]

    # Feature engineering. ``assign`` returns a new frame, so the caller's
    # DataFrame does not silently gain the derived columns.
    created = df['created_at']
    model_df = df.assign(
        created_at_day=created.dt.day,
        created_at_weekday=created.dt.weekday,
        created_at_hour=created.dt.hour,
        author_encoded=df['author'].astype('category').cat.codes,
        assignee_encoded=df['assignee'].astype('category').cat.codes,
    )

    # LogisticRegression does not accept NaN, so keep only rows where every
    # modelling column is present.
    model_df = model_df.dropna(subset=features + [target])
    if model_df.empty:
        st.warning("No complete rows available to train the predictive model.")
        return

    # Split data into training and testing sets (fixed seed for repeatability).
    X_train, X_test, y_train, y_test = train_test_split(
        model_df[features], model_df[target], test_size=0.2, random_state=42
    )

    # Scale the features, then fit the classifier.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('model', LogisticRegression()),
    ])
    pipeline.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = pipeline.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    st.write("Accuracy:", accuracy)
    # The report is a whitespace-aligned plain-text table; st.text keeps the
    # fixed-width layout, whereas st.write would render it as Markdown.
    st.text(classification_report(y_test, y_pred))
|
259 |
-
|
260 |
-
# Main function
|
261 |
-
if __name__ == "__main__":
    # Target repository -- replace with your GitHub username and repository name.
    username, repository = "Ig0tU", "miagiii"

    # Download the raw issues, then turn them into a structured DataFrame.
    issues_data = fetch_issue_data(username, repository, 1, 10)
    df = clean_and_structure_data(issues_data)

    # Run every analysis stage on the same DataFrame, in order:
    # exploratory analysis, NLP text analysis, the issue/author/assignee
    # network graph, and finally the resolution-time model.
    for stage in (
        perform_eda,
        analyze_text_content,
        create_network_graph,
        build_predictive_model,
    ):
        stage(df)
|
|
|
200 |
node_colors.append('red')
|
201 |
elif graph.nodes[node]['type'] == 'author':
|
202 |
node_colors.append('blue')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|