Spaces:

acecalisto3
/

AiAi0

Running

App Files Files Community

acecalisto3 committed on Jun 18, 2024

Commit

5538e6a

verified ·

1 Parent(s): 5931852

Update app.py

Browse files

Files changed (1) hide show

app.py +281 -65

app.py CHANGED Viewed

@@ -1,66 +1,282 @@
-# Import necessary libraries
 import gradio as gr
-import torch
-import torchvision.transforms as T
-from torchvision.models.detection import maskrcnn_resnet50_fpn
-from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
-from google_drive_downloader import GoogleDriveDownloader as gdd
-# Function to download the RAG model and tokenizer
-def download_models(file_id_model, file_id_tokenizer, dest_path_model, dest_path_tokenizer):
-    gdd.download_file_from_google_drive(file_id_model, dest_path_model)
-    gdd.download_file_from_google_drive(file_id_tokenizer, dest_path_tokenizer)
-# Download the RAG model and tokenizer
-download_models(
-    file_id_model='your_model_file_id',
-    file_id_tokenizer='your_tokenizer_file_id',
-    dest_path_model='./model.pt',
-    dest_path_tokenizer='./tokenizer'
-)
-# Load the RAG model and tokenizer
-tokenizer = RagTokenizer.from_pretrained('./tokenizer')
-retriever = RagRetriever.from_pretrained('./model.pt')
-model = RagSequenceForGeneration.from_pretrained('./model.pt')
-# Load the Mask R-CNN model
-model_rcnn = maskrcnn_resnet50_fpn(pretrained=True)
-model_rcnn.eval()
-# Define the class labels for the COCO dataset
-class_labels = [
-    # ...
-]
-# Define the image-to-text object segmentation function
-def image_to_text_segmentation(image):
-    # Preprocess the image and run it through the Mask R-CNN model
-    # ...
-    # Generate the segmented text for each object
-    segmented_text = []
-    # ...
-    return segmented_text
-# Define the Gradio interface for text generation
-text_generation_iface = gr.Interface(
-    fn=generate_text,
-    inputs=input_text,
-    outputs=output_text,
-    title=title,
-    description=description,
-    examples=[
-        # ...
-    ]
-).launch()
-# Define the Gradio interface for image-to-text segmentation
-segmentation_iface = gr.Interface(
-    fn=image_to_text_segmentation,
-    inputs=input_image,
-    outputs=output_text,
-    title="Image-to-Text Object Segmentation",
-    description="Segment objects in the image and generate corresponding text."
-).launch()

+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import datetime
+import nltk
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import word_tokenize
+from gensim.models import LdaModel
+from gensim.corpora import Dictionary
+from textblob import TextBlob
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+import networkx as nx
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import Pipeline
+from sklearn.feature_extraction.text import TfidfVectorizer
+from scipy import linalg
+import plotly.graph_objects as go
+from collections import Counter
+import warnings
+import transformers
 import gradio as gr
+import streamlit as st
+warnings.filterwarnings("ignore")
+# Set up logging
+import logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Function to fetch HTML content from GitHub issue pages
+def fetch_issue_data(username, repository, start_page, end_page, timeout=30):
+    """Scrape every issue linked from a range of GitHub issue-list pages.
+
+    Args:
+        username: Owner segment of the repository URL.
+        repository: Repository name segment of the URL.
+        start_page: First issue-list page to request (inclusive).
+        end_page: Last issue-list page to request (inclusive).
+        timeout: Seconds to wait for each list-page response before giving up.
+
+    Returns:
+        A list with one dict per issue, each produced by ``fetch_issue_details``.
+    """
+    issues_data = []
+    for page in range(start_page, end_page + 1):
+        url = f"https://github.com/{username}/{repository}/issues?page={page}"
+        # Without a timeout a stalled connection would block the whole app indefinitely.
+        response = requests.get(url, timeout=timeout)
+        if not response.ok:
+            # Skip error/rate-limit pages instead of parsing them as if they were the issue list.
+            logging.warning("Skipping %s: HTTP %s", url, response.status_code)
+            continue
+        soup = BeautifulSoup(response.content, 'html.parser')
+        for issue_element in soup.find_all('div', class_='flex-shrink-0'):
+            issue_anchor = issue_element.find('a', class_='Link--primary')
+            # Not every matching <div> wraps an issue link; subscripting the None
+            # returned by find() used to raise TypeError and abort the whole scrape.
+            if issue_anchor is None or not issue_anchor.get('href'):
+                continue
+            issue_url = f"https://github.com{issue_anchor['href']}"
+            issues_data.append(fetch_issue_details(issue_url))
+    return issues_data
+# Function to fetch details of a specific issue
+def fetch_issue_details(issue_url, timeout=30):
+    """Scrape a single GitHub issue page into a flat dict.
+
+    Args:
+        issue_url: Absolute URL of the issue page.
+        timeout: Seconds to wait for the HTTP response before giving up.
+
+    Returns:
+        A dict with the keys ``title``, ``body``, ``created_at``, ``closed_at``,
+        ``author`` and ``assignee``. ``title`` and ``body`` fall back to an empty
+        string and the remaining fields to ``None`` when the corresponding
+        markup is not present on the page.
+    """
+    response = requests.get(issue_url, timeout=timeout)
+    soup = BeautifulSoup(response.content, 'html.parser')
+
+    def _text(tag_name, css_class, default=None):
+        # find() returns None for absent markup; calling .text on it raised AttributeError.
+        element = soup.find(tag_name, class_=css_class)
+        return element.text.strip() if element is not None else default
+
+    def _datetime_attr(element):
+        # Tolerate both a missing element and an element without a datetime attribute.
+        return element.get('datetime') if element is not None else None
+
+    # NOTE(review): treating the first <relative-time class="no-wrap"> as the close
+    # timestamp is a markup heuristic inherited from the original code - confirm
+    # against the current GitHub page structure.
+    return {
+        'title': _text('h1', 'gh-header-title', default=''),
+        'body': _text('div', 'markdown-body', default=''),
+        'created_at': _datetime_attr(soup.find('relative-time')),
+        'closed_at': _datetime_attr(soup.find('relative-time', class_='no-wrap')),
+        'author': _text('a', 'author'),
+        'assignee': _text('a', 'Link--muted'),
+    }
+# Function to clean and structure the data
+def clean_and_structure_data(issues_data):
+    """Turn scraped issue dicts into a DataFrame with parsed dates and derived columns.
+
+    Args:
+        issues_data: List of per-issue dicts; ``created_at`` and ``closed_at``
+            are parsed as timestamps when present.
+
+    Returns:
+        A DataFrame holding the original fields plus:
+        ``created_at`` / ``closed_at`` as UTC timestamps (NaT when missing),
+        ``resolution_time`` in whole days (-1 when the issue has no close time),
+        ``is_closed`` as 1/0.
+    """
+    df = pd.DataFrame(issues_data)
+    if 'created_at' not in df.columns:
+        logging.error("The 'created_at' column is missing from the dataframe.")
+    for column in ('created_at', 'closed_at'):
+        # A missing column becomes all-NaT rather than None so the .dt accessor below still works.
+        raw = df[column] if column in df.columns else pd.Series(pd.NaT, index=df.index)
+        # utc=True keeps both columns tz-aware: an all-missing closed_at would otherwise
+        # parse as tz-naive and make the subtraction from a tz-aware created_at raise TypeError.
+        df[column] = pd.to_datetime(raw, utc=True)
+    elapsed = df['closed_at'] - df['created_at']
+    # -1 is the sentinel for "not resolved yet".
+    df['resolution_time'] = elapsed.dt.days.fillna(-1)
+    df['is_closed'] = df['closed_at'].notna().astype(int)
+    return df
+# Function for exploratory data analysis (EDA)
+def perform_eda(df):
+    """Render summary statistics and overview charts for the issue DataFrame in Streamlit.
+
+    Args:
+        df: DataFrame with ``resolution_time``, ``created_at`` (datetime),
+            ``author`` and ``assignee`` columns.
+    """
+    # Descriptive statistics
+    st.write(df.describe())
+
+    # Each chart gets its own figure. Passing the pyplot module to st.pyplot reused the
+    # implicit current figure without clearing it, so the line plot was drawn on top of
+    # the histogram.
+    hist_fig, hist_ax = plt.subplots()
+    sns.histplot(df['resolution_time'], kde=True, ax=hist_ax)
+    st.pyplot(hist_fig)
+    plt.close(hist_fig)  # release the figure so repeated reruns do not accumulate them
+
+    trend_fig, trend_ax = plt.subplots()
+    sns.lineplot(x=df['created_at'].dt.month, y='resolution_time', data=df, ax=trend_ax)
+    st.pyplot(trend_fig)
+    plt.close(trend_fig)
+
+    # Most active people, by number of issues
+    for heading, column in (("\nTop 10 Authors:", 'author'), ("\nTop 10 Assignees:", 'assignee')):
+        st.write(heading)
+        st.write(df[column].value_counts().nlargest(10))
+# Function for text analysis using NLP
+def analyze_text_content(df):
+    """Run topic modeling, sentiment scoring and a word cloud over the issue bodies.
+
+    Adds two columns to ``df`` in place: ``processed_body`` (lemmatized text with
+    English stop words removed) and ``sentiment`` (VADER compound score). All
+    output is written to the Streamlit page.
+
+    Args:
+        df: DataFrame with a ``body`` column of issue text.
+    """
+    # Fetch tokenizer / stop-word / lemmatizer data on first use; without it the NLTK
+    # calls below raise LookupError on a fresh environment. 'punkt_tab' is what newer
+    # NLTK releases look for; on older releases that download is simply a no-op.
+    required_resources = (
+        ('tokenizers/punkt', 'punkt'),
+        ('tokenizers/punkt_tab', 'punkt_tab'),
+        ('corpora/stopwords', 'stopwords'),
+        ('corpora/wordnet', 'wordnet'),
+    )
+    for resource_path, package in required_resources:
+        try:
+            nltk.data.find(resource_path)
+        except LookupError:
+            nltk.download(package, quiet=True)
+
+    # Missing bodies are treated as empty text so one bad row cannot abort the analysis.
+    bodies = df['body'].fillna('')
+
+    # Text preprocessing: tokenize each body once and reuse the tokens for both the
+    # joined text column and the LDA corpus.
+    stop_words = set(stopwords.words('english'))
+    lemmatizer = WordNetLemmatizer()
+    token_lists = [
+        [lemmatizer.lemmatize(word) for word in word_tokenize(text) if word.lower() not in stop_words]
+        for text in bodies
+    ]
+    df['processed_body'] = [' '.join(tokens) for tokens in token_lists]
+
+    # Topic modeling
+    dictionary = Dictionary(token_lists)
+    corpus = [dictionary.doc2bow(tokens) for tokens in token_lists]
+    st.write("Top 5 Topics:")
+    if len(dictionary) > 0:
+        lda_model = LdaModel(corpus, num_topics=5, id2word=dictionary)
+        for topic in lda_model.print_topics(num_words=5):
+            st.write(topic)
+    else:
+        # LdaModel cannot be trained on an empty vocabulary.
+        st.write("No text available for topic modeling.")
+
+    # Sentiment analysis
+    analyzer = SentimentIntensityAnalyzer()
+    df['sentiment'] = bodies.apply(lambda text: analyzer.polarity_scores(text)['compound'])
+    st.write("Sentiment Analysis:")
+    st.write(df['sentiment'].describe())
+
+    # Word Cloud for Common Words
+    from wordcloud import WordCloud
+    all_words = ' '.join(df['processed_body'])
+    if all_words.strip():
+        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(all_words)
+        # Draw first, then hand the finished figure to Streamlit: previously an empty
+        # figure was rendered and the image was drawn afterwards where nobody saw it.
+        fig, ax = plt.subplots(figsize=(10, 6), facecolor=None)
+        ax.imshow(wordcloud)
+        ax.axis("off")
+        fig.tight_layout(pad=0)
+        st.pyplot(fig)
+        plt.close(fig)
+# Function to create a network graph of issues, authors, and assignees
+def create_network_graph(df):
+    """Draw an interactive Plotly graph linking issues to their authors and assignees.
+
+    Every issue title, author and assignee becomes a node; each issue is connected
+    to its author and, when it has one, to its assignee. The figure is rendered
+    with ``st.plotly_chart``.
+
+    Args:
+        df: DataFrame with ``title``, ``author`` and ``assignee`` columns.
+    """
+    graph = nx.Graph()
+    for _, row in df.iterrows():
+        title, author, assignee = row['title'], row['author'], row['assignee']
+        graph.add_node(title, type='issue')
+        graph.add_node(author, type='author')
+        graph.add_edge(title, author)
+        # pd.notna also rejects NaN, which is truthy and would otherwise become a node.
+        if pd.notna(assignee) and assignee:
+            graph.add_node(assignee, type='assignee')
+            graph.add_edge(title, assignee)
+
+    # Interactive Network Graph with Plotly
+    pos = nx.spring_layout(graph, k=0.5)
+
+    # All edges share one line trace: coordinates are a flat sequence with a None
+    # after every segment to break the line. extend() keeps the list flat; append()
+    # produced a list of lists, which Plotly cannot draw as line segments.
+    edge_x = []
+    edge_y = []
+    for source, target in graph.edges():
+        x0, y0 = pos[source]
+        x1, y1 = pos[target]
+        edge_x.extend([x0, x1, None])
+        edge_y.extend([y0, y1, None])
+    edge_trace = go.Scatter(
+        x=edge_x,
+        y=edge_y,
+        line=dict(width=0.5, color='#888'),
+        hoverinfo='none',
+        mode='lines'
+    )
+
+    nodes = list(graph.nodes())
+    # Issues are red, authors blue, anything else (assignees) green.
+    color_by_type = {'issue': 'red', 'author': 'blue'}
+    node_trace = go.Scatter(
+        x=[pos[node][0] for node in nodes],
+        y=[pos[node][1] for node in nodes],
+        mode='markers',
+        marker=dict(
+            color=[color_by_type.get(graph.nodes[node]['type'], 'green') for node in nodes],
+            size=10,
+            line=dict(width=2, color='black')
+        ),
+        text=nodes,  # hover label is the node itself (issue title or user name)
+        hoverinfo='text'
+    )
+
+    # Create the figure
+    fig = go.Figure(data=[edge_trace, node_trace],
+                   layout=go.Layout(
+                       title="GitHub Issue Network Graph",
+                       showlegend=False,
+                       hovermode='closest',
+                       margin=dict(b=20, l=5, r=5, t=40),
+                       xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+                       yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
+                   )
+                  )
+    # Display the figure in a Streamlit app
+    st.plotly_chart(fig)
+# Function to build a predictive model for issue resolution time
+def build_predictive_model(df):
+    """Train and evaluate a classifier that predicts ``resolution_time`` from issue metadata.
+
+    Adds the engineered feature columns (``created_at_day``, ``created_at_weekday``,
+    ``created_at_hour``, ``author_encoded``, ``assignee_encoded``) to ``df`` in place
+    and writes the accuracy and classification report to the Streamlit page.
+
+    Args:
+        df: DataFrame with ``created_at`` (datetime), ``author``, ``assignee`` and
+            ``resolution_time`` columns; ``sentiment`` is used when present.
+    """
+    # Feature engineering
+    df['created_at_day'] = df['created_at'].dt.day
+    df['created_at_weekday'] = df['created_at'].dt.weekday
+    df['created_at_hour'] = df['created_at'].dt.hour
+    df['author_encoded'] = df['author'].astype('category').cat.codes
+    df['assignee_encoded'] = df['assignee'].astype('category').cat.codes
+
+    # Select features and target variable
+    features = ['created_at_day', 'created_at_weekday', 'created_at_hour', 'author_encoded', 'assignee_encoded']
+    if 'sentiment' in df.columns:
+        features.append('sentiment')
+    else:
+        # The sentiment column is produced by a separate analysis step; train without
+        # it rather than failing with KeyError when that step has not run.
+        logging.warning("The 'sentiment' column is missing; training without it.")
+    target = 'resolution_time'
+
+    # NOTE(review): resolution_time is a day count (with -1 for unresolved issues), yet
+    # it is modelled as a class label by LogisticRegression. A regressor trained on
+    # closed issues only may be what is actually wanted - confirm the intent.
+
+    # train_test_split needs at least two rows to produce non-empty train and test sets.
+    if len(df) < 2:
+        st.write("Not enough issues to train a predictive model.")
+        return
+
+    # Split data into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.2, random_state=42)
+
+    # LogisticRegression refuses to fit when the training target has a single class,
+    # e.g. when every scraped issue is still open and all targets are -1.
+    if y_train.nunique() < 2:
+        st.write("Not enough variation in resolution times to train a predictive model.")
+        return
+
+    # Create a pipeline for feature scaling and model training
+    pipeline = Pipeline([
+        ('scaler', StandardScaler()),
+        ('model', LogisticRegression(max_iter=1000))
+    ])
+    # Train the model
+    pipeline.fit(X_train, y_train)
+
+    # Evaluate the model
+    y_pred = pipeline.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    st.write("Accuracy:", accuracy)
+    # st.text keeps the report's fixed-width column alignment; st.write would
+    # render it as Markdown and collapse the whitespace.
+    st.text(classification_report(y_test, y_pred))
+# Main function
+if __name__ == "__main__":
+    # Target repository: replace with your own GitHub username and repository name.
+    username, repository = "Ig0tU", "miagiii"
+    # Scrape issue-list pages 1 through 10 and normalise the result into a DataFrame.
+    df = clean_and_structure_data(fetch_issue_data(username, repository, 1, 10))
+    # Run the analysis stages in order: EDA, NLP text analysis, network graph and
+    # finally the predictive model for issue resolution time.
+    analysis_stages = (
+        perform_eda,
+        analyze_text_content,
+        create_network_graph,
+        build_predictive_model,
+    )
+    for run_stage in analysis_stages:
+        run_stage(df)