File size: 3,435 Bytes
cb8f97e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c413759
 
cb8f97e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
631e35c
cb8f97e
 
 
e15f1b6
 
 
cb8f97e
e15f1b6
 
cb8f97e
 
 
 
 
e15f1b6
cb8f97e
 
 
 
 
 
e15f1b6
cb8f97e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import gradio as gr
from huggingface_hub import hf_hub_download
import json
import gzip


# Per-release lookup tables, keyed by the version label offered in the UI.
usernames = {}

# Extra hf_hub_download() arguments for each release.  "v1.0" passes no
# revision, so the dataset repository's default revision is fetched.
_release_kwargs = {
    "v1.1": {"revision": "v1.1"},
    "v1.0": {},
}

for _release, _extra in _release_kwargs.items():
    filepath = hf_hub_download(
        repo_id="bigcode/the-stack-username-to-repo",
        filename="username_to_repo.json.gz",
        repo_type="dataset",
        **_extra,
    )
    # The file is gzip-compressed JSON; read the bytes, decode, then parse.
    with gzip.open(filepath, 'r') as f:
        raw_bytes = f.read()
    usernames[_release] = json.loads(raw_bytes.decode('utf-8'))

# Introductory Markdown rendered at the top of the page.
text = """\
🔍🌟AICodeFly⚡️ is a program to search github repos.  The goal is to make repos easy to search, review, clone and download.  AI to make your coding experience fast and easy.
This search will match your term to find up to 100 github repositories that match your search.  Use the link to shell the repository as html.
"""

def check_username(username, version):
    """Report which repositories of a user are listed for a Stack version.

    Args:
        username: Key to look up in the selected version's table.
        version: Key into the module-level ``usernames`` mapping.

    Returns:
        Markdown text: a "Yes" header followed by one italic line per
        repository, or a single "No" sentence when the user is absent or
        has an empty repository list.
    """
    table = usernames[version]

    # Guard clause: unknown user, or a user with nothing recorded.
    if username not in table or len(table[username]) == 0:
        return "**No**, your code is not in The Stack."

    found = table[username]
    if len(found) == 1:
        noun = "repository"
    else:
        noun = "repositories"

    pieces = [f"**Yes**, there is code from **{len(found)} {noun}** in The Stack:\n\n"]
    pieces.extend(f"_{name}_\n\n" for name in found)
    return "".join(pieces).strip()

def check_keyword(username, version, max_hits=1000, max_repos=70000000):
    """Search every repository entry of one Stack version for a keyword.

    Args:
        username: Keyword to look for (the parameter keeps its historical
            name).  Matching is a case-sensitive substring test against each
            repository entry; surrounding whitespace is ignored.
        version: Key into the module-level ``usernames`` mapping.
        max_hits: Stop as soon as this many matches have been collected.
        max_repos: Stop as soon as this many repository entries have been
            scanned.

    Returns:
        Markdown text: one GitHub link per match, annotated with the running
        repository and user counters, followed by a line stating why the
        search stopped.  A blank keyword returns only a prompt to enter a
        search term.

    Raises:
        KeyError: If ``version`` is not a key of ``usernames``.
    """
    keyword = (username or "").strip()
    if not keyword:
        # The empty string is a substring of everything; without this guard
        # a blank search would simply dump the first ``max_hits`` entries.
        return "**No keyword given**, enter a search term to match against repos."

    parts = []
    hit_count = 0
    repo_counter = 0

    for user_counter, repos in enumerate(usernames[version].values(), start=1):
        for repo in repos:
            # Check the scan budget *before* counting the next entry so the
            # reported count equals the number of entries actually examined.
            if repo_counter >= max_repos:
                parts.append(f"**Found maximum repos**, Count: **{repo_counter}** in The Stack:\n\n")
                return "".join(parts).strip()
            repo_counter += 1

            if keyword not in repo:
                continue

            hit_count += 1
            parts.append(
                f"_<a href=https://github.com/{repo} target=_blank>{repo} repocounter: {repo_counter} usercounter: {user_counter}</a>_\n\n"
            )
            # ">=" so that exactly ``max_hits`` links are emitted, not one extra.
            if hit_count >= max_hits:
                parts.append(f"**Found maximum hits**, Count: **{hit_count}** in The Stack:\n\n")
                return "".join(parts).strip()

    parts.append("**Searched All Repos**, Above found in The Stack.")
    return "".join(parts).strip()

# Page layout: one row holding a wide centre column flanked by two narrow
# spacer columns, so the content sits in the middle of the page.
with gr.Blocks() as demo:
    with gr.Row():
        _ = gr.Column(scale=1)        # left spacer
        colum_2 = gr.Column(scale=6)  # content column
        _ = gr.Column(scale=1)        # right spacer

        with colum_2:
            gr.Markdown(text)

            # Inputs: which release to search and the term to search for.
            version = gr.Dropdown(
                choices=["v1.1", "v1.0"],
                label="The Stack version:",
                value="v1.1",
            )
            username = gr.Text(
                value="",
                label="Keyword to match against repos e.g. BeatSaber",
            )
            check_button = gr.Button("Check!")

            # Output area for the Markdown returned by the search handler.
            repos = gr.Markdown()

            # The button runs the keyword search; check_username takes the
            # same (username, version) inputs and could be wired in instead.
            check_button.click(
                fn=check_keyword,
                inputs=[username, version],
                outputs=repos,
            )


demo.launch()