"""Gradio chatbot that answers questions about a public GitHub repository.

The app fetches the repository's top-level file listing via the GitHub REST
API, folds it into a prompt, and generates an answer with a seq2seq code model.
"""

import logging
import os

import gradio as gr
import requests
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Toggle verbose logging for local development.
debug = True
logging_level = logging.DEBUG if debug else logging.INFO
logging.basicConfig(level=logging_level)
logger = logging.getLogger(__name__)

# BUG FIX: "microsoft/CodeBERT-base" is an encoder-only RoBERTa checkpoint; it
# has no decoder, so AutoModelForSeq2SeqLM cannot load it and .generate() would
# never work. CodeT5 is a genuine encoder-decoder model trained on code.
MODEL_NAME = "Salesforce/codet5-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)


def fetch_repo_contents(repo_url):
    """Return the top-level contents listing of a public GitHub repository.

    Args:
        repo_url: A URL such as "https://github.com/owner/repo". Trailing
            slashes and a ".git" suffix are tolerated.

    Returns:
        The parsed JSON list from the GitHub "contents" API endpoint.

    Raises:
        ValueError: If "owner/repo" cannot be parsed from the URL.
        requests.HTTPError: On a non-2xx response from the GitHub API.
    """
    # BUG FIX: the original two-value unpack raised ValueError for URLs with a
    # trailing slash or extra path segments ("github.com/u/r/" -> 3 pieces).
    path = repo_url.split("github.com/")[-1].strip("/")
    parts = path.split("/")
    if len(parts) < 2 or not parts[0] or not parts[1]:
        raise ValueError(f"Cannot parse owner/repo from URL: {repo_url!r}")
    username = parts[0]
    repo_name = parts[1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    api_url = f"https://api.github.com/repos/{username}/{repo_name}/contents"
    # BUG FIX: add a timeout so a stalled connection cannot hang the UI forever.
    response = requests.get(api_url, timeout=30)
    response.raise_for_status()
    return response.json()


def generate_chatbot_response(repo_url, question):
    """Answer *question* about the repository at *repo_url*.

    The prompt embeds the repository's top-level file names and download URLs
    as context, then runs greedy seq2seq generation.

    Args:
        repo_url: Public GitHub repository URL.
        question: Free-text question about the repository.

    Returns:
        The decoded model output string.
    """
    repo_contents = fetch_repo_contents(repo_url)
    # Build the prompt with join instead of repeated += (avoids quadratic
    # string concatenation for large listings).
    sections = [f"Answer the question about the repository {repo_url}: {question}\n\n"]
    for item in repo_contents:
        # NOTE(review): "download_url" is null for directory entries —
        # they render as "None" in the prompt, matching original behavior.
        sections.append(f"{item['name']}:\n{item['download_url']}\n\n")
    prompt = "".join(sections)

    # Truncate to the model's context window; generate a single short answer.
    inputs = tokenizer.encode(prompt, return_tensors="pt", max_length=1024, truncation=True)
    outputs = model.generate(inputs, max_length=150, num_return_sequences=1)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def build_interface():
    """Construct the Gradio interface.

    BUG FIX: gr.inputs.Text / gr.outputs.Text were removed in Gradio 3.x;
    gr.Textbox is the current component for both inputs and outputs.
    """
    return gr.Interface(
        fn=generate_chatbot_response,
        inputs=[
            gr.Textbox(label="GitHub Repository URL"),
            gr.Textbox(label="Question"),
        ],
        outputs=gr.Textbox(label="Answer"),
        title="Create a Conversational AI Chatbot for Your Public GitHub Repository Codebase",
        theme="huggingface_dark",
    )


if __name__ == "__main__":
    # BUG FIX: guard the launch so importing this module (e.g. from tests)
    # does not start a web server as a side effect.
    build_interface().launch()