"""Streamlit app that runs heuristic quality checks and a CodeBERT pass over Python code."""

import ast
import os

import streamlit as st
import torch  # already required at runtime: the model consumes PyTorch tensors
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Initialize Hugging Face model and tokenizer
MODEL_NAME = "microsoft/codebert-base"


@st.cache_resource
def _load_tokenizer_and_model(model_name):
    """Load the tokenizer and classification model once per server process.

    Streamlit re-executes this script on every widget interaction; caching the
    loaded objects avoids re-reading the model weights on each rerun.

    Args:
        model_name: Hugging Face model identifier to load.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_name)
    loaded_model.eval()  # inference only
    return loaded_tokenizer, loaded_model


# Load the pre-trained CodeBERT model for understanding code
tokenizer, model = _load_tokenizer_and_model(MODEL_NAME)


# Helper function to analyze code
def analyze_code(code):
    """Classify ``code`` with the loaded model, one prediction per token window.

    The input is split into windows of at most 512 tokens so that long files
    are analyzed in full instead of being cut off after the first 512 tokens.

    Args:
        code: Source code to analyze, as a single string.

    Returns:
        A list with one predicted class index (int) per window, in order.
    """
    max_length = 512
    # NOTE(review): relies on AutoTokenizer returning a "fast" tokenizer, for
    # which `input_ids` is a list of windows when overflow is requested --
    # confirm if the tokenizer is ever loaded with use_fast=False.
    encoding = tokenizer(
        code,
        truncation=True,
        max_length=max_length,
        return_overflowing_tokens=True,
    )

    results = []
    # Gradients are never used here, so skip building the autograd graph.
    with torch.no_grad():
        for input_ids, attention_mask in zip(
            encoding["input_ids"], encoding["attention_mask"]
        ):
            outputs = model(
                input_ids=torch.tensor([input_ids]),
                attention_mask=torch.tensor([attention_mask]),
            )
            results.append(outputs.logits.argmax(dim=1).item())
    return results


def _find_unused_imports(tree):
    """Return names bound by imports in ``tree`` that are never read.

    This is a heuristic: names re-exported on purpose (e.g. via ``__all__``)
    are reported as unused, and ``from x import *`` is ignored.
    """
    imported = set()
    used = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                # `import a.b` binds the top-level name `a`.
                imported.add(alias.asname or alias.name.split(".")[0])
        elif isinstance(node, ast.ImportFrom):
            if node.module == "__future__":
                continue  # compiler directives, never referenced by name
            for alias in node.names:
                if alias.name != "*":
                    imported.add(alias.asname or alias.name)
        elif isinstance(node, ast.Name) and isinstance(node.ctx, ast.Load):
            used.add(node.id)
    return imported - used


# Function to simulate quality and bug detection
# Note: Replace with a real analysis pipeline.
def check_code_quality_and_bugs(code):
    """Run lightweight heuristic checks on ``code``.

    Args:
        code: Python source code to inspect, as a single string.

    Returns:
        A list of human-readable suggestion strings; empty when no check fires.
    """
    suggestions = []

    # Readability heuristic: very short inputs are flagged.
    if len(code.split("\n")) < 5:
        suggestions.append(
            "Code seems too short, ensure functionality is implemented correctly."
        )

    # Comment heuristic: any '#' character counts, including one inside a string.
    if "#" not in code:
        suggestions.append("Consider adding comments to improve code clarity.")

    # The remaining checks need a syntax tree; report unparsable code instead.
    try:
        tree = ast.parse(code)
    except SyntaxError as err:
        location = f" on line {err.lineno}" if err.lineno else ""
        suggestions.append(f"Syntax error{location}: {err.msg}")
        return suggestions
    except ValueError as err:
        suggestions.append(f"Code could not be parsed: {err}")
        return suggestions

    # Unused imports: imported names that are never read anywhere in the code.
    if _find_unused_imports(tree):
        suggestions.append("Unused imports detected; consider removing them.")

    # `try` statements that have no `except` clause (i.e. `try`/`finally` only).
    if any(
        isinstance(node, ast.Try) and not node.handlers for node in ast.walk(tree)
    ):
        suggestions.append(
            "`try` block without `except` may lead to unhandled exceptions."
        )

    return suggestions


# Streamlit app UI
st.title("Code Quality and Bug Detection Tool")
st.markdown("Analyze your code for syntax issues, quality, and potential bugs.")

# File uploader
uploaded_file = st.file_uploader("Upload a Python code file", type=["py"])

# Code snippet input
code_snippet = st.text_area("Or paste your code snippet below:")

if st.button("Analyze Code"):
    # An uploaded file takes precedence over the pasted snippet.
    if uploaded_file is not None:
        try:
            # "utf-8-sig" also accepts plain UTF-8 and strips a leading BOM.
            code = uploaded_file.getvalue().decode("utf-8-sig")
        except UnicodeDecodeError:
            st.error("The uploaded file is not valid UTF-8 text.")
            st.stop()
    elif code_snippet.strip():
        code = code_snippet
    else:
        st.error("Please upload a file or paste code to analyze.")
        st.stop()

    # Perform code analysis
    st.subheader("Analysis Results")
    st.write("**Code Quality and Bug Suggestions:**")
    suggestions = check_code_quality_and_bugs(code)

    if suggestions:
        for i, suggestion in enumerate(suggestions, 1):
            st.write(f"{i}. {suggestion}")
    else:
        st.write("No issues detected. Your code looks good!")

    # Simulated CodeBERT analysis (placeholder)
    st.write("**Model Analysis:**")
    model_results = analyze_code(code)
    for idx, result in enumerate(model_results, 1):
        st.write(f"Chunk {idx} classification result: {result}")

st.markdown("---")
st.markdown("*Powered by Hugging Face and Streamlit*")