Upload app.py
app.py
ADDED
@@ -0,0 +1,672 @@
# CollegeGenius HF - 100% Hugging Face, No External APIs
# ------------------------------------------------------
# Everything runs inside the Space using open models from the Hugging Face Hub.
# No third-party web APIs or keys required.
#
# Models:
#   - sentence-transformers/paraphrase-MiniLM-L6-v2 (embeddings)
#   - textattack/roberta-base-CoLA (grammar acceptability per sentence)
#   - sshleifer/distilbart-cnn-12-6 (summarization / light rewriting)
#   - j-hartmann/emotion-english-distilroberta-base (emotion for interviews)
#   - distilroberta-base (masked LM for resume verb suggestions)
#
# Launch config is at bottom: demo.queue(...).launch()
# ------------------------------------------------------
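# Dependencies (a sketch inferred from the imports below; pin exact versions
# in the Space's requirements.txt as appropriate):
#   gradio, transformers, torch, sentence-transformers, numpy, pandas,
#   matplotlib, fpdf, pypdf, pillow, easyocr (optional)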
import os
import re
import io
import json
import uuid
import random
import datetime as dt
from typing import List, Dict, Any

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image  # charts are returned as PIL images for gr.Image

import gradio as gr
from fpdf import FPDF

# Optional OCR (local only)
try:
    import easyocr
    _OCR = easyocr.Reader(['en'], gpu=False)
except Exception:
    _OCR = None

# Hugging Face models
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForSeq2SeqLM,
    AutoModelForMaskedLM,
    pipeline,
    set_seed,
)
from sentence_transformers import SentenceTransformer

APP_DIR = "cghf_data"
os.makedirs(APP_DIR, exist_ok=True)

LOG_CSV = os.path.join(APP_DIR, "runs.csv")
if not os.path.exists(LOG_CSV):
    pd.DataFrame(columns=["timestamp", "tool", "score", "meta"]).to_csv(LOG_CSV, index=False)

set_seed(42)

# -----------------------------
# Load models
# -----------------------------
_EMBEDDER = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")

# Grammar acceptability (CoLA)
_CoLA_tok = AutoTokenizer.from_pretrained("textattack/roberta-base-CoLA")
_CoLA = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-CoLA")
_acc_pipe = pipeline("text-classification", model=_CoLA, tokenizer=_CoLA_tok, return_all_scores=True)

# Summarizer / lightweight rewriting
_SUM_tok = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
_SUM = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
_sum_pipe = pipeline("summarization", model=_SUM, tokenizer=_SUM_tok)

# Interview emotion
_EMO_tok = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
_EMO = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
_emo_pipe = pipeline("text-classification", model=_EMO, tokenizer=_EMO_tok, top_k=None)

# Masked LM for resume suggestions
_MLM_tok = AutoTokenizer.from_pretrained("distilroberta-base")
_MLM = AutoModelForMaskedLM.from_pretrained("distilroberta-base")
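# All five models load eagerly at import time. The transformers pipelines run
# on CPU because no `device` argument is passed (use device=0 for a GPU);
# SentenceTransformer picks a GPU automatically when one is available.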

# -----------------------------
# Small built-in college dataset
# -----------------------------
COLLEGES = pd.DataFrame([
    ("MIT", "MA", 3.9, 1550, 35, 0.04, 5, 4),
    ("Stanford", "CA", 3.95, 1540, 35, 0.04, 5, 5),
    ("Harvard", "MA", 3.95, 1540, 35, 0.04, 5, 5),
    ("UC Berkeley", "CA", 3.85, 1470, 33, 0.15, 5, 4),
    ("Michigan", "MI", 3.85, 1470, 33, 0.20, 5, 4),
    ("Georgia Tech", "GA", 3.85, 1480, 34, 0.17, 5, 3),
    ("UT Austin", "TX", 3.8, 1420, 32, 0.31, 4, 4),
    ("CMU", "PA", 3.9, 1530, 35, 0.14, 5, 4),
    ("Princeton", "NJ", 3.95, 1540, 35, 0.05, 5, 5),
    ("Yale", "CT", 3.95, 1540, 35, 0.05, 5, 5),
    ("Brown", "RI", 3.9, 1510, 34, 0.06, 4, 5),
    ("Duke", "NC", 3.9, 1520, 34, 0.07, 5, 5),
    ("Northwestern", "IL", 3.9, 1500, 34, 0.07, 4, 5),
    ("NYU", "NY", 3.7, 1450, 33, 0.13, 4, 5),
    ("UF", "FL", 3.8, 1400, 31, 0.31, 4, 3),
    ("Purdue", "IN", 3.7, 1380, 30, 0.53, 4, 3),
    ("Virginia Tech", "VA", 3.7, 1360, 30, 0.57, 4, 3),
    ("UW", "WA", 3.75, 1410, 32, 0.48, 4, 4),
    ("UIUC", "IL", 3.75, 1420, 32, 0.45, 5, 3),
    ("UCLA", "CA", 3.9, 1500, 34, 0.09, 4, 5),
], columns=["name", "state", "avg_gpa", "sat_mid", "act_mid", "accept_rate", "stem", "hum"])
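# Columns: avg_gpa / sat_mid / act_mid are rough mid-range admit stats,
# accept_rate is a fraction, and stem / hum read as 1-5 program-strength
# ratings. The numbers are illustrative demo data, not official figures.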

# -----------------------------
# Utils
# -----------------------------
def _now():
    return dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

def _clean(s: str) -> str:
    if not isinstance(s, str):
        return ""
    return re.sub(r"\s+", " ", s).strip()

def _sentences(text: str) -> List[str]:
    text = _clean(text)
    if not text:
        return []
    sents = re.split(r"(?<=[.!?])\s+", text)
    return [s for s in sents if s]

def _save_log(tool, score, meta):
    df = pd.read_csv(LOG_CSV)
    row = {"timestamp": _now(), "tool": tool, "score": score if score is not None else "", "meta": json.dumps(meta)[:1200]}
    df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
    df.to_csv(LOG_CSV, index=False)
def ocr_local(file) -> str:
    if not file:
        return ""
    path = file.name if hasattr(file, "name") else str(file)
    if path.lower().endswith(".pdf"):
        try:
            from pypdf import PdfReader
            pages = []
            reader = PdfReader(path)
            for p in reader.pages:
                pages.append(p.extract_text() or "")
            t = "\n".join(pages)
            if t.strip():
                return _clean(t)
        except Exception:
            pass
        return "PDF text not found. Export as text PDF or upload an image. (No web OCR used.)"
    else:
        if _OCR:
            res = _OCR.readtext(path, detail=0, paragraph=True)
            return _clean(" ".join(res))
        return "OCR not available locally. Install `easyocr` or paste text."

# -----------------------------
# Essay AI (HF-only)
# -----------------------------
CLICHES = [
    "ever since i was a child", "since the dawn of time", "follow my dreams",
    "changed my life forever", "made me who i am today", "i have always loved",
    "the true meaning", "anything is possible", "importance of hard work",
    "the power of perseverance"
]
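# For textattack/roberta-base-CoLA the generic label names apply, with LABEL_1
# taken as the "linguistically acceptable" class (following the CoLA dataset
# convention that 1 = acceptable), so its probability is the grammar score.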
def acceptability_score(sentence: str) -> float:
    out = _acc_pipe(sentence)[0]
    scores = {o["label"]: o["score"] for o in out}
    return float(scores.get("LABEL_1", 0.0))

def essay_grammar_profile(text: str) -> Dict[str, Any]:
    sents = _sentences(text)
    if not sents:
        return {"avg_acceptability": 0, "per_sentence": []}
    per = [{"sentence": s, "acceptability": round(acceptability_score(s), 3)} for s in sents]
    avg = float(np.mean([p["acceptability"] for p in per]))
    return {"avg_acceptability": round(avg, 3), "per_sentence": per}
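# distilbart-cnn accepts roughly 1024 input tokens, so long essays are
# summarized in ~350-word chunks and the chunk summaries are concatenated.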
def summarize_text(text: str, max_words=120) -> str:
    if not text.strip():
        return ""
    chunks = []
    words = text.split()
    step = 350
    for i in range(0, len(words), step):
        chunk = " ".join(words[i:i+step])
        if len(chunk) < 10:
            continue
        s = _sum_pipe(chunk, max_length=128, min_length=56, do_sample=False)[0]["summary_text"]
        chunks.append(s)
    out = " ".join(chunks) if chunks else _sum_pipe(text, max_length=150, min_length=60, do_sample=False)[0]["summary_text"]
    return " ".join(out.split()[:max_words])
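# encode(..., normalize_embeddings=True) returns unit vectors, so the plain
# dot products below are exactly cosine similarities.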
def redundancy_map(text: str) -> Dict[str, Any]:
    sents = _sentences(text)
    if len(sents) < 2:
        return {"avg_sim": 0.0, "pairs": []}
    embs = _EMBEDDER.encode(sents, normalize_embeddings=True)
    sims = []
    for i in range(len(sents) - 1):
        sim = float(np.dot(embs[i], embs[i+1]))
        sims.append({"pair": (i, i+1), "sim": round(sim, 3)})
    avg = float(np.mean([p["sim"] for p in sims])) if sims else 0.0
    return {"avg_sim": round(avg, 3), "pairs": sims}

def coherence_score(text: str) -> float:
    sents = _sentences(text)
    if len(sents) < 3:
        return 0.5
    embs = _EMBEDDER.encode(sents, normalize_embeddings=True)
    sims = []
    for i in range(len(sents) - 2):
        sims.append(float(np.dot(embs[i], embs[i+2])))
    score = float(np.mean(sims))
    return round((score + 1) / 2, 3)
def cliche_hits(text: str) -> List[str]:
    low = text.lower()
    return [c for c in CLICHES if c in low]

def suggest_rewrite(sentence: str, max_len=64) -> str:
    s = sentence.strip()
    if len(s.split()) < 8:
        return s
    try:
        out = _sum_pipe(s, max_length=min(max_len, 72), min_length=12, do_sample=False)[0]["summary_text"]
        return out
    except Exception:
        return s
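# Composite essay score: 35% grammar acceptability, 25% a redundancy penalty
# (adjacent-sentence similarity above 0.6 is penalized), 25% coherence, and
# 15% a length term that peaks near a ~650-word essay.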
def essay_score(text: str) -> Dict[str, Any]:
    wc = len(re.findall(r"[A-Za-z0-9']+", text))
    grammar = essay_grammar_profile(text)
    redun = redundancy_map(text)
    coher = coherence_score(text)
    cliches = cliche_hits(text)
    s = (
        0.35 * grammar["avg_acceptability"] +
        0.25 * (1 - max(0, redun["avg_sim"] - 0.6)) +
        0.25 * coher +
        0.15 * max(0.0, min(1.0, (800 - abs(650 - wc)) / 800))
    )
    total = int(round(100 * max(0.0, min(1.0, s))))
    return {
        "score": total,
        "word_count": wc,
        "grammar_avg": grammar["avg_acceptability"],
        "redundancy_avg": redun["avg_sim"],
        "coherence": coher,
        "cliches": cliches,
        "per_sentence": grammar["per_sentence"]
    }
def essay_pipeline(prompt, essay_text, upload):
    src = _clean(essay_text or "")
    if (not src) and upload is not None:
        src = ocr_local(upload)
    if not src or src.startswith("OCR not available") or src.startswith("PDF text not found"):
        return src or "No text found.", None, None, None

    profile = essay_score(src)
    summary = summarize_text(src)
    worst = sorted(profile["per_sentence"], key=lambda x: x["acceptability"])[:3]
    rewrites = []
    for w in worst:
        rewrites.append({"original": w["sentence"], "rewrite": suggest_rewrite(w["sentence"])})

    dims = {
        "Grammar": int(100 * profile["grammar_avg"]),
        "Non-redundancy": int(100 * (1 - profile["redundancy_avg"])),
        "Coherence": int(100 * profile["coherence"]),
    }
    img = bars_image(list(dims.keys()), list(dims.values()), title=f"Essay Score: {profile['score']}")

    md = f"### Score: **{profile['score']}/100** \n"
    md += f"- Word count: **{profile['word_count']}**\n"
    md += f"- Grammar acceptability (avg): **{profile['grammar_avg']}**\n"
    md += f"- Adjacent redundancy (avg cosine): **{profile['redundancy_avg']}** (lower is better)\n"
    md += f"- Coherence: **{profile['coherence']}**\n"
    if profile['cliches']:
        md += f"- Clichés to remove: `{', '.join(profile['cliches'])}`\n"
    md += "\n**Auto Summary:**\n> " + summary

    rew_md = "\n\n**Rewrite Suggestions (lowest-acceptability sentences):**\n"
    for r in rewrites:
        rew_md += f"- **Original:** {r['original']}\n \n  **Rewrite:** {r['rewrite']}\n\n"

    _save_log("essay", profile["score"], profile)
    return md + rew_md, img, src[:1200], json.dumps(profile, indent=2)[:1800]

# -----------------------------
# Resume / Activities
# -----------------------------
ACTION_VERBS = [
    "led", "built", "created", "launched", "founded", "organized", "architected",
    "automated", "designed", "initiated", "streamlined", "optimized", "developed",
    "engineered", "authored", "deployed", "scaled", "improved", "won", "achieved",
    "delivered", "analyzed", "spearheaded", "directed", "mentored", "taught",
    "coordinated", "presented", "implemented", "transformed", "secured"
]
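# If a bullet doesn't already start with an action verb, prepend a <mask>
# token and let distilroberta propose one: a top-8 candidate that is itself in
# ACTION_VERBS wins, otherwise the best-scoring candidate word is used.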
def mlm_suggest(bullet: str) -> str:
    low = bullet.lower().strip("-• ")
    has_action = any(low.startswith(v) for v in ACTION_VERBS)
    text = bullet
    if not has_action:
        text = "<mask> " + bullet
    tokens = _MLM_tok(text, return_tensors="pt")
    logits = _MLM(**tokens).logits
    mask_positions = (tokens["input_ids"] == _MLM_tok.mask_token_id).nonzero(as_tuple=True)
    if len(mask_positions[1]) == 0:
        return bullet
    mask_idx = int(mask_positions[1][0])
    probs = logits[0, mask_idx].softmax(-1)
    ids = probs.topk(8).indices.tolist()
    cands = [_MLM_tok.decode([i]).strip() for i in ids]
    for c in cands:
        if re.match(r"^[A-Za-z\-]+$", c) and c.lower() in ACTION_VERBS:
            return c.capitalize() + " " + bullet
    return cands[0].capitalize() + " " + bullet
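# Heuristic bullet rubric: start at 50, add 10 points each for a leading
# action verb, a number, an outcome word, and an 8-30 word length; clamp the
# result to [30, 100].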
def bullet_score(bullet: str) -> int:
    low = bullet.lower()
    action = any(low.startswith(v) for v in ACTION_VERBS)
    nums = bool(re.search(r"\d", low))
    outcome = bool(re.search(r"(result|impact|increase|decrease|grew|reduced|saved|won|revenue|users|raised|donations)", low))
    length_ok = 8 <= len(re.findall(r"[A-Za-z0-9']+", bullet)) <= 30
    score = 50 + 10*action + 10*nums + 10*outcome + 10*length_ok
    return min(100, max(30, score))

def analyze_resume(text: str):
    bullets = [b.strip() for b in text.split("\n") if b.strip()]
    rows = []
    for b in bullets:
        s = bullet_score(b)
        suggestion = mlm_suggest(b) if s < 80 else ""
        rows.append([b, s, suggestion])
    avg = int(np.mean([r[1] for r in rows])) if rows else 0
    _save_log("resume", avg, {"n": len(rows)})
    return pd.DataFrame(rows, columns=["Bullet", "Score", "Suggestion"]), avg
def resume_pipeline(resume_text, upload):
    # Preserve line breaks here: analyze_resume splits bullets on "\n", and
    # _clean() would collapse them into a single line.
    src = (resume_text or "").strip()
    if (not src) and upload is not None:
        src = ocr_local(upload)
        if src.startswith("OCR not available") or src.startswith("PDF text not found"):
            return pd.DataFrame(columns=["Bullet", "Score", "Suggestion"]), None, src
    df, avg = analyze_resume(src)
    img = bars_image(["Avg", "Min", "Max"], [avg, int(df["Score"].min() if len(df) > 0 else 0), int(df["Score"].max() if len(df) > 0 else 0)], title="Resume Bullet Quality")
    md = f"### Resume Analysis\n- Bullets: **{len(df)}**\n- Average quality: **{avg}**/100"
    return df, img, md

# -----------------------------
# College Matcher (local dataset)
# -----------------------------
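# Rough fit heuristic: GPA and test-score gaps relative to each school's
# mid-range, plus a program-strength bonus (stem vs. hum) and a small in-state
# bump; fit >= 0.3 is labeled Target, >= 0.1 Reach, otherwise High Reach.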
def match_colleges(gpa: float, sat: int, act: int, interest: str, state_pref: str):
    df = COLLEGES.copy()
    if sat and sat > 0:
        df["test_gap"] = (sat - df["sat_mid"]) / 200.0
    elif act and act > 0:
        df["test_gap"] = (act - df["act_mid"]) / 4.0
    else:
        df["test_gap"] = 0.0
    df["gpa_gap"] = (gpa - df["avg_gpa"])
    if interest.lower() in ["stem", "engineering", "cs", "math", "physics"]:
        df["fit"] = 0.5*df["gpa_gap"] + 0.5*df["test_gap"] + 0.3*(df["stem"]/5.0)
    else:
        df["fit"] = 0.5*df["gpa_gap"] + 0.5*df["test_gap"] + 0.3*(df["hum"]/5.0)
    if state_pref:
        df.loc[df["state"].str.lower() == state_pref.lower(), "fit"] += 0.15
    tiers = []
    for _, r in df.iterrows():
        if r["fit"] >= 0.3:
            tiers.append("Target")
        elif r["fit"] >= 0.1:
            tiers.append("Reach")
        else:
            tiers.append("High Reach")
    df["tier"] = tiers
    df = df.sort_values("fit", ascending=False)
    cols = ["name", "state", "tier", "fit", "avg_gpa", "sat_mid", "act_mid", "accept_rate"]
    return df[cols].head(12)
def college_pipeline(gpa, sat, act, interest, state_pref):
    try:
        gpa = float(gpa) if gpa else 0.0
        sat = int(sat) if sat else 0
        act = int(act) if act else 0
    except Exception:
        gpa, sat, act = 0.0, 0, 0
    df = match_colleges(gpa, sat, act, interest, state_pref)
    buf = bars_image(df["name"].head(5).tolist(), [round(x, 3) for x in df["fit"].head(5).tolist()], title="Top Fit")
    return df, buf

# -----------------------------
# Interview Practice
# -----------------------------
MAJOR_QUESTIONS = {
    "Computer Science": [
        "Tell me about a time you debugged a difficult problem.",
        "What's a software project you're proud of and why?",
        "Explain an algorithm you like to a non-technical audience."
    ],
    "Business": [
        "Describe a time you influenced someone without authority.",
        "How did you analyze data to drive a decision?",
        "Pitch a product for college students."
    ],
    "Biology": [
        "What experiment taught you the most?",
        "How do you evaluate the quality of a scientific source?",
        "Explain CRISPR at a high level."
    ],
    "Humanities": [
        "How has a book changed how you see the world?",
        "Tell me about a debate that sharpened your thinking.",
        "What does good writing mean to you?"
    ]
}
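# With top_k=None the pipeline scores every label of the emotion model (seven
# emotions, per its model card). Passing the text inside a list keeps the
# output shape [[{...}, ...]] consistent across transformers versions.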
def emo_scores(text: str) -> Dict[str, float]:
    if not text.strip():
        return {}
    outs = _emo_pipe([text])[0]
    return {o["label"]: float(o["score"]) for o in outs}
def interview_pipeline(major: str, behavioral: bool, answer: str):
    if behavioral:
        qs = [
            "Tell me about a time you failed and what you learned.",
            "Describe a conflict you resolved.",
            "When did you change your mind about something important?",
            "What's a project that best represents you and why?"
        ]
    else:
        qs = MAJOR_QUESTIONS.get(major, MAJOR_QUESTIONS["Humanities"])
    q = random.choice(qs)
    sc = emo_scores(answer or "")
    if sc:
        top = sorted(sc.items(), key=lambda x: x[1], reverse=True)[:3]
        md = "### Emotions detected\n" + "\n".join([f"- {k}: {round(v, 3)}" for k, v in top])
    else:
        md = "### Emotions detected\n- (type an answer to analyze)"
    img = bars_image(list(sc.keys()), [round(v, 3) for v in sc.values()], title="Emotion Scores") if sc else None
    return q, md, img

# -----------------------------
# Spike Planner + Charts + PDF
# -----------------------------
SPIKE_IDEAS = {
    "AI/ML": [
        "Build a model for a local nonprofit (forecast demand).",
        "Open-source a dataset or evaluation tool.",
        "Publish an 8-part 'ML intuition for teens' blog."
    ],
    "Finance": [
        "Lead an investing club; run monthly backtests.",
        "Survey students on budgeting; publish findings.",
        "Prototype a student budgeting app."
    ],
    "Biotech": [
        "Bioinformatics analysis of a public dataset.",
        "Write a bioethics mini-series with interviews.",
        "Organize a safe wet-lab collaboration session."
    ]
}

def build_spike(interest: str, weeks: int = 8):
    ideas = SPIKE_IDEAS.get(interest, SPIKE_IDEAS["AI/ML"])
    milestones = []
    for w in range(weeks):
        milestones.append({
            "week": w + 1,
            "goal": f"Progress: {ideas[w % len(ideas)]}",
            "deliverable": f"Week {w+1} demo/post"
        })
    return ideas, milestones
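# Emits a minimal iCalendar file (RFC 5545): one all-day VEVENT per weekly
# milestone, using DTSTART;VALUE=DATE and the CRLF line endings the spec
# requires.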
def export_ics(title: str, milestones):
    now = dt.datetime.now()
    lines = ["BEGIN:VCALENDAR", "VERSION:2.0", "PRODID:-//CGHF//EN"]
    for m in milestones:
        start = now + dt.timedelta(weeks=m["week"] - 1)
        dtstamp = now.strftime("%Y%m%dT%H%M%SZ")
        dtstart = start.strftime("%Y%m%d")
        uid = str(uuid.uuid4()) + "@cghf"
        lines += [
            "BEGIN:VEVENT",
            f"UID:{uid}",
            f"DTSTAMP:{dtstamp}",
            f"DTSTART;VALUE=DATE:{dtstart}",
            f"SUMMARY:{title} - Week {m['week']}: {m['deliverable']}",
            f"DESCRIPTION:{m['goal']}",
            "END:VEVENT"
        ]
    lines.append("END:VCALENDAR")
    out = os.path.join(APP_DIR, "spike.ics")
    with open(out, "w", newline="") as f:
        f.write("\r\n".join(lines))
    return out
def draw_gantt(milestones):
    fig, ax = plt.subplots(figsize=(7, 2 + 0.3*len(milestones)))
    for i, m in enumerate(milestones):
        ax.barh(i, 1, left=m["week"], height=0.4)
        ax.text(m["week"] + 0.05, i, f"W{m['week']} {m['deliverable']}", va="center")
    ax.set_yticks(range(len(milestones)))
    ax.set_yticklabels([f"W{m['week']}" for m in milestones])
    ax.set_xlabel("Week")
    ax.set_title("Spike Timeline")
    buf = io.BytesIO()
    plt.tight_layout()
    plt.savefig(buf, format="png")
    plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
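# fpdf's built-in core fonts (e.g. Arial) only cover Latin-1, so the strings
# written below stick to plain ASCII punctuation.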
def portfolio_pdf(essay_fb, resume_df, colleges_df, spike_title, milestones, emotions_summary):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(0, 10, "CollegeGenius HF - Portfolio", ln=True)

    pdf.set_font("Arial", "", 12)
    pdf.cell(0, 8, "Essay Summary", ln=True)
    for k in ["score", "word_count", "grammar_avg", "redundancy_avg", "coherence"]:
        if k in essay_fb:
            pdf.cell(0, 6, f"- {k}: {essay_fb[k]}", ln=True)

    pdf.ln(3)
    pdf.cell(0, 8, "Resume Highlights (top 5)", ln=True)
    try:
        rows = resume_df.values.tolist()
    except Exception:
        rows = []
    for r in rows[:5]:
        pdf.multi_cell(0, 6, f"* [{r[1]}] {r[0]}")
        if r[2]:
            pdf.multi_cell(0, 6, f"  Suggestion: {r[2]}")

    pdf.ln(3)
    pdf.cell(0, 8, "College Matches", ln=True)
    try:
        for _, row in colleges_df.head(6).iterrows():
            pdf.cell(0, 6, f"- {row['name']} ({row['tier']}) | Fit={round(row['fit'], 3)}", ln=True)
    except Exception:
        pass

    pdf.ln(3)
    pdf.cell(0, 8, f"Spike: {spike_title}", ln=True)
    for m in milestones[:6]:
        pdf.cell(0, 6, f"Week {m['week']}: {m['deliverable']} - {m['goal']}", ln=True)

    pdf.ln(3)
    pdf.cell(0, 8, "Interview Emotions", ln=True)
    for k, v in emotions_summary.items():
        pdf.cell(0, 6, f"- {k}: {round(v, 3)}", ln=True)

    out = os.path.join(APP_DIR, "portfolio.pdf")
    pdf.output(out)
    return out

# -----------------------------
# Viz helper
# -----------------------------
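# The chart is returned as a PIL image: gr.Image accepts a PIL image, numpy
# array, or file path, but not raw PNG bytes.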
def bars_image(labels, values, title=""):
    fig, ax = plt.subplots()
    ax.bar(labels, values)
    ax.set_title(title)
    ax.set_ylim(0, max(1, max(values) if values else 1))
    buf = io.BytesIO()
    plt.tight_layout()
    plt.savefig(buf, format="png")
    plt.close(fig)
    buf.seek(0)
    return Image.open(buf)

# -----------------------------
# Spike pipeline (also returns milestones json)
# -----------------------------
def spike_pipeline(interest, weeks, title_hint):
    ideas, milestones = build_spike(interest, int(weeks))
    ics_path = export_ics(title_hint or f"{interest} Spike", milestones)
    md = "### Spike Plan\n" + "\n".join([f"- W{m['week']}: {m['deliverable']} - {m['goal']}" for m in milestones])
    gantt = draw_gantt(milestones)
    ideas_ht = [(i, None) for i in ideas]  # gr.HighlightedText expects (text, label) pairs
    return md, ideas_ht, ics_path, gantt, json.dumps(milestones)

# -----------------------------
# Build PDF pipeline
# -----------------------------
def essay_to_pdf_pipeline(essay_json, resume_df, college_df, spike_title, milestones_json, emo_json):
    try:
        essay = json.loads(essay_json)
    except Exception:
        essay = {}
    try:
        mil = json.loads(milestones_json)
    except Exception:
        mil = [{"week": 1, "goal": "Start", "deliverable": "Kickoff"}]
    try:
        emo = json.loads(emo_json) if emo_json else {}
    except Exception:
        emo = {}
    out = portfolio_pdf(
        essay,
        resume_df if isinstance(resume_df, pd.DataFrame) else pd.DataFrame(),
        college_df if isinstance(college_df, pd.DataFrame) else COLLEGES,
        spike_title or "Spike",
        mil,
        emo,
    )
    return out

# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks(title="CollegeGenius HF", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# CollegeGenius HF - All Local, All Hugging Face\nNo API keys. Everything runs with open models from the Hugging Face Hub.")
    with gr.Tab("Essay Coach"):
        prompt = gr.Textbox(label="Prompt (optional)", lines=2)
        essay_text = gr.Textbox(label="Paste essay text", lines=14)
        upload_essay = gr.File(label="Upload image/PDF (optional; OCR is local-only)")
        btn_essay = gr.Button("Analyze Essay", variant="primary")
        essay_md = gr.Markdown()
        essay_plot = gr.Image(label="Essay Diagnostics")
        essay_excerpt = gr.Textbox(label="First 1200 chars", lines=6)
        essay_json = gr.Textbox(label="Essay JSON (for PDF)", lines=6)
        btn_essay.click(essay_pipeline, inputs=[prompt, essay_text, upload_essay],
                        outputs=[essay_md, essay_plot, essay_excerpt, essay_json])

    with gr.Tab("Resume & Activities"):
        resume_text = gr.Textbox(label="Paste resume bullets (one per line)", lines=10)
        upload_resume = gr.File(label="Upload image/PDF (optional)")
        btn_resume = gr.Button("Analyze Resume", variant="primary")
        resume_df = gr.Dataframe(headers=["Bullet", "Score", "Suggestion"], interactive=False, wrap=True)
        resume_img = gr.Image(label="Quality Snapshot")
        resume_md = gr.Markdown()
        btn_resume.click(resume_pipeline, inputs=[resume_text, upload_resume],
                         outputs=[resume_df, resume_img, resume_md])

    with gr.Tab("College Matcher"):
        with gr.Row():
            gpa = gr.Textbox(label="Unweighted GPA (e.g., 3.8)")
            sat = gr.Textbox(label="SAT (1600)")
            act = gr.Textbox(label="ACT (36)")
        with gr.Row():
            interest = gr.Dropdown(["STEM", "Humanities", "Business", "Biology", "CS", "Engineering"], value="STEM", label="Interest")
            state_pref = gr.Textbox(label="State preference (optional)")
        btn_match = gr.Button("Find Matches", variant="primary")
        college_df = gr.Dataframe(interactive=False, wrap=True)
        college_plot = gr.Image(label="Top Fit")
        btn_match.click(college_pipeline, inputs=[gpa, sat, act, interest, state_pref], outputs=[college_df, college_plot])

    with gr.Tab("Spike Planner"):
        with gr.Row():
            sp_interest = gr.Dropdown(list(SPIKE_IDEAS.keys()), value="AI/ML", label="Interest")
            weeks = gr.Slider(4, 16, value=8, step=1, label="Weeks")
            sp_title = gr.Textbox(label="Spike Title (optional)")
        btn_spike = gr.Button("Generate Plan", variant="primary")
        sp_md = gr.Markdown()
        sp_ideas = gr.HighlightedText(label="Idea Starters", combine_adjacent=True)
        sp_ics = gr.File(label="Calendar (.ics)")
        sp_gantt = gr.Image(label="Gantt")
        sp_json = gr.Textbox(label="Milestones JSON", lines=6)
        btn_spike.click(spike_pipeline, inputs=[sp_interest, weeks, sp_title],
                        outputs=[sp_md, sp_ideas, sp_ics, sp_gantt, sp_json])

    with gr.Tab("Interview Practice"):
        major = gr.Dropdown(list(MAJOR_QUESTIONS.keys()), value="Computer Science", label="Major")
        behavioral = gr.Checkbox(value=True, label="Behavioral?")
        answer = gr.Textbox(label="Your answer (type to analyze emotions)", lines=8)
        btn_interview = gr.Button("Get Question + Analyze", variant="primary")
        iv_q = gr.Textbox(label="Question", lines=2)
        iv_md = gr.Markdown()
        iv_img = gr.Image(label="Emotion Scores")
        btn_interview.click(interview_pipeline, inputs=[major, behavioral, answer], outputs=[iv_q, iv_md, iv_img])

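    # Components created in earlier tabs (essay_json, resume_df, college_df,
    # sp_json) can be reused as inputs here because all tabs live inside the
    # same gr.Blocks context.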
    with gr.Tab("Build Portfolio PDF"):
        gr.Markdown("Combine everything into a shareable PDF.")
        spike_title_in = gr.Textbox(label="Spike Title", value="My Spike")
        emo_json = gr.Textbox(label="Emotions JSON (from Interview tab; optional)", lines=4)
        btn_pdf = gr.Button("Build PDF", variant="primary")
        pdf_out = gr.File()
        btn_pdf.click(essay_to_pdf_pipeline, inputs=[essay_json, resume_df, college_df, spike_title_in, sp_json, emo_json], outputs=[pdf_out])

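# HF Spaces entry point. Note that queue()'s kwargs changed across Gradio
# releases: concurrency_count is the Gradio 3.x name; Gradio 4+ renamed it to
# default_concurrency_limit.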
app = demo

if __name__ == "__main__":
    demo.queue(concurrency_count=2, max_size=32).launch()