import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F

# Checkpoint identifier handed to from_pretrained; replace with your own model path.
model_path = "ssocean/NAIP"

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the classifier with a single output label and 8-bit weights, and put it
# in evaluation mode right away (eval() returns the module itself).
# NOTE(review): the model is never moved to `device` explicitly; presumably the
# 8-bit loader handles placement itself - confirm on CPU-only hosts.
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=1,
    load_in_8bit=True,
).eval()

def _single_line(value):
    """Return *value* as text with each line break replaced by one space."""
    return " ".join(str(value).splitlines())


def predict(title, abstract):
    """Score a paper's normalized academic impact from its title and abstract.

    Args:
        title: Paper title entered by the user.
        abstract: Paper abstract entered by the user.

    Returns:
        A one-entry dict mapping "Predicted Impact" to the sigmoid of the
        model's single logit, rounded to four decimal places.
    """
    # The prompt below uses "\n" as its own field separator, and the UI asks
    # users not to enter line breaks; flatten them here so pasted multi-line
    # text cannot break up the prompt layout.
    title = _single_line(title)
    abstract = _single_line(abstract)

    # Merge the title and abstract into a single prompt string.
    text = f"Given a certain paper, Title: {title}\n Abstract: {abstract}. \n Predict its normalized academic impact (between 0 and 1):"
    # NOTE(review): the UI placeholder mentions 1024 tokens, but the prompt is
    # truncated to 512 tokens here - confirm which limit is intended.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        # Send the inputs to wherever the model's weights actually live rather
        # than to a separately computed device name that may not match.
        outputs = model(**inputs.to(model.device))
    # Squash the single logit into the (0, 1) range with a sigmoid.
    probability = torch.sigmoid(outputs.logits).item()
    return {"Predicted Impact": round(probability, 4)}

# Build the Gradio interface: two text inputs (title, abstract) feeding
# predict(), with the score shown in a label component.
_title_box = gr.Textbox(
    lines=2,
    placeholder="Enter Paper Title Here...",
    label="Paper Title",
)
_abstract_box = gr.Textbox(
    lines=5,
    placeholder="Enter Paper Abstract Here... (Do not input line breaks. No more than 1024 tokens.)",
    label="Paper Abstract",
)
_impact_label = gr.Label(label="Predicted Impact")

# Disclaimer shown under the page title.
_description = "Predict the normalized academic impact of a paper based on its title and abstract. Please note that the predicted impact is a probabilistic value generated by the model and does not accurately reflect the article's future citation performance. It should not be associated with writing quality, novelty, or other attributes. The author assumes no responsibility for the predictive metrics."

iface = gr.Interface(
    fn=predict,
    inputs=[_title_box, _abstract_box],
    outputs=_impact_label,
    title="Predict academic impact with LLMs",
    description=_description,
)

# Start the web UI as soon as the script runs.
iface.launch()