Spaces:

ssocean
/

Newborn_Article_Impact_Predict

Running on Zero

App Files Files Community

ssocean committed on Dec 5, 2024

Commit

1fd1dd9

verified ·

1 Parent(s): 83d6a82

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -39

app.py CHANGED Viewed

@@ -4,8 +4,7 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch.nn.functional as F
 import torch.nn as nn
 import re
-model_path = "ssocean/NAIP"  # 更换为你的模型路径
 model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=1, load_in_8bit=True)
 tokenizer = AutoTokenizer.from_pretrained(model_path)
@@ -14,45 +13,17 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model.eval()
-def validate_input(title, abstract):
-    """验证输入是否符合要求"""
-    # 黑名单：屏蔽非拉丁字符
-    non_latin_pattern = re.compile(r'[^\u0000-\u007F]')
-    if len(title.split(' '))<4:
-        return False, "The title must be at least 3 words long."
-    if len(abstract.split(' ')) < 50:
-        return False, "The abstract must be at least 50 words long."
-    if len((title + abstract).split(' '))>1024:
-        return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"
-    if non_latin_pattern.search(title):
-        return False, "The title contains invalid characters. Only English letters and special symbols are allowed."
-    if non_latin_pattern.search(abstract):
-        return False, "The abstract contains invalid characters. Only English letters and special symbols are allowed."
-    return True, "Inputs are valid! Good to go!"
-def update_button_status(title, abstract):
-    """根据输入内容动态更新按钮状态"""
-    valid, message = validate_input(title, abstract)
-    if not valid:
-        return gr.update(value="Error: " + message), gr.update(interactive=False)
-    return gr.update(value=message), gr.update(interactive=True)
 def predict(title, abstract):
     text = f'''Given a certain paper, Title: {title}\n Abstract: {abstract}. \n Predict its normalized academic impact (between 0 and 1):'''
     inputs = tokenizer(text, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs.to(device))
-    probability = torch.sigmoid(outputs.logits).item() + 0.05
     # reason for +0.05: We observed that the predicted values in the web demo are generally around 0.05 lower than those in the local deployment (due to differences in software/hardware environments). Therefore, we applied the following compensation in the web demo. Please do not use this in the local deployment.
-    # Clamp the value to ensure it is between 0 and 1 (for probabilities)
-    clamped_probability = torch.clamp(probability, min=0.0, max=1.0)
-    # Return the clamped probability, rounded to 4 decimal places
-    return round(clamped_probability, 4)
 # 示例数据
@@ -106,6 +77,31 @@ examples = [
     ]
 ]
 # 创建 Gradio 界面
 with gr.Blocks() as iface:
     gr.Markdown("""
@@ -160,10 +156,6 @@ with gr.Blocks() as iface:
     - Predicted impact is a probabilistic value generated by the model and does not reflect paper quality or novelty.
     - The author takes no responsibility for the prediction results.
     - To identify potentially impactful papers, this study uses the sigmoid+MSE approach to optimize NDCG values (over sigmoid+BCE), resulting in predicted values concentrated between 0.1 and 0.9 due to the sigmoid gradient effect.
-    - Generally, it is considered a predicted influence score greater than 0.65 to indicate an exceptionally impactful paper.
     """)
 iface.launch()

 import torch.nn.functional as F
 import torch.nn as nn
 import re
+model_path = r'ssocean/NAIP'
 model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=1, load_in_8bit=True)
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model.eval()
 def predict(title, abstract):
     """Predict the normalized academic impact of a paper for the web demo.

     Builds the prompt from the title and abstract, runs the module-level
     ``model`` on the module-level ``device`` with gradients disabled, and
     converts the single logit to a score with a sigmoid.

     Args:
         title: Paper title inserted into the prompt.
         abstract: Paper abstract inserted into the prompt.

     Returns:
         float: The sigmoid score plus the web-demo offset, capped at 1.0 and
         rounded to 4 decimal places.
     """
     # Reason for the offset: predicted values in the web demo were observed to
     # be roughly 0.05 lower than in a local deployment (software/hardware
     # environment differences), so the demo compensates for it. Do not apply
     # this offset in a local deployment.
     demo_offset = 0.05

     text = f'''Given a certain paper, Title: {title}\n Abstract: {abstract}. \n Predict its normalized academic impact (between 0 and 1):'''
     # NOTE(review): no truncation argument is passed here, although the input
     # validation message warns about a 1024 tokenization limit -- confirm
     # whether truncation should be requested explicitly.
     inputs = tokenizer(text, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs.to(device))
     probability = torch.sigmoid(outputs.logits).item()

     # A single clamp keeps the return type a float on every path (the previous
     # saturated branch returned the int 1). No lower bound is needed because a
     # sigmoid output is never negative.
     return round(min(probability + demo_offset, 1.0), 4)
 # 示例数据
     ]
 ]
def validate_input(title, abstract):
    """Check that the title and abstract are acceptable model input.

    Checks are applied in this order:
      1. the title has at least 3 whitespace-separated words;
      2. the abstract has at least 50 whitespace-separated words;
      3. neither field contains a character outside the ASCII range;
      4. the combined word count does not exceed 1024 (warning only).

    Args:
        title: Paper title as entered by the user (``None`` is treated as "").
        abstract: Paper abstract as entered by the user (``None`` is treated
            as "").

    Returns:
        tuple[bool, str]: ``(is_valid, message)``. ``is_valid`` is ``True`` for
        acceptable input, including the over-length case, where ``message`` is
        a warning rather than an error.
    """
    # Treat a missing value like an empty field instead of raising.
    title = title or ""
    abstract = abstract or ""

    # Blacklist: reject any character outside the 7-bit ASCII range.
    non_latin_pattern = re.compile(r'[^\u0000-\u007F]')

    # split() without an argument splits on any run of whitespace, so repeated
    # spaces do not inflate the count and newlines/tabs separate words too.
    title_words = title.split()
    abstract_words = abstract.split()

    # The threshold matches the message: a 3-word title is accepted.
    if len(title_words) < 3:
        return False, "The title must be at least 3 words long."
    if len(abstract_words) < 50:
        return False, "The abstract must be at least 50 words long."

    # Character checks run before the length warning so that an over-long
    # input cannot skip them by returning early with the warning.
    if non_latin_pattern.search(title):
        return False, "The title contains invalid characters. Only English letters and special symbols are allowed."
    if non_latin_pattern.search(abstract):
        return False, "The abstract contains invalid characters. Only English letters and special symbols are allowed."

    # Count the two fields separately; concatenating the raw strings would
    # merge the last title word with the first abstract word.
    if len(title_words) + len(abstract_words) > 1024:
        return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"
    return True, "Inputs are valid! Good to go!"
def update_button_status(title, abstract):
    """Refresh the status text and the button state from the current inputs.

    Runs ``validate_input`` on the two fields and returns a pair of
    ``gr.update`` objects: the first carries the status message (prefixed with
    "Error: " when validation fails), the second sets the ``interactive`` flag
    to match the validation result.
    """
    is_ok, status_text = validate_input(title, abstract)
    shown_text = status_text if is_ok else "Error: " + status_text
    return gr.update(value=shown_text), gr.update(interactive=bool(is_ok))
 # 创建 Gradio 界面
 with gr.Blocks() as iface:
     gr.Markdown("""
     - Predicted impact is a probabilistic value generated by the model and does not reflect paper quality or novelty.
     - The author takes no responsibility for the prediction results.
     - To identify potentially impactful papers, this study uses the sigmoid+MSE approach to optimize NDCG values (over sigmoid+BCE), resulting in predicted values concentrated between 0.1 and 0.9 due to the sigmoid gradient effect.
+    - Generally, it is considered a predicted influence score greater than 0.65 to indicate an impactful paper.
     """)
 iface.launch()