Spaces:

dejanseo
/

Query-Quality-Classifier

Running

App Files Files Community

dejanseo commited on Aug 31

Commit

8418d26

•

1 Parent(s): 9da4402

Update app.py

Browse files

Files changed (1) hide show

app.py +117 -61

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import streamlit as st
 import torch
-from transformers import AlbertTokenizer, AlbertForSequenceClassification, AlbertConfig
 import plotly.graph_objects as go
 # URL of the logo
@@ -11,7 +11,9 @@ st.logo(logo_url, link="https://dejan.ai")
 # Streamlit app title and description
 st.title("Search Query Form Classifier")
-st.write("Ambiguous search queries are candidates for query expansion. Our model identifies such queries with an 80 percent accuracy and is deployed in a batch processing pipeline directly connected with Google Search Console API. In this demo you can test the model capability by testing individual queries.")
 st.write("Enter a query to check if it's well-formed:")
 # Load the model and tokenizer from the Hugging Face Model Hub
@@ -19,66 +21,120 @@ model_name = 'dejanseo/Query-Quality-Classifier'
 tokenizer = AlbertTokenizer.from_pretrained(model_name)
 model = AlbertForSequenceClassification.from_pretrained(model_name)
-# Set the model to evaluation mode
 model.eval()
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model.to(device)
-# User input
-user_input = st.text_input("Query:", "What is?")
-st.write("Developed by [Dejan AI](https://dejan.ai/blog/search-query-quality-classifier/)")
-def classify_query(query):
-    # Tokenize input
-    inputs = tokenizer.encode_plus(
-        query,
-        add_special_tokens=True,
-        max_length=32,
-        padding='max_length',
-        truncation=True,
-        return_attention_mask=True,
-        return_tensors='pt'
-    )
-    input_ids = inputs['input_ids'].to(device)
-    attention_mask = inputs['attention_mask'].to(device)
-    # Perform inference
-    with torch.no_grad():
-        outputs = model(input_ids, attention_mask=attention_mask)
-        logits = outputs.logits
-        softmax_scores = torch.softmax(logits, dim=1).cpu().numpy()[0]
-        confidence = softmax_scores[1] * 100  # Confidence for well-formed class
-    return confidence
-# Check and display classification
-if user_input:
-    confidence = classify_query(user_input)
-    # Plotly gauge
-    fig = go.Figure(go.Indicator(
-        mode="gauge+number",
-        value=confidence,
-        title={'text': "Well-formedness Confidence"},
-        gauge={
-            'axis': {'range': [0, 100]},
-            'bar': {'color': "darkblue"},
-            'steps': [
-                {'range': [0, 50], 'color': "red"},
-                {'range': [50, 100], 'color': "green"}
-            ],
-            'threshold': {
-                'line': {'color': "black", 'width': 4},
-                'thickness': 0.75,
-                'value': confidence
-            }
-        }
-    ))
-    st.plotly_chart(fig)
-    if confidence >= 50:
-        st.success(f"The query is likely well-formed with {confidence:.2f}% confidence.")
-    else:
-        st.error(f"The query is likely not well-formed with {100 - confidence:.2f}% confidence.")

 import streamlit as st
 import torch
+from transformers import AlbertTokenizer, AlbertForSequenceClassification
 import plotly.graph_objects as go
 # URL of the logo
 # Streamlit app title and description
 st.title("Search Query Form Classifier")
+st.write(
+    "Ambiguous search queries are candidates for query expansion. Our model identifies such queries with an 80 percent accuracy and is deployed in a batch processing pipeline directly connected with Google Search Console API. In this demo you can test the model capability by testing individual queries."
+)
 st.write("Enter a query to check if it's well-formed:")
 # Load the model and tokenizer from the Hugging Face Model Hub
 tokenizer = AlbertTokenizer.from_pretrained(model_name)
 model = AlbertForSequenceClassification.from_pretrained(model_name)
+# Set the model to evaluation mode
 model.eval()
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model.to(device)
+# Create tabs for single and bulk queries
+tab1, tab2 = st.tabs(["Single Query", "Bulk Query"])
+with tab1:
+    user_input = st.text_input("Query:", "What is?")
+    #st.write("Developed by [Dejan AI](https://dejan.ai/blog/search-query-quality-classifier/)")
+    def classify_query(query):
+        # Tokenize input
+        inputs = tokenizer.encode_plus(
+            query,
+            add_special_tokens=True,
+            max_length=32,
+            padding='max_length',
+            truncation=True,
+            return_attention_mask=True,
+            return_tensors='pt'
+        )
+        input_ids = inputs['input_ids'].to(device)
+        attention_mask = inputs['attention_mask'].to(device)
+        # Perform inference
+        with torch.no_grad():
+            outputs = model(input_ids, attention_mask=attention_mask)
+            logits = outputs.logits
+            softmax_scores = torch.softmax(logits, dim=1).cpu().numpy()[0]
+            confidence = softmax_scores[1] * 100  # Confidence for well-formed class
+        return confidence
+    # Function to determine color based on confidence
+    def get_color(confidence):
+        if confidence < 50:
+            return 'rgba(255, 51, 0, 0.8)'  # Red
+        else:
+            return 'rgba(57, 172, 57, 0.8)'  # Green
+    # Check and display classification for single query
+    if user_input:
+        confidence = classify_query(user_input)
+        # Plotly grey placeholder bar with dynamic color fill
+        fig = go.Figure()
+        # Placeholder grey bar
+        fig.add_trace(go.Bar(
+            x=[100],
+            y=['Well-formedness Factor'],
+            orientation='h',
+            marker=dict(
+                color='lightgrey'
+            ),
+            width=0.8
+        ))
+        # Colored bar based on confidence
+        fig.add_trace(go.Bar(
+            x=[confidence],
+            y=['Well-formedness Factor'],
+            orientation='h',
+            marker=dict(
+                color=get_color(confidence)
+            ),
+            width=0.8
+        ))
+        fig.update_layout(
+            xaxis=dict(range=[0, 100], title='Well-formedness Factor'),
+            yaxis=dict(showticklabels=False),
+            width=600,
+            height=250,  # Increase height for better visibility
+            title_text='Well-formedness Factor',
+            plot_bgcolor='rgba(0,0,0,0)',
+            showlegend=False
+        )
+        st.plotly_chart(fig)
+        if confidence >= 50:
+            st.success(f"Query Score: {confidence:.2f}% Most likely doesn't require query expansion.")
+            st.subheader(f":sparkles: What's next?", divider="gray")
+            st.write("Connect with Google Search Console, Semrush, Ahrefs or any other search query source API and detect all queries which could benefit from expansion.")
+            st.write("[Engage our team](https://dejan.ai/call/) if you'd like us to do this for you.")
+        else:
+            st.error(f"The query is likely not well-formed with a score of {100 - confidence:.2f}% and most likely requires query expansion.")
+            st.subheader(f":sparkles: What's next?", divider="gray")
+            st.write("Connect with Google Search Console, Semrush, Ahrefs or any other search query source API and detect all queries which could benefit from expansion.")
+            st.write("[Engage our team](https://dejan.ai/call/) if you'd like us to do this for you.")
+with tab2:
+    st.write("Paste multiple queries line-separated (no headers or extra data):")
+    bulk_input = st.text_area("Bulk Queries:", height=200)
+    if bulk_input:
+        bulk_queries = bulk_input.splitlines()
+        st.write("Processing queries...")
+        # Classify each query in bulk input
+        results = [(query, classify_query(query)) for query in bulk_queries]
+        # Display results in a table
+        for query, confidence in results:
+            st.write(f"Query: {query} - Score: {confidence:.2f}%")
+            if confidence >= 50:
+                st.success("Well-formed")
+            else:
+                st.error("Not well-formed")
+        st.subheader(f":sparkles: What's next?", divider="gray")
+        st.write("Connect with Google Search Console, Semrush, Ahrefs or any other search query source API and detect all queries which could benefit from expansion.")
+        st.write("[Engage our team](https://dejan.ai/call/) if you'd like us to do this for you.")