Spaces:

Jiahuita
/

News_Classification

Sleeping

App Files Files Community

Jiahuita committed on Dec 5, 2024

Commit

60dc372

1 Parent(s): aef9aa2

initial commit

Browse files

Files changed (6) hide show

Dockerfile +22 -0
README copy.md +97 -0
app.py +87 -0
news_classifier.h5 +3 -0
requirements.txt +7 -0
tokenizer.json +0 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+FROM python:3.9-slim
+WORKDIR /code
+# Install system dependencies. --no-install-recommends keeps optional
+# "recommended" packages out of the image; the apt lists are removed in the
+# same layer so they never end up in the final image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first to leverage Docker cache
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application (app.py, news_classifier.h5, tokenizer.json)
+COPY . .
+# Expose the port the app runs on
+EXPOSE 7860
+# Command to run the application: serve the "app" object from app.py on all
+# interfaces, on the port exposed above
+CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README copy.md ADDED Viewed

	@@ -0,0 +1,97 @@

+---
+title: News Source Classifier
+emoji: 📰
+colorFrom: blue
+colorTo: red
+sdk: fastapi
+sdk_version: 0.95.2
+app_file: app.py
+pinned: false
+language: en
+license: mit
+tags:
+- text-classification
+- news-classification
+- LSTM
+- tensorflow
+pipeline_tag: text-classification
+widget:
+- example_title: "Crime News Headline"
+  text: "Wife of murdered Minnesota pastor hired 3 men to kill husband after affair: police"
+- example_title: "Science News Headline"
+  text: "Scientists discover breakthrough in renewable energy research"
+- example_title: "Political News Headline"
+  text: "Presidential candidates face off in heated debate over climate policies"
+model-index:
+- name: News Source Classifier
+  results:
+  - task:
+      type: text-classification
+      name: Text Classification
+    dataset:
+      name: Custom Dataset
+      type: Custom
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.82
+---
+# News Source Classifier
+This model classifies news headlines as either Fox News or NBC News using an LSTM neural network.
+## Model Description
+- **Model Architecture**: LSTM Neural Network
+- **Input**: News headlines (text)
+- **Output**: Binary classification (Fox News vs NBC News)
+- **Training Data**: Large collection of headlines from both news sources
+- **Performance**: Achieves approximately 82% accuracy on the test set
+## Usage
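+You can query the model by sending a POST request to the `/predict` endpoint of the running FastAPI app (replace the URL below with the address of your deployment followed by `/predict`):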
+```python
+import requests
+response = requests.post(
+    "https://huggingface.co/Jiahuita/NewsSourceClassification",
+    json={"text": "Your news headline here"}
+)
+print(response.json())
+```
+Or use it locally:
+```python
+from transformers import pipeline
+classifier = pipeline("text-classification", model="Jiahuita/NewsSourceClassification")
+result = classifier("Your news headline here")
+print(result)
+```
+Example response:
+```json
+{
+    "label": "foxnews",
+    "score": 0.875
+}
+```
+## Limitations and Bias
+This model has been trained on news headlines from specific sources and time periods, which may introduce certain biases. Users should be aware of these limitations when using the model.
+## Training
+The model was trained using:
+- TensorFlow 2.13.0
+- LSTM architecture
+- Binary cross-entropy loss
+- Adam optimizer
+## License
+This project is licensed under the MIT License.

app.py ADDED Viewed

	@@ -0,0 +1,87 @@

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from tensorflow.keras.models import load_model
+from tensorflow.keras.preprocessing.text import tokenizer_from_json
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import numpy as np
+import json
+from typing import Union, List
+app = FastAPI()  # ASGI application object; the Dockerfile CMD serves it as "app:app"
+# Module-level model and tokenizer, shared by all request handlers and populated by load_model_and_tokenizer()
+model = None  # set from load_model('news_classifier.h5')
+tokenizer = None  # set from tokenizer_from_json() on the contents of tokenizer.json
+def load_model_and_tokenizer():
+    global model, tokenizer
+    try:
+        model = load_model('news_classifier.h5')
+        with open('tokenizer.json', 'r') as f:
+            tokenizer_data = json.load(f)
+            tokenizer = tokenizer_from_json(tokenizer_data)
+    except Exception as e:
+        print(f"Error loading model or tokenizer: {str(e)}")
+        raise e
+# Load on startup
+load_model_and_tokenizer()
+class PredictionInput(BaseModel):  # request body accepted by POST /predict
+    text: Union[str, List[str]]  # a single text, or a list of texts to classify as a batch
+class PredictionOutput(BaseModel):  # one classification result returned by POST /predict
+    label: str  # "foxnews" or "nbc", as assigned in predict()
+    score: float  # model score for the returned label (pred[1], or 1 - pred[1] for "nbc")
+@app.get("/")
+def read_root():
+    return {
+        "message": "News Source Classifier API",
+        "model_type": "LSTM",
+        "version": "1.0",
+        "status": "ready" if model and tokenizer else "not_loaded"
+    }
+@app.post("/predict", response_model=Union[PredictionOutput, List[PredictionOutput]])
+async def predict(input_data: PredictionInput):
+    if not model or not tokenizer:
+        try:
+            load_model_and_tokenizer()
+        except Exception as e:
+            raise HTTPException(status_code=500, detail="Model not loaded")
+    try:
+        # Handle both single string and list inputs
+        texts = input_data.text if isinstance(input_data.text, list) else [input_data.text]
+        # Preprocess
+        sequences = tokenizer.texts_to_sequences(texts)
+        padded = pad_sequences(sequences, maxlen=41)  # Match your model's input length
+        # Get predictions
+        predictions = model.predict(padded, verbose=0)
+        # Process results
+        results = []
+        for pred in predictions:
+            label = "foxnews" if pred[1] > 0.5 else "nbc"
+            score = float(pred[1] if label == "foxnews" else 1 - pred[1])
+            results.append({
+                "label": label,
+                "score": score
+            })
+        # Return single result if input was single string
+        return results[0] if isinstance(input_data.text, str) else results
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/reload")
+async def reload_model():
+    try:
+        load_model_and_tokenizer()
+        return {"message": "Model reloaded successfully"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))

news_classifier.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9258ee4d92199555974374b569634e73ad0d2b059d3b7125f3b75c2144528f4
+size 117315152

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+tensorflow>=2.10.0
+fastapi>=0.68.0
+uvicorn>=0.15.0
+pydantic>=1.8.2
+numpy>=1.19.2
+python-multipart
+scikit-learn>=0.24.2

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff