Spaces: Build error
Upload folder using huggingface_hub
- .dockerignore +0 -0
- .dockerigore +0 -0
- .gitattributes +4 -0
- Dockerfile +38 -0
- app/__init__.py +0 -0
- app/__pycache__/__init__.cpython-311.pyc +0 -0
- app/__pycache__/main.cpython-311.pyc +0 -0
- app/agents/__init__.py +0 -0
- app/agents/__pycache__/__init__.cpython-311.pyc +0 -0
- app/agents/__pycache__/crew_pipeline.cpython-311.pyc +0 -0
- app/agents/crew_pipeline.py +138 -0
- app/main.py +72 -0
- app/models/__init__.py +0 -0
- app/models/intent_classifier_v2.joblib +3 -0
- app/tasks/__init__.py +0 -0
- app/tasks/__pycache__/__init__.cpython-311.pyc +0 -0
- app/tasks/__pycache__/rag_updater.cpython-311.pyc +0 -0
- app/tasks/rag_updater.py +141 -0
- app/utils/__init__.py +0 -0
- app/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- app/utils/__pycache__/config.cpython-311.pyc +0 -0
- app/utils/config.py +41 -0
- app/vectorstore/__init__.py +0 -0
- app/vectorstore/faiss_index/index.faiss +3 -0
- app/vectorstore/faiss_index/index.pkl +3 -0
- app/vectorstore/live_rag_index/index.faiss +0 -0
- app/vectorstore/live_rag_index/index.pkl +3 -0
- app/venv/bin/python +3 -0
- app/venv/bin/python3 +3 -0
- app/venv/bin/python3.11 +3 -0
- app/venv/pyvenv.cfg +5 -0
- requirements.txt +18 -0
.dockerignore
ADDED
File without changes

.dockerigore
ADDED
File without changes
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+app/vectorstore/faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
+app/venv/bin/python filter=lfs diff=lfs merge=lfs -text
+app/venv/bin/python3 filter=lfs diff=lfs merge=lfs -text
+app/venv/bin/python3.11 filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,38 @@
+# Use a lightweight Python base
+FROM python:3.10-slim
+
+# Prevent interactive prompts
+ENV DEBIAN_FRONTEND=noninteractive \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    HF_HOME=/root/.cache/huggingface
+
+# Set work directory
+WORKDIR /code
+
+# Install system dependencies (needed for PyTorch + FAISS + SentenceTransformers)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    git \
+    curl \
+    libopenblas-dev \
+    libomp-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Pre-download Hugging Face models at build time
+RUN python -c "from transformers import pipeline; pipeline('text-generation', model='NousResearch/Nous-Hermes-2-Mistral-7B-DPO')" \
+    && python -c "from transformers import pipeline; pipeline('text2text-generation', model='google/flan-t5-large')" \
+    && python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')"
+
+# Copy project files
+COPY . .
+
+# Expose port for FastAPI / Hugging Face Spaces
+EXPOSE 7860
+
+# Run FastAPI app with uvicorn
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
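The three RUN python -c commands above cache the expert, formatter, and embedding models inside the image so the Space does not have to download several gigabytes of weights at startup. A minimal sketch of that same step as a standalone script (a hypothetical scripts/preload_models.py, not part of this commit):

# scripts/preload_models.py -- hypothetical helper mirroring the Dockerfile's pre-download step.
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Expert model used by crew_pipeline.py for detailed agricultural answers
pipeline("text-generation", model="NousResearch/Nous-Hermes-2-Mistral-7B-DPO")

# Formatter/weather model used to rewrite answers for farmers
pipeline("text2text-generation", model="google/flan-t5-large")

# Embedding model shared by the FAISS vectorstores
SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")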
app/__init__.py
ADDED
File without changes

app/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (166 Bytes).

app/__pycache__/main.cpython-311.pyc
ADDED
Binary file (3.31 kB).

app/agents/__init__.py
ADDED
File without changes

app/agents/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (173 Bytes).

app/agents/__pycache__/crew_pipeline.cpython-311.pyc
ADDED
Binary file (8.73 kB).
app/agents/crew_pipeline.py
ADDED
@@ -0,0 +1,138 @@
+# farmlingua/app/agents/crew_pipeline.py
+import os
+import sys
+import requests
+import joblib
+import faiss
+import numpy as np
+from transformers import pipeline
+from sentence_transformers import SentenceTransformer
+from app.utils import config
+
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if BASE_DIR not in sys.path:
+    sys.path.insert(0, BASE_DIR)
+
+DEVICE = 0 if os.environ.get("CUDA_VISIBLE_DEVICES") else -1
+
+try:
+    classifier = joblib.load(config.CLASSIFIER_PATH)
+except Exception:
+    classifier = None
+
+print(f"Loading expert model ({config.EXPERT_MODEL_NAME})...")
+expert_pipeline = pipeline(
+    "text-generation",
+    model=config.EXPERT_MODEL_NAME,
+    device=DEVICE,
+    max_new_tokens=700,
+    temperature=0.3,
+    repetition_penalty=1.1
+)
+
+print(f"Loading formatter/weather model ({config.FORMATTER_MODEL_NAME})...")
+formatter_pipeline = pipeline(
+    "text2text-generation",
+    model=config.FORMATTER_MODEL_NAME,
+    device=DEVICE
+)
+
+embedder = SentenceTransformer(config.EMBEDDING_MODEL)
+
+def retrieve_docs(query, vs_path):
+    if not vs_path or not os.path.exists(vs_path):
+        return None
+
+    if os.path.isdir(vs_path):
+        try:
+            from langchain.vectorstores import FAISS as LCFAISS
+            from langchain.embeddings import SentenceTransformerEmbeddings
+            embed_model = SentenceTransformerEmbeddings(model_name=config.EMBEDDING_MODEL)
+            vs = LCFAISS.load_local(str(vs_path), embed_model, allow_dangerous_deserialization=True)
+            docs = vs.similarity_search(query, k=3)
+            return "\n\n".join(d.page_content for d in docs) if docs else None
+        except Exception:
+            return None
+
+    try:
+        index = faiss.read_index(str(vs_path))
+    except Exception:
+        return None
+    query_vec = np.array([embedder.encode(query)], dtype=np.float32)
+    D, I = index.search(query_vec, k=3)
+    if D[0][0] == 0:
+        return None
+    meta_path = str(vs_path) + "_meta.npy"
+    if os.path.exists(meta_path):
+        metadata = np.load(meta_path, allow_pickle=True).item()
+        docs = [metadata.get(str(idx), "") for idx in I[0] if str(idx) in metadata]
+        docs = [doc for doc in docs if doc]
+        return "\n\n".join(docs) if docs else None
+    return None
+
+def get_weather(state_name):
+    url = "http://api.weatherapi.com/v1/current.json"
+    params = {
+        "key": config.WEATHER_API_KEY,
+        "q": f"{state_name}, Nigeria",
+        "aqi": "no"
+    }
+    r = requests.get(url, params=params)
+    if r.status_code != 200:
+        return f"Unable to retrieve weather for {state_name}."
+    data = r.json()
+    return (
+        f"Weather in {state_name}:\n"
+        f"- Condition: {data['current']['condition']['text']}\n"
+        f"- Temperature: {data['current']['temp_c']}°C\n"
+        f"- Humidity: {data['current']['humidity']}%\n"
+        f"- Wind: {data['current']['wind_kph']} kph"
+    )
+
+def detect_intent(query):
+    q_lower = query.lower()
+    if any(word in q_lower for word in ["weather", "temperature", "rain", "forecast"]):
+        for state in config.STATES:
+            if state.lower() in q_lower:
+                return "weather", state
+        return "weather", None
+    if any(word in q_lower for word in ["latest", "update", "breaking", "news", "current", "predict"]):
+        return "live_update", None
+    if hasattr(classifier, "predict") and hasattr(classifier, "predict_proba"):
+        predicted_intent = classifier.predict([query])[0]
+        confidence = max(classifier.predict_proba([query])[0])
+        if confidence < config.CLASSIFIER_CONFIDENCE_THRESHOLD:
+            return "low_confidence", None
+        return predicted_intent, None
+    return "normal", None
+
+def run_pipeline(user_query: str):
+    intent, extra = detect_intent(user_query)
+
+    if intent == "weather" and extra:
+        weather_text = get_weather(extra)
+        return formatter_pipeline(weather_text, max_length=256, do_sample=False)[0]["generated_text"]
+
+    if intent == "live_update":
+        context = retrieve_docs(user_query, config.LIVE_VS_PATH)
+        if context:
+            user_query += f"\n\nLatest agricultural updates:\n{context}"
+
+    if intent == "low_confidence":
+        context = retrieve_docs(user_query, config.STATIC_VS_PATH)
+        if context:
+            user_query += f"\n\nReference information:\n{context}"
+
+    expert_response = expert_pipeline(
+        f"Provide a detailed agricultural answer for: {user_query}",
+        max_new_tokens=700,
+        temperature=0.3
+    )[0]['generated_text']
+
+    formatted_response = formatter_pipeline(
+        f"Format the following answer to be clear, structured, and easy to understand for Nigerian farmers:\n\n{expert_response}",
+        max_length=512,
+        do_sample=False
+    )[0]['generated_text']
+
+    return formatted_response
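For a quick local smoke test of run_pipeline (assuming the repo root is on PYTHONPATH and the models and vectorstores referenced in config.py can actually be loaded), a minimal sketch:

# Hypothetical local smoke test, not part of this commit.
from app.agents.crew_pipeline import run_pipeline

if __name__ == "__main__":
    # A weather query routes through get_weather() and the formatter model;
    # anything else goes through the expert model and is then reformatted.
    print(run_pipeline("What is the weather in Kaduna today?"))
    print(run_pipeline("How do I control fall armyworm on maize?"))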
app/main.py
ADDED
@@ -0,0 +1,72 @@
+# farmlingua_backend/app/main.py
+import os
+import sys
+
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if BASE_DIR not in sys.path:
+    sys.path.insert(0, BASE_DIR)
+
+import logging
+from fastapi import FastAPI, Body
+from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
+
+from app.tasks.rag_updater import schedule_updates
+from app.utils import config
+from app.agents.crew_pipeline import run_pipeline
+
+
+logging.basicConfig(
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    level=logging.INFO
+)
+
+app = FastAPI(
+    title="FarmLingua Backend",
+    description="Backend service for FarmLingua with RAG updates and CrewAI pipeline",
+    version="1.0.0"
+)
+
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=config.ALLOWED_ORIGINS if hasattr(config, "ALLOWED_ORIGINS") else ["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+@app.on_event("startup")
+def startup_event():
+    """Start scheduled RAG updates when the app launches."""
+    logging.info(" Starting FarmLingua backend...")
+    schedule_updates()
+
+
+@app.get("/")
+def home():
+    return {
+        "status": "FarmLingua backend running",
+        "version": "1.0.0",
+        "vectorstore_path": config.VECTORSTORE_PATH
+    }
+
+@app.post("/ask")
+def ask_farmbot(query: str = Body(..., embed=True)):
+    """
+    Ask the FarmLingua AI a question.
+    Uses the crew_pipeline to process and return a detailed, farmer-friendly answer.
+    """
+    logging.info(f"Received query: {query}")
+    answer = run_pipeline(query)
+    return {"query": query, "answer": answer}
+
+
+if __name__ == "__main__":
+    uvicorn.run(
+        "app.main:app",
+        host="0.0.0.0",
+        port=getattr(config, "PORT", 7860),
+        reload=bool(getattr(config, "DEBUG", False))
+    )
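Because ask_farmbot declares query with Body(..., embed=True), the /ask route expects a JSON object with a "query" field. A minimal client sketch, assuming the app is reachable at http://localhost:7860 (a deployed Space would use its own host):

# Hypothetical client call, not part of this commit.
import requests

resp = requests.post(
    "http://localhost:7860/ask",  # assumed local URL
    json={"query": "Which maize variety suits Kaduna's rainfall?"},  # embed=True means {"query": ...}
    timeout=120,  # generation on CPU can take a while
)
resp.raise_for_status()
print(resp.json()["answer"])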
app/models/__init__.py
ADDED
File without changes

app/models/intent_classifier_v2.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffeda9eeb604a1a24ef64e774eb6b503ead5eae6ad3b043401033040a4309405
+size 39296294

app/tasks/__init__.py
ADDED
File without changes

app/tasks/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (172 Bytes).

app/tasks/__pycache__/rag_updater.cpython-311.pyc
ADDED
Binary file (8.43 kB).
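crew_pipeline.py only assumes this joblib artifact exposes predict and predict_proba on raw query strings; what kind of estimator it is can only be guessed from outside. A hedged sketch for inspecting it locally (the printed type is an assumption, likely a scikit-learn pipeline):

# Hypothetical inspection of the committed classifier, not part of this commit.
import joblib

clf = joblib.load("app/models/intent_classifier_v2.joblib")
print(type(clf))  # assumption: some scikit-learn estimator or Pipeline
print(clf.predict(["How do I store yam tubers after harvest?"]))
print(clf.predict_proba(["How do I store yam tubers after harvest?"]).max())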
app/tasks/rag_updater.py
ADDED
@@ -0,0 +1,141 @@
+# farmlingua_backend/app/tasks/rag_updater.py
+import os
+import sys
+from datetime import datetime, date
+import logging
+import requests
+from bs4 import BeautifulSoup
+from apscheduler.schedulers.background import BackgroundScheduler
+
+from langchain.vectorstores import FAISS
+from langchain.embeddings import SentenceTransformerEmbeddings
+from langchain.docstore.document import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+from app.utils import config
+
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if BASE_DIR not in sys.path:
+    sys.path.insert(0, BASE_DIR)
+
+logging.basicConfig(
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    level=logging.INFO
+)
+
+session = requests.Session()
+
+def fetch_weather_now():
+    """Fetch current weather for all configured states."""
+    docs = []
+    for state in config.STATES:
+        try:
+            url = "http://api.weatherapi.com/v1/current.json"
+            params = {
+                "key": config.WEATHER_API_KEY,
+                "q": f"{state}, Nigeria",
+                "aqi": "no"
+            }
+            res = session.get(url, params=params, timeout=10)
+            res.raise_for_status()
+            data = res.json()
+
+            if "current" in data:
+                condition = data['current']['condition']['text']
+                temp_c = data['current']['temp_c']
+                humidity = data['current']['humidity']
+                text = (
+                    f"Weather in {state}: {condition}, "
+                    f"Temperature: {temp_c}°C, Humidity: {humidity}%"
+                )
+                docs.append(Document(
+                    page_content=text,
+                    metadata={
+                        "source": "WeatherAPI",
+                        "location": state,
+                        "timestamp": datetime.utcnow().isoformat()
+                    }
+                ))
+        except Exception as e:
+            logging.error(f"Weather fetch failed for {state}: {e}")
+    return docs
+
+def fetch_harvestplus_articles():
+    """Fetch ALL today's articles from HarvestPlus site."""
+    try:
+        res = session.get(config.DATA_SOURCES["harvestplus"], timeout=10)
+        res.raise_for_status()
+        soup = BeautifulSoup(res.text, "html.parser")
+        articles = soup.find_all("article")
+
+        docs = []
+        today_str = date.today().strftime("%Y-%m-%d")
+
+        for a in articles:
+            content = a.get_text(strip=True)
+            if content and len(content) > 100:
+
+                if today_str in a.text or True:  # 'or True' disables the date filter, so every scraped article is kept
+                    docs.append(Document(
+                        page_content=content,
+                        metadata={
+                            "source": "HarvestPlus",
+                            "timestamp": datetime.utcnow().isoformat()
+                        }
+                    ))
+        return docs
+    except Exception as e:
+        logging.error(f"HarvestPlus fetch failed: {e}")
+        return []
+
+def build_rag_vectorstore(reset=False):
+    job_type = "FULL REBUILD" if reset else "INCREMENTAL UPDATE"
+    logging.info(f"RAG update started — {job_type}")
+
+    all_docs = fetch_weather_now() + fetch_harvestplus_articles()
+
+    logging.info(f"Weather docs fetched: {len([d for d in all_docs if d.metadata['source'] == 'WeatherAPI'])}")
+    logging.info(f"News docs fetched: {len([d for d in all_docs if d.metadata['source'] == 'HarvestPlus'])}")
+
+    if not all_docs:
+        logging.warning("No documents fetched, skipping update")
+        return
+
+    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)
+    chunks = splitter.split_documents(all_docs)
+
+    embedder = SentenceTransformerEmbeddings(model_name=config.EMBEDDING_MODEL)
+
+    vectorstore_path = config.LIVE_VS_PATH
+
+    if reset and os.path.exists(vectorstore_path):
+        for file in os.listdir(vectorstore_path):
+            file_path = os.path.join(vectorstore_path, file)
+            try:
+                os.remove(file_path)
+                logging.info(f"Deleted old file: {file_path}")
+            except Exception as e:
+                logging.error(f"Failed to delete {file_path}: {e}")
+
+    if os.path.exists(vectorstore_path) and not reset:
+        vs = FAISS.load_local(
+            vectorstore_path,
+            embedder,
+            allow_dangerous_deserialization=True
+        )
+        vs.add_documents(chunks)
+    else:
+        vs = FAISS.from_documents(chunks, embedder)
+
+    os.makedirs(vectorstore_path, exist_ok=True)
+    vs.save_local(vectorstore_path)
+
+    logging.info(f"Vectorstore updated at {vectorstore_path}")
+
+def schedule_updates():
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(build_rag_vectorstore, 'interval', hours=12, kwargs={"reset": False})
+    scheduler.add_job(build_rag_vectorstore, 'interval', days=7, kwargs={"reset": True})
+    scheduler.start()
+    logging.info("Scheduler started — 12-hour incremental updates + weekly full rebuild")
+    return scheduler
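schedule_updates() only registers interval jobs, so the live index is not refreshed until the first 12-hour tick. A one-off manual rebuild, sketched under the assumption that WEATHER_API_KEY and outbound network access are available:

# Hypothetical one-off run, not part of this commit.
from app.tasks.rag_updater import build_rag_vectorstore

# reset=True clears app/vectorstore/live_rag_index and rebuilds it from fresh
# WeatherAPI readings and scraped articles; reset=False appends to the index.
build_rag_vectorstore(reset=True)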
app/utils/__init__.py
ADDED
File without changes

app/utils/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (172 Bytes).

app/utils/__pycache__/config.cpython-311.pyc
ADDED
Binary file (1.85 kB).
app/utils/config.py
ADDED
@@ -0,0 +1,41 @@
+# farmlingua_backend/app/utils/config.py
+from pathlib import Path
+import os
+import sys
+
+
+BASE_DIR = Path(__file__).resolve().parents[2]
+
+
+if str(BASE_DIR) not in sys.path:
+    sys.path.insert(0, str(BASE_DIR))
+
+EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
+STATIC_VS_PATH = BASE_DIR / "app" / "vectorstore" / "faiss_index"
+LIVE_VS_PATH = BASE_DIR / "app" / "vectorstore" / "live_rag_index"
+
+VECTORSTORE_PATH = LIVE_VS_PATH
+
+
+WEATHER_API_KEY = os.getenv("WEATHER_API_KEY", "d72aad0c21c74cf897b105451250408")
+
+
+CLASSIFIER_PATH = BASE_DIR / "app" / "models" / "intent_classifier_v2.joblib"
+CLASSIFIER_CONFIDENCE_THRESHOLD = float(os.getenv("CLASSIFIER_CONFIDENCE_THRESHOLD", "0.6"))
+
+
+EXPERT_MODEL_NAME = os.getenv("EXPERT_MODEL_NAME", "NousResearch/Nous-Hermes-2-Mistral-7B-DPO")
+FORMATTER_MODEL_NAME = os.getenv("FORMATTER_MODEL_NAME", "google/flan-t5-large")
+
+
+DATA_SOURCES = {
+    "harvestplus": "https://agronigeria.ng/category/news/",
+}
+
+STATES = [
+    "Abuja", "Lagos", "Kano", "Kaduna", "Rivers", "Enugu", "Anambra", "Ogun",
+    "Oyo", "Delta", "Edo", "Katsina", "Borno", "Benue", "Niger", "Plateau",
+    "Bauchi", "Adamawa", "Cross River", "Akwa Ibom", "Ekiti", "Osun", "Ondo",
+    "Imo", "Abia", "Ebonyi", "Taraba", "Kebbi", "Zamfara", "Yobe", "Gombe",
+    "Sokoto", "Kogi", "Bayelsa", "Nasarawa", "Jigawa"
+]
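Every tunable above falls back to os.getenv, so a deployment can swap models or supply its own API key without editing the file (on a Space these would normally be repository secrets). A sketch of overriding them from Python before config is imported; the values are illustrative:

# Hypothetical override, not part of this commit. Set these before importing app.utils.config.
import os

os.environ["WEATHER_API_KEY"] = "your-weatherapi-key"           # replaces the hard-coded default
os.environ["EXPERT_MODEL_NAME"] = "google/flan-t5-large"        # illustrative lighter expert model
os.environ["CLASSIFIER_CONFIDENCE_THRESHOLD"] = "0.7"           # stricter intent threshold

from app.utils import config
print(config.EXPERT_MODEL_NAME, config.CLASSIFIER_CONFIDENCE_THRESHOLD)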
app/vectorstore/__init__.py
ADDED
File without changes

app/vectorstore/faiss_index/index.faiss
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4faefcc68ae5a575b18f559e04cd2c68e166a73c4c89c9550e1794ccbf90695
+size 19648557

app/vectorstore/faiss_index/index.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1c75f31eab757e90e9c9771b62368c2de5dc11ed776629521fb007d8d47b84a
+size 5863908

app/vectorstore/live_rag_index/index.faiss
ADDED
Binary file (70.7 kB).

app/vectorstore/live_rag_index/index.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:466653741f0cbbcbb51c817af910e5ca03c769e9009b3e3bf0f6fdcad71393b1
+size 12074
app/venv/bin/python
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6576b9698f70099339119b06d3c9765067c335cc72a203b20aa7480b57293839
+size 6727416

app/venv/bin/python3
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6576b9698f70099339119b06d3c9765067c335cc72a203b20aa7480b57293839
+size 6727416

app/venv/bin/python3.11
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6576b9698f70099339119b06d3c9765067c335cc72a203b20aa7480b57293839
+size 6727416

app/venv/pyvenv.cfg
ADDED
@@ -0,0 +1,5 @@
+home = /usr/bin
+include-system-site-packages = false
+version = 3.11.13
+executable = /usr/bin/python3.11
+command = /usr/bin/python3 -m venv /content/drive/MyDrive/farmlingua_backend/app/venv
requirements.txt
ADDED
@@ -0,0 +1,18 @@
+crewai
+langchain
+langchain-community
+faiss-cpu
+transformers
+sentence-transformers
+pydantic
+joblib
+pyyaml
+torch
+fastapi
+uvicorn
+apscheduler
+numpy
+requests
+beautifulsoup4
+huggingface-hub
+python-dotenv