import os
import gradio as gr
import whisper
from gtts import gTTS
import io
from groq import Groq
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


# Set up environment variables
# NOTE: in a real deployment the key should come from the environment or a secrets store, not the source code
os.environ["GROQ_API_KEY"] = "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"

# Initialize the Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load the Whisper model
whisper_model = whisper.load_model("base")  # You can choose other models like "small", "medium", "large"
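
# Quick standalone transcription check (illustrative; "test.wav" is a placeholder path, not part of this app):
#   print(whisper_model.transcribe("test.wav")["text"])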

# Initialize the tokenizer and model for RAG, offloading weights to disk when they
# do not fit in memory.

# Folder where offloaded model shards will be stored
offload_folder = "./offload"

# Ensure the offload folder exists
os.makedirs(offload_folder, exist_ok=True)

# Initialize the tokenizer
rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")

# Load the model. `device_map="auto"` lets Accelerate place layers across the GPU,
# the CPU and the offload folder automatically, so no manual dispatch, tie_weights()
# or .to("cuda") call is needed afterwards.
rag_model = AutoModelForCausalLM.from_pretrained(
    "himmeow/vi-gemma-2b-RAG",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    offload_folder=offload_folder,
)

# Load PDF content
def load_pdf(pdf_path):
    pdf_text = ""
    with open(pdf_path, "rb") as file:
        reader = PdfReader(file)
        for page_num in range(len(reader.pages)):
            page = reader.pages[page_num]
            text = page.extract_text()
            pdf_text += text + "\n"
    return pdf_text
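
# Example usage of load_pdf (illustrative; "sample.pdf" is a placeholder path, not part of this app):
#   pdf_text = load_pdf("sample.pdf")
#   print(pdf_text[:200])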

# Define the prompt format for the RAG model
prompt_template = """
### Instruction and Input:
Based on the following context/document:
{}
Please answer the question: {}

### Response:
{}
"""

# Function to process audio and generate a response using RAG and Groq
def process_audio_rag(file_path):
    try:
        # Load and transcribe the audio using Whisper
        audio = whisper.load_audio(file_path)
        result = whisper_model.transcribe(audio)
        text = result["text"]

        # Load the PDF content (update with your PDF path or pass it as an argument)
        pdf_path = "/content/BN_Cotton.pdf"
        pdf_text = load_pdf(pdf_path)

        # Prepare the input data for the RAG model
        query = text
        input_text = prompt_template.format(pdf_text, query, " ")

        # Tokenize the prompt and move it to the same device as the RAG model
        inputs = rag_tokenizer(input_text, return_tensors="pt").to(rag_model.device)

        # Generate text using the RAG model
        outputs = rag_model.generate(
            **inputs,
            max_new_tokens=500,
            no_repeat_ngram_size=5
        )
        # Decode only the newly generated tokens, not the prompt (which contains the whole PDF)
        rag_response = rag_tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[-1]:],
            skip_special_tokens=True
        )

        # Generate a response using Groq if needed
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": rag_response}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )
        response_message = chat_completion.choices[0].message.content.strip()

        # Convert the response text to speech
        tts = gTTS(response_message)
        response_audio_io = io.BytesIO()
        tts.write_to_fp(response_audio_io)
        response_audio_io.seek(0)

        # Save audio to a file to ensure it's generated correctly
        with open("response.mp3", "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())

        # Return the response text and the path to the saved audio file
        return response_message, "response.mp3"

    except Exception as e:
        return f"An error occurred: {e}", None

# Create a Gradio interface
iface = gr.Interface(
    fn=process_audio_rag,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
    live=True,
    title="Agriculture Assistant"
)

# Launch the Gradio interface
iface.launch()
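
# When running inside a notebook such as Google Colab (as the /content/ PDF path above suggests),
# Gradio can optionally expose a temporary public link instead:
#   iface.launch(share=True)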