# rag_chain.py
"""Build a RetrievalQA chain over a YouTube video's transcript.

Pipeline: fetch transcript -> chunk -> embed (local MiniLM snapshot) ->
FAISS vector store -> RetrievalQA against an OpenRouter-hosted LLaMA model.
"""

import os

import requests
from dotenv import load_dotenv
from youtube_transcript_api import (
    NoTranscriptFound,
    TranscriptsDisabled,
    VideoUnavailable,
    YouTubeTranscriptApi,
)

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

load_dotenv()

# Tokens are normally injected as secret env vars (e.g. Hugging Face Spaces).
# Guard against missing values: `os.environ[...] = None` raises TypeError at
# import time, so only forward a token when it is actually set.
hf_token = os.getenv("HUGGINGFACEHUB_ACCESS_TOKEN")
openai_token = os.getenv("OPENAI_API_KEY")
if hf_token:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
if openai_token:
    os.environ["OPENAI_API_KEY"] = openai_token

# Hugging Face embeddings loaded from a pre-downloaded local snapshot;
# local_files_only prevents any network fetch at startup.
os.environ["HF_HOME"] = "Embedding Models"
embedding = HuggingFaceEmbeddings(
    model_name=(
        "Embedding Models/hub/models--sentence-transformers--all-MiniLM-L6-v2/"
        "snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf"
    ),
    model_kwargs={"local_files_only": True},
)

# OpenRouter-hosted LLM (Meta LLaMA 3.3, free tier). Reads OPENAI_API_KEY
# from the environment set above.
llm = ChatOpenAI(
    openai_api_base="https://openrouter.ai/api/v1",
    model="meta-llama/llama-3.3-70b-instruct:free",
)

# Prompt that grounds answers in the retrieved transcript chunks.
qa_prompt = PromptTemplate(
    template="""
You are a helpful assistant answering questions based on YouTube video content.
Context: {context}
Question: {question}
Answer:""",
    input_variables=["context", "question"],
)


def fetch_transcript(video_id: str) -> str:
    """Return the full transcript of *video_id* as one space-joined string.

    Tries English first, then Hindi. Requests are routed through a fixed
    HTTP proxy (YouTube blocks many cloud-host IPs).

    Raises:
        Exception: if the transcript is disabled/missing/unavailable, or on
            any other fetch error; the message includes the underlying cause.
    """
    # NOTE(review): hard-coded public proxy — fragile; consider making it
    # configurable via an env var.
    proxies = {
        "http": "http://219.65.73.81:80",
        "https": "http://219.65.73.81:80",
    }
    try:
        # Pass the proxy directly to the API. The previous approach of
        # monkey-patching `YouTubeTranscriptApi._requests` with a
        # requests.Session had no effect: the library never reads that
        # attribute, so the proxy was silently ignored. `proxies=` is the
        # supported mechanism of get_transcript().
        transcript = YouTubeTranscriptApi.get_transcript(
            video_id, languages=["en", "hi"], proxies=proxies
        )
        return " ".join(entry["text"] for entry in transcript)
    except (TranscriptsDisabled, NoTranscriptFound, VideoUnavailable) as e:
        # Chain the cause so the original traceback is preserved.
        raise Exception(
            f"Transcript not available for video_id {video_id}: {str(e)}"
        ) from e
    except Exception as e:
        raise Exception(f"Error fetching transcript: {str(e)}") from e


def build_chain(video_id: str) -> RetrievalQA:
    """Build a RetrievalQA chain over the transcript of *video_id*.

    Fetches the transcript, splits it into overlapping chunks, indexes the
    chunks in an in-memory FAISS store, and wires a "stuff" RetrievalQA
    chain with conversation memory around the module-level `llm`.
    """
    text = fetch_transcript(video_id)

    # Chunk the transcript so each piece fits comfortably in the
    # embedding / LLM context window; overlap keeps sentences intact.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = splitter.create_documents([text])

    vectorstore = FAISS.from_documents(docs, embedding)
    retriever = vectorstore.as_retriever()

    # output_key="result" tells the memory which of the chain's two outputs
    # ("result" and "source_documents") to record — required when
    # return_source_documents=True.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="result",
    )
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        memory=memory,
        return_source_documents=True,
        output_key="result",
        chain_type_kwargs={"prompt": qa_prompt},
    )