harshpatel080503 committed on
Commit
c31dfff
·
verified ·
1 Parent(s): 694e6a8

Update rag_chain.py

Browse files
Files changed (1) hide show
  1. rag_chain.py +14 -10
rag_chain.py CHANGED
@@ -51,15 +51,18 @@ Answer:""",
51
  input_variables=["context", "question"],
52
  )
53
 
54
- # Updated to optionally accept proxies
55
- def fetch_transcript(video_id: str, proxies: dict = None) -> str:
56
  try:
57
- # If proxies are provided, patch requests.Session to use them
58
- if proxies:
59
- session = requests.Session()
60
- session.proxies.update(proxies)
61
- # Monkey patch the YouTubeTranscriptApi's internal session to use proxy
62
- YouTubeTranscriptApi._requests = session
 
 
 
 
63
 
64
  transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en", "hi"])
65
  return " ".join([t["text"] for t in transcript])
@@ -69,9 +72,10 @@ def fetch_transcript(video_id: str, proxies: dict = None) -> str:
69
  except Exception as e:
70
  raise Exception(f"Error fetching transcript: {str(e)}")
71
 
 
72
  # Build RAG chain from transcript
73
- def build_chain(video_id: str, proxies: dict = None) -> RetrievalQA:
74
- text = fetch_transcript(video_id, proxies=proxies)
75
 
76
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
77
  docs = splitter.create_documents([text])
 
51
  input_variables=["context", "question"],
52
  )
53
 
54
+ def fetch_transcript(video_id: str) -> str:
 
55
  try:
56
+ # Define your proxy here
57
+ proxies = {
58
+ "http": "http://219.65.73.81:80",
59
+ "https": "http://219.65.73.81:80"
60
+ }
61
+
62
+ # Patch requests session with proxy
63
+ session = requests.Session()
64
+ session.proxies.update(proxies)
65
+ YouTubeTranscriptApi._requests = session # monkey patch
66
 
67
  transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en", "hi"])
68
  return " ".join([t["text"] for t in transcript])
 
72
  except Exception as e:
73
  raise Exception(f"Error fetching transcript: {str(e)}")
74
 
75
+
76
  # Build RAG chain from transcript
77
+ def build_chain(video_id: str) -> RetrievalQA:
78
+ text = fetch_transcript(video_id)
79
 
80
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
81
  docs = splitter.create_documents([text])