Spaces:

GenAIDevTOProd
/

Reddit-SemanticSearch-Prototype

Sleeping

GenAIDevTOProd committed 19 days ago

Commit

037c9f0

verified ·

1 Parent(s): 11db2b7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,10 +15,9 @@ loading and combining all the iterables together.
 """
-from huggingface_hub import hf_hub_url, cached_download
-import json
-from huggingface_hub import hf_hub_url, cached_download
 import json
 from itertools import chain
@@ -27,7 +26,7 @@ target_subreddits = ["askscience", "gaming", "technology", "todayilearned", "pro
 def load_reddit_split(subreddit_name):
     """Stream Reddit comments from a specific subreddit split"""
     file_url = hf_hub_url(repo_id="HuggingFaceGECLM/REDDIT_comments", filename=f"{subreddit_name}.jsonl")
-    file_path = cached_download(file_url)
     with open(file_path, "r") as f:
         for line in f:
             yield json.loads(line)

 """
+from huggingface_hub import hf_hub_download
 import json
 from itertools import chain
 def load_reddit_split(subreddit_name):
     """Stream Reddit comments from a specific subreddit split"""
     file_url = hf_hub_url(repo_id="HuggingFaceGECLM/REDDIT_comments", filename=f"{subreddit_name}.jsonl")
+    file_path = hf_hub_download(repo_id="HuggingFaceGECLM/REDDIT_comments", filename=f"{subreddit_name}.jsonl")
     with open(file_path, "r") as f:
         for line in f:
             yield json.loads(line)