Spaces:
Running
Running
import os | |
import pygit2 | |
from fsspec.spec import AbstractFileSystem | |
from .memory import MemoryFile | |
class GitFileSystem(AbstractFileSystem):
    """Browse the files of a local git repo at any hash/tag/branch

    (experimental backend)

    Objects are looked up through ``pygit2`` in the tree of the commit that
    a reference resolves to; file contents are served from memory.
    """

    root_marker = ""
    cachable = True

    def __init__(self, path=None, fo=None, ref=None, **kwargs):
        """
        Parameters
        ----------
        path: str (optional)
            Local location of the repo (uses current directory if not given).
            May be deprecated in favour of ``fo``. When used with a higher
            level function such as fsspec.open(), may be of the form
            "git://[path-to-repo[:]][ref@]path/to/file" (but the actual
            file path should not contain "@" or ":").
        fo: str (optional)
            Same as ``path``, but passed as part of a chained URL. This one
            takes precedence if both are given.
        ref: str (optional)
            Reference to work with, could be a hash, tag or branch name.
            Defaults to ``"master"``. Note that ``ls`` and ``open`` also take
            a ``ref``, so this becomes the default for those operations.
        kwargs
            Forwarded to the parent class constructor.
        """
        super().__init__(**kwargs)
        self.repo = pygit2.Repository(fo or path or os.getcwd())
        self.ref = ref or "master"

    @classmethod
    def _strip_protocol(cls, path):
        """Reduce a URL/path to the bare in-repo file path.

        After the parent class has stripped the protocol, an optional
        ``<repo-location>:`` prefix and an optional ``<ref>@`` prefix are
        dropped, along with any leading slashes.
        """
        path = super()._strip_protocol(path).lstrip("/")
        if ":" in path:
            path = path.split(":", 1)[1]
        if "@" in path:
            path = path.split("@", 1)[1]
        return path.lstrip("/")

    def _path_to_object(self, path, ref):
        """Return the git object found at ``path`` in the tree of ``ref``.

        Parameters
        ----------
        path: str
            Slash-separated path inside the repo; empty components are
            ignored, so ``""`` yields the root tree of the commit.
        ref: str or None
            Reference to resolve; falls back to ``self.ref`` when falsy.

        Raises
        ------
        FileNotFoundError
            If a component is missing from its parent tree, or if the path
            continues past an object that is not a tree.
        """
        commit, _ = self.repo.resolve_refish(ref or self.ref)
        obj = commit.tree
        for part in path.split("/"):
            if not part:
                continue
            # Only trees can be descended into; anything else means the
            # requested path does not exist at this ref.
            if not isinstance(obj, pygit2.Tree) or part not in obj:
                raise FileNotFoundError(path)
            obj = obj[part]
        return obj

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Extract constructor kwargs (``path`` and/or ``ref``) from a URL.

        Expects the form "git://[path-to-repo[:]][ref@]path/to/file"; only
        the parts that are present end up in the returned dict.
        """
        if path.startswith("git://"):
            path = path[6:]
        out = {}
        if ":" in path:
            out["path"], path = path.split(":", 1)
        if "@" in path:
            out["ref"], path = path.split("@", 1)
        return out

    @staticmethod
    def _object_to_info(obj, name):
        """Build the info dict for one git object, reported under ``name``."""
        is_dir = isinstance(obj, pygit2.Tree)
        return {
            "type": "directory" if is_dir else "file",
            "name": name,
            # str(id) is the hex digest; avoids relying on the ``hex``
            # attribute of pygit2 objects.
            "hex": str(obj.id),
            "mode": f"{obj.filemode:o}",
            "size": 0 if is_dir else obj.size,
        }

    def ls(self, path, detail=True, ref=None, **kwargs):
        """List the entries under ``path`` at the given ``ref``.

        Parameters
        ----------
        path: str
            Location inside the repo (protocol and repo/ref prefixes are
            stripped first).
        detail: bool
            If True, return a list of dicts with the keys ``type``,
            ``name``, ``hex``, ``mode`` and ``size``; otherwise only the
            names.
        ref: str (optional)
            Reference to list from; defaults to the instance's ``ref``.

        Returns
        -------
        A list with one entry per child when ``path`` is a tree, or a
        single-entry list describing ``path`` itself otherwise. Names are
        full paths relative to the repo root.
        """
        path = self._strip_protocol(path)
        obj = self._path_to_object(path, ref)
        if isinstance(obj, pygit2.Tree):
            out = [
                self._object_to_info(
                    child, "/".join([path, child.name]).lstrip("/")
                )
                for child in obj
            ]
        else:
            # Report the full path (not just the basename) so the name is
            # consistent with what listing the parent directory returns.
            out = [self._object_to_info(obj, path.rstrip("/"))]
        if detail:
            return out
        return [o["name"] for o in out]

    def ukey(self, path, ref=None):
        """Return the ``hex`` field of ``info(path, ref=ref)`` as the key."""
        return self.info(path, ref=ref)["hex"]

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        ref=None,
        **kwargs,
    ):
        """Return a ``MemoryFile`` built from the data of the object at ``path``.

        ``mode``, ``block_size``, ``autocommit`` and ``cache_options`` are
        accepted for signature compatibility but are not consulted here.

        Raises
        ------
        IsADirectoryError
            If ``path`` refers to a tree rather than a file.
        """
        obj = self._path_to_object(path, ref or self.ref)
        if isinstance(obj, pygit2.Tree):
            raise IsADirectoryError(path)
        return MemoryFile(data=obj.data)