File size: 2,928 Bytes
03c0888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import requests
import shutil
from pathlib import Path
from crawl4ai.async_logger import AsyncLogger
from crawl4ai.llmtxt import AsyncLLMTextManager

class DocsManager:
    """Manage the per-user copy of the crawl4ai LLM documentation files.

    Markdown docs are kept in ``~/.crawl4ai/docs``. They are populated either
    from the ``docs/llm.txt`` directory located two levels above this module,
    or, when that directory has no usable files, from the GitHub repository.
    Generation and search are delegated to ``AsyncLLMTextManager``.
    """

    # Seconds to wait on each HTTP request before giving up, so that a stalled
    # connection cannot hang the caller indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, logger=None):
        """Create the docs directory if needed and set up the helpers.

        Args:
            logger: Logger used for error reporting. When falsy, a verbose
                ``AsyncLogger`` is created.
        """
        self.docs_dir = Path.home() / ".crawl4ai" / "docs"
        self.local_docs = Path(__file__).parent.parent / "docs" / "llm.txt"
        self.docs_dir.mkdir(parents=True, exist_ok=True)
        self.logger = logger or AsyncLogger(verbose=True)
        self.llm_text = AsyncLLMTextManager(self.docs_dir, self.logger)

    async def ensure_docs_exist(self):
        """Fetch the docs when the docs directory is completely empty."""
        if not any(self.docs_dir.iterdir()):
            await self.fetch_docs()

    async def fetch_docs(self) -> bool:
        """Copy docs from the local checkout or download them from GitHub.

        Returns:
            True when the docs were copied or downloaded.

        Raises:
            Exception: Any failure (filesystem, network, HTTP error status,
                unexpected payload) is logged and then re-raised unchanged.
        """
        try:
            # Try local first.
            has_local_files = self.local_docs.exists() and (
                any(self.local_docs.glob("*.md"))
                or any(self.local_docs.glob("*.tokens"))
            )
            if has_local_files:
                # Drop stale markdown files before copying the fresh ones.
                # *.tokens files are currently neither cleared nor copied.
                for stale_path in self.docs_dir.glob("*.md"):
                    stale_path.unlink()
                for source_path in self.local_docs.glob("*.md"):
                    shutil.copy2(source_path, self.docs_dir / source_path.name)
                return True

            # Fallback to GitHub: list the directory, then fetch each file.
            response = requests.get(
                "https://api.github.com/repos/unclecode/crawl4ai/contents/docs/llm.txt",
                headers={'Accept': 'application/vnd.github.v3+json'},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()

            for item in response.json():
                if item['type'] == 'file' and item['name'].endswith('.md'):
                    download = requests.get(
                        item['download_url'], timeout=self.REQUEST_TIMEOUT
                    )
                    # Without this check an HTTP error page would be saved
                    # to disk as if it were documentation.
                    download.raise_for_status()
                    with open(self.docs_dir / item['name'], 'w', encoding='utf-8') as f:
                        f.write(download.text)
            return True

        except Exception as e:
            self.logger.error(f"Failed to fetch docs: {str(e)}")
            raise

    def list(self) -> list[str]:
        """List available topics.

        A topic is the stem of a ``*.md`` file in the docs directory with a
        leading ``<digits>_`` prefix removed. Stems ending in ``.xs`` or
        ``.q`` (files named ``*.xs.md`` / ``*.q.md``) are excluded.

        Returns:
            Topic names, in the order the filesystem yields the files.
        """
        topics = []
        for file_path in self.docs_dir.glob("*.md"):
            name = file_path.stem
            # Strip the prefix only when everything before the first
            # underscore is numeric; names such as "2fa" (no underscore) or
            # "1a_b" (non-numeric prefix) are kept intact instead of crashing
            # or being truncated.
            prefix, separator, remainder = name.partition("_")
            if separator and prefix.isdigit():
                name = remainder
            if name.endswith((".xs", ".q")):
                continue
            topics.append(name)
        return topics

    def generate(self, sections, mode="extended"):
        """Delegate to ``AsyncLLMTextManager.generate`` and return its result."""
        return self.llm_text.generate(sections, mode)

    def search(self, query: str, top_k: int = 5):
        """Delegate to ``AsyncLLMTextManager.search`` and return its result."""
        return self.llm_text.search(query, top_k)