# Spaces: Runtime error
from crawl4ai.llmtxt import AsyncLLMTextManager # Changed to AsyncLLMTextManager | |
from crawl4ai.async_logger import AsyncLogger | |
from pathlib import Path | |
import asyncio | |
async def main():
    """Index the local llm.txt docs and smoke-test the search call.

    The docs directory is ``local/_docs/llm.txt`` under the directory two
    levels above this file; it is created when missing. The ``*.md`` files
    found there are listed, ``generate_index_files`` is awaited without
    forcing fact regeneration or clearing the BM25 cache, and each sample
    query is then passed to ``manager.search`` with ``top_k=2``. Everything
    is reported on stdout; nothing is returned.
    """
    # parents[1] is the grandparent directory of this script.
    docs_dir = Path(__file__).resolve().parents[1] / "local/_docs/llm.txt"
    docs_dir.mkdir(parents=True, exist_ok=True)

    # Only batch_size is overridden; other manager options keep their defaults.
    manager = AsyncLLMTextManager(docs_dir, AsyncLogger(), batch_size=2)

    print("\nAvailable files:")
    for md_path in docs_dir.glob("*.md"):
        print(f"- {md_path.name}")

    print("\nGenerating index files...")
    await manager.generate_index_files(
        force_generate_facts=False,
        clear_bm25_cache=False,
    )

    # Sample questions about Crawl4AI used to exercise the search.
    sample_queries = ("How is using the `arun_many` method?",)

    print("\nTesting search functionality:")
    for question in sample_queries:
        print(f"\nQuery: {question}")
        # search is called synchronously here; its result is presumably a
        # string, given the length is reported in characters -- TODO confirm.
        hits = manager.search(question, top_k=2)
        print(f"Results length: {len(hits)} characters")
        if not hits:
            print("No results found")
            continue
        # Flatten newlines so the preview stays on a single output line.
        preview = hits[:200].replace('\n', ' ')
        print("First 200 chars of results:", preview, "...")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())