import gradio as gr
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain.indexes import VectorstoreIndexCreator
from apify_client import ApifyClient
import os

# Set your OpenAI API key here (do not commit real keys to source control)
os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"

# Page function executed by the Apify Web Scraper inside the browser:
# extracts the page title, body text, and URL of each crawled page
page_function_code = """
function pageFunction(context) {
    const $ = context.jQuery;
    const data = {
        title: $('title').text(),
        content: $('body').text(),
        url: context.request.url
    };
    return data;
}
"""

# Fetch website content by running the Apify Web Scraper actor
def fetch_website_content(website_url):
    apify_client = ApifyClient("YOUR_APIFY_API_TOKEN")
    run_input = {
        "startUrls": [{"url": website_url}],
        "pageFunction": page_function_code
    }
    # "moJRLRc85AitArpNN" is the ID of the Web Scraper actor
    run = apify_client.actor("moJRLRc85AitArpNN").call(run_input=run_input)
    items = list(apify_client.dataset(run["defaultDatasetId"]).iterate_items())
    return items if items else None

# Custom loader that wraps already-built Document objects
class CustomLoader(BaseLoader):
    def __init__(self, documents):
        self.documents = documents

    def load(self):
        return self.documents

# Fetch content and convert the scraped items into LangChain Documents
content = fetch_website_content("https://python.langchain.com/en/latest/")
if not content:
    raise RuntimeError("No content was returned by the Apify actor run.")
documents = [
    Document(
        page_content=item.get("content") or "",
        metadata={"source": item.get("url", "Unknown URL")}
    )
    for item in content
]

# Build a vector store index from the custom loader
loader = CustomLoader(documents)
index = VectorstoreIndexCreator().from_loaders([loader])

# Query function backing the Gradio UI
def ask_langchain(question):
    result = index.query_with_sources(question)
    answer = result["answer"]
    # "sources" is returned as a single string, so use it directly
    sources = result.get("sources", "")
    return f"{answer}\n\nSources: {sources}"

# Gradio interface
iface = gr.Interface(
    fn=ask_langchain,
    inputs="text",
    outputs="text",
    live=True,
    title="LangChain Query",
    description="Ask a question about LangChain based on the indexed content."
)
iface.launch()
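# Optional sanity check (a sketch, not part of the app itself): query the index
# directly instead of going through the Gradio UI. The question string is only
# an example. Run this before iface.launch(), or in place of it, since launch()
# blocks the process.
#
# result = index.query_with_sources("How do I create a vector store index with LangChain?")
# print(result["answer"])
# print("Sources:", result["sources"])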