|
'use client' |
|
import type { FC } from 'react' |
|
import React, { useCallback, useEffect, useState } from 'react' |
|
import { useTranslation } from 'react-i18next' |
|
import UrlInput from '../base/url-input' |
|
import OptionsWrap from '../base/options-wrap' |
|
import CrawledResult from '../base/crawled-result' |
|
import Crawling from '../base/crawling' |
|
import ErrorMessage from '../base/error-message' |
|
import Header from './header' |
|
import Options from './options' |
|
import cn from '@/utils/classnames' |
|
import { useModalContext } from '@/context/modal-context' |
|
import Toast from '@/app/components/base/toast' |
|
import { checkJinaReaderTaskStatus, createJinaReaderTask } from '@/service/datasets' |
|
import { sleep } from '@/utils' |
|
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' |
|
|
|
const ERROR_I18N_PREFIX = 'common.errorMsg' |
|
const I18N_PREFIX = 'datasetCreation.stepOne.website' |
|
|
|
type Props = { |
|
onPreview: (payload: CrawlResultItem) => void |
|
checkedCrawlResult: CrawlResultItem[] |
|
onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void |
|
onJobIdChange: (jobId: string) => void |
|
crawlOptions: CrawlOptions |
|
onCrawlOptionsChange: (payload: CrawlOptions) => void |
|
} |
|
|
|
enum Step { |
|
init = 'init', |
|
running = 'running', |
|
finished = 'finished', |
|
} |
|
|
|
const JinaReader: FC<Props> = ({ |
|
onPreview, |
|
checkedCrawlResult, |
|
onCheckedCrawlResultChange, |
|
onJobIdChange, |
|
crawlOptions, |
|
onCrawlOptionsChange, |
|
}) => { |
|
const { t } = useTranslation() |
|
const [step, setStep] = useState<Step>(Step.init) |
|
const [controlFoldOptions, setControlFoldOptions] = useState<number>(0) |
|
useEffect(() => { |
|
if (step !== Step.init) |
|
setControlFoldOptions(Date.now()) |
|
}, [step]) |
|
const { setShowAccountSettingModal } = useModalContext() |
|
const handleSetting = useCallback(() => { |
|
setShowAccountSettingModal({ |
|
payload: 'data-source', |
|
}) |
|
}, [setShowAccountSettingModal]) |
|
|
|
const checkValid = useCallback((url: string) => { |
|
let errorMsg = '' |
|
if (!url) { |
|
errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, { |
|
field: 'url', |
|
}) |
|
} |
|
|
|
if (!errorMsg && !((url.startsWith('http://') || url.startsWith('https://')))) |
|
errorMsg = t(`${ERROR_I18N_PREFIX}.urlError`) |
|
|
|
if (!errorMsg && (crawlOptions.limit === null || crawlOptions.limit === undefined || crawlOptions.limit === '')) { |
|
errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, { |
|
field: t(`${I18N_PREFIX}.limit`), |
|
}) |
|
} |
|
|
|
return { |
|
isValid: !errorMsg, |
|
errorMsg, |
|
} |
|
}, [crawlOptions, t]) |
|
|
|
const isInit = step === Step.init |
|
const isCrawlFinished = step === Step.finished |
|
const isRunning = step === Step.running |
|
const [crawlResult, setCrawlResult] = useState<{ |
|
current: number |
|
total: number |
|
data: CrawlResultItem[] |
|
time_consuming: number | string |
|
} | undefined>(undefined) |
|
const [crawlErrorMessage, setCrawlErrorMessage] = useState('') |
|
const showError = isCrawlFinished && crawlErrorMessage |
|
|
|
const waitForCrawlFinished = useCallback(async (jobId: string) => { |
|
try { |
|
const res = await checkJinaReaderTaskStatus(jobId) as any |
|
console.log('res', res) |
|
if (res.status === 'completed') { |
|
return { |
|
isError: false, |
|
data: { |
|
...res, |
|
total: Math.min(res.total, parseFloat(crawlOptions.limit as string)), |
|
}, |
|
} |
|
} |
|
if (res.status === 'failed' || !res.status) { |
|
return { |
|
isError: true, |
|
errorMessage: res.message, |
|
data: { |
|
data: [], |
|
}, |
|
} |
|
} |
|
|
|
setCrawlResult({ |
|
...res, |
|
total: Math.min(res.total, parseFloat(crawlOptions.limit as string)), |
|
}) |
|
onCheckedCrawlResultChange(res.data || []) |
|
await sleep(2500) |
|
return await waitForCrawlFinished(jobId) |
|
} |
|
catch (e: any) { |
|
const errorBody = await e.json() |
|
return { |
|
isError: true, |
|
errorMessage: errorBody.message, |
|
data: { |
|
data: [], |
|
}, |
|
} |
|
} |
|
}, [crawlOptions.limit]) |
|
|
|
const handleRun = useCallback(async (url: string) => { |
|
const { isValid, errorMsg } = checkValid(url) |
|
if (!isValid) { |
|
Toast.notify({ |
|
message: errorMsg!, |
|
type: 'error', |
|
}) |
|
return |
|
} |
|
setStep(Step.running) |
|
try { |
|
const startTime = Date.now() |
|
const res = await createJinaReaderTask({ |
|
url, |
|
options: crawlOptions, |
|
}) as any |
|
|
|
if (res.data) { |
|
const data = { |
|
current: 1, |
|
total: 1, |
|
data: [{ |
|
title: res.data.title, |
|
markdown: res.data.content, |
|
description: res.data.description, |
|
source_url: res.data.url, |
|
}], |
|
time_consuming: (Date.now() - startTime) / 1000, |
|
} |
|
setCrawlResult(data) |
|
onCheckedCrawlResultChange(data.data || []) |
|
setCrawlErrorMessage('') |
|
} |
|
else if (res.job_id) { |
|
const jobId = res.job_id |
|
onJobIdChange(jobId) |
|
const { isError, data, errorMessage } = await waitForCrawlFinished(jobId) |
|
if (isError) { |
|
setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`)) |
|
} |
|
else { |
|
setCrawlResult(data) |
|
onCheckedCrawlResultChange(data.data || []) |
|
setCrawlErrorMessage('') |
|
} |
|
} |
|
} |
|
catch (e) { |
|
setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!) |
|
console.log(e) |
|
} |
|
finally { |
|
setStep(Step.finished) |
|
} |
|
}, [checkValid, crawlOptions, onJobIdChange, t, waitForCrawlFinished]) |
|
|
|
return ( |
|
<div> |
|
<Header onSetting={handleSetting} /> |
|
<div className={cn('mt-2 p-4 pb-0 rounded-xl border border-gray-200')}> |
|
<UrlInput onRun={handleRun} isRunning={isRunning} /> |
|
<OptionsWrap |
|
className={cn('mt-4')} |
|
controlFoldOptions={controlFoldOptions} |
|
> |
|
<Options className='mt-2' payload={crawlOptions} onChange={onCrawlOptionsChange} /> |
|
</OptionsWrap> |
|
|
|
{!isInit && ( |
|
<div className='mt-3 relative left-[-16px] w-[calc(100%_+_32px)] rounded-b-xl'> |
|
{isRunning |
|
&& <Crawling |
|
className='mt-2' |
|
crawledNum={crawlResult?.current || 0} |
|
totalNum={crawlResult?.total || parseFloat(crawlOptions.limit as string) || 0} |
|
/>} |
|
{showError && ( |
|
<ErrorMessage className='rounded-b-xl' title={t(`${I18N_PREFIX}.exceptionErrorTitle`)} errorMsg={crawlErrorMessage} /> |
|
)} |
|
{isCrawlFinished && !showError |
|
&& <CrawledResult |
|
className='mb-2' |
|
list={crawlResult?.data || []} |
|
checkedList={checkedCrawlResult} |
|
onSelectedChange={onCheckedCrawlResultChange} |
|
onPreview={onPreview} |
|
usedTime={parseFloat(crawlResult?.time_consuming as string) || 0} |
|
/> |
|
} |
|
</div> |
|
)} |
|
</div> |
|
</div> |
|
) |
|
} |
|
export default React.memo(JinaReader) |
|
|