// NOTE(review): this copy of the file appears to have lost its JSX element tags and its
// original line breaks (several definitions share one physical line) — TODO confirm against
// the real file before relying on anything below.
//
// The code that follows holds, in order: the module imports, the `Props` type, and the
// `RuleDetail` component.
//
// Props — object shape with `datasetId`, `batchId`, optional `documents` and optional
// `indexingType`.
//
// RuleDetail — component taking an optional `sourceData` (ProcessRuleResponse).
//   - segmentationRuleMap: translated labels keyed by `mode`, `segmentLength`, `textCleaning`.
//   - getRuleName(key): translated label for `remove_extra_spaces`, `remove_urls_emails` and
//     `remove_stopwords`; there is no explicit return for any other key.
//   - getValue(field):
//       'mode'          -> the "automatic" label when sourceData?.mode === 'automatic',
//                          otherwise the "custom" label.
//       'segmentLength' -> sourceData?.rules?.segmentation?.max_tokens (undefined when absent).
//       anything else   -> the "automatic" label in automatic mode; otherwise the labels of the
//                          enabled pre_processing_rules, with empty results filtered out,
//                          joined with ';'.
//     Every branch reassigns `value`, so the initial '-' is never what gets returned.
//     NOTE(review): memoized on [sourceData] only, although the callback also closes over `t`
//     and `getRuleName`.
//   - The component finally maps over Object.keys(segmentationRuleMap); what is rendered per
//     field is not visible in this copy.
//
// NOTE(review): in this copy a line break follows `return` in the `remove_stopwords` branch and
// again before the component's rendered output; presumably an artifact of the lost
// formatting — verify.
import type { FC } from 'react' import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react' import useSWR from 'swr' import { useRouter } from 'next/navigation' import { useTranslation } from 'react-i18next' import { omit } from 'lodash-es' import { ArrowRightIcon } from '@heroicons/react/24/solid' import { RiErrorWarningFill, } from '@remixicon/react' import s from './index.module.css' import cn from '@/utils/classnames' import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata' import Button from '@/app/components/base/button' import type { FullDocumentDetail, IndexingStatusResponse, ProcessRuleResponse } from '@/models/datasets' import { fetchIndexingStatusBatch as doFetchIndexingStatus, fetchProcessRule } from '@/service/datasets' import { DataSourceType } from '@/models/datasets' import NotionIcon from '@/app/components/base/notion-icon' import PriorityLabel from '@/app/components/billing/priority-label' import { Plan } from '@/app/components/billing/type' import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general' import UpgradeBtn from '@/app/components/billing/upgrade-btn' import { useProviderContext } from '@/context/provider-context' import Tooltip from '@/app/components/base/tooltip' import { sleep } from '@/utils' type Props = { datasetId: string batchId: string documents?: FullDocumentDetail[] indexingType?: string } const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => { const { t } = useTranslation() const segmentationRuleMap = { mode: t('datasetDocuments.embedding.mode'), segmentLength: t('datasetDocuments.embedding.segmentLength'), textCleaning: t('datasetDocuments.embedding.textCleaning'), } const getRuleName = (key: string) => { if (key === 'remove_extra_spaces') return t('datasetCreation.stepTwo.removeExtraSpaces') if (key === 'remove_urls_emails') return t('datasetCreation.stepTwo.removeUrlEmails') if (key === 'remove_stopwords') return 
t('datasetCreation.stepTwo.removeStopwords') } const getValue = useCallback((field: string) => { let value: string | number | undefined = '-' switch (field) { case 'mode': value = sourceData?.mode === 'automatic' ? (t('datasetDocuments.embedding.automatic') as string) : (t('datasetDocuments.embedding.custom') as string) break case 'segmentLength': value = sourceData?.rules?.segmentation?.max_tokens break default: value = sourceData?.mode === 'automatic' ? (t('datasetDocuments.embedding.automatic') as string) // eslint-disable-next-line array-callback-return : sourceData?.rules?.pre_processing_rules?.map((rule) => { if (rule.enabled) return getRuleName(rule.id) }).filter(Boolean).join(';') break } return value }, [sourceData]) return
{Object.keys(segmentationRuleMap).map((field) => { return })}
// The `}` that opens the next line closes the preceding `RuleDetail` component; everything
// after it defines `EmbeddingProcess` and the module's default export.
//
// NOTE(review): this copy appears to have lost its JSX element tags and probably some generic
// type arguments (`FC` and `useState([])` carry none here) — TODO confirm against the real file.
//
// EmbeddingProcess — component receiving `datasetId`, `batchId`, `documents` (defaults to [])
// and `indexingType` (destructured but not referenced in the visible code).
//
// Status polling
//   - fetchIndexingStatus calls doFetchIndexingStatus({ datasetId, batchId }), stores
//     `status.data` in state and returns it.
//   - startQueryStatus returns immediately when isStopQueryRef.current is set. Otherwise it
//     fetches once; if every item's indexing_status is 'completed', 'error' or 'paused' it calls
//     stopQueryStatus and returns, else it awaits sleep(2500) and calls itself again. A thrown
//     error is ignored and followed by the same sleep-and-retry.
//   - An effect with an empty dependency list starts the loop; its cleanup calls
//     stopQueryStatus.
//   - NOTE(review): stopQueryStatus only sets state, and the ref read by the loop is updated in
//     a separate effect keyed on that state. Confirm that effect still runs when the cleanup
//     fires on unmount; if it does not, the loop would keep polling.
//
// Process rule
//   - useSWR is keyed on { action: 'fetchProcessRule', params: { documentId } } and calls
//     fetchProcessRule with the key minus `action`; revalidateOnFocus is disabled.
//   - NOTE(review): `getFirstDocument` is `documents[0]`, which is undefined when `documents`
//     is empty (the default), so reading `getFirstDocument.id` throws in that case.
//
// Derived flags
//   - isEmbedding: some item is 'indexing', 'splitting', 'parsing' or 'cleaning'.
//   - isEmbeddingCompleted: every item is 'completed', 'error' or 'paused'.
//     NOTE(review): `every` is true for an empty list, so this holds before the first response.
//
// Per-item helpers
//   - getSourceName / getSourceType / getIcon look the document up in `documents` by id.
//     getIcon reads `doc?.data_source_info.notion_page_icon`; `data_source_info` itself is not
//     optional-chained.
//   - getFileType: text after the last '.' in the name, falling back to 'txt'.
//   - getSourcePercent: completed_segments * 100 / total_segments, rounded; 0 when the total
//     is 0; capped at 100.
//   - isSourceEmbedding: same in-progress statuses as isEmbedding plus 'waiting'.
//   - navToDocumentList pushes `/datasets/${datasetId}/documents` on the router.
//
// Rendered output (only the surviving expressions are visible): the processing / completed
// label, an upgrade prompt when `enableBilling && plan.type !== Plan.team`, then one entry per
// status item showing its name, a percentage while in progress, "Error" for the 'error' status
// (with the error text when present) and "100%" for 'completed'.
} const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], indexingType }) => { const { t } = useTranslation() const { enableBilling, plan } = useProviderContext() const getFirstDocument = documents[0] const [indexingStatusBatchDetail, setIndexingStatusDetail] = useState([]) const fetchIndexingStatus = async () => { const status = await doFetchIndexingStatus({ datasetId, batchId }) setIndexingStatusDetail(status.data) return status.data } const [isStopQuery, setIsStopQuery] = useState(false) const isStopQueryRef = useRef(isStopQuery) useEffect(() => { isStopQueryRef.current = isStopQuery }, [isStopQuery]) const stopQueryStatus = () => { setIsStopQuery(true) } const startQueryStatus = async () => { if (isStopQueryRef.current) return try { const indexingStatusBatchDetail = await fetchIndexingStatus() const isCompleted = indexingStatusBatchDetail.every(indexingStatusDetail => ['completed', 'error', 'paused'].includes(indexingStatusDetail.indexing_status)) if (isCompleted) { stopQueryStatus() return } await sleep(2500) await startQueryStatus() } catch (e) { await sleep(2500) await startQueryStatus() } } useEffect(() => { startQueryStatus() return () => { stopQueryStatus() } // eslint-disable-next-line react-hooks/exhaustive-deps }, []) // get rule const { data: ruleDetail } = useSWR({ action: 'fetchProcessRule', params: { documentId: getFirstDocument.id }, }, apiParams => fetchProcessRule(omit(apiParams, 'action')), { revalidateOnFocus: false, }) const router = useRouter() const navToDocumentList = () => { router.push(`/datasets/${datasetId}/documents`) } const isEmbedding = useMemo(() => { return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || '')) }, [indexingStatusBatchDetail]) const isEmbeddingCompleted = useMemo(() => { return indexingStatusBatchDetail.every(indexingStatusDetail => ['completed', 'error', 
'paused'].includes(indexingStatusDetail?.indexing_status || '')) }, [indexingStatusBatchDetail]) const getSourceName = (id: string) => { const doc = documents.find(document => document.id === id) return doc?.name } const getFileType = (name?: string) => name?.split('.').pop() || 'txt' const getSourcePercent = (detail: IndexingStatusResponse) => { const completedCount = detail.completed_segments || 0 const totalCount = detail.total_segments || 0 if (totalCount === 0) return 0 const percent = Math.round(completedCount * 100 / totalCount) return percent > 100 ? 100 : percent } const getSourceType = (id: string) => { const doc = documents.find(document => document.id === id) return doc?.data_source_type as DataSourceType } const getIcon = (id: string) => { const doc = documents.find(document => document.id === id) return doc?.data_source_info.notion_page_icon } const isSourceEmbedding = (detail: IndexingStatusResponse) => ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '') return ( <>
{isEmbedding && t('datasetDocuments.embedding.processing')} {isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
{ enableBilling && plan.type !== Plan.team && (
{t('billing.plansCommon.documentProcessingPriorityUpgrade')}
) }
{indexingStatusBatchDetail.map(indexingStatusDetail => (
{isSourceEmbedding(indexingStatusDetail) && (
)}
{getSourceType(indexingStatusDetail.id) === DataSourceType.FILE && (
)} {getSourceType(indexingStatusDetail.id) === DataSourceType.NOTION && ( )}
{getSourceName(indexingStatusDetail.id)}
{ enableBilling && ( ) }
{isSourceEmbedding(indexingStatusDetail) && (
{`${getSourcePercent(indexingStatusDetail)}%`}
)} {indexingStatusDetail.indexing_status === 'error' && indexingStatusDetail.error && ( {indexingStatusDetail.error}
)} >
Error
)} {indexingStatusDetail.indexing_status === 'error' && !indexingStatusDetail.error && (
Error
)} {indexingStatusDetail.indexing_status === 'completed' && (
100%
)}
))}
) } export default EmbeddingProcess