Spaces:

jbilcke-hf
/

ai-tube

Running

App Files Files Community

ai-tube / src /app /api /actions /ai-tube-hf /getVideos.ts

jbilcke-hf HF staff

working on some new features

3d4392e 9 months ago

raw

history blame

5.34 kB

	"use server"

	// import { distance } from "fastest-levenshtein"
	import MiniSearch from "minisearch"

	import { MediaInfo } from "@/types/general"

	import { getVideoIndex } from "./getVideoIndex"
	import { extendVideosWithStats } from "./extendVideosWithStats"
	import { isHighQuality } from "../../utils/isHighQuality"
	import { isAntisocial } from "../../utils/isAntisocial"

	// Upper bound on how many videos getVideos() hands back in one call;
	// it is also the default value of the `maxNbMedias` option.
	const HARD_LIMIT = 100

	/**
	 * Randomly reorders `items` in place using an unbiased Fisher-Yates shuffle.
	 */
	function shuffleInPlace<T>(items: T[]): void {
	  for (let i = items.length - 1; i > 0; i--) {
	    const j = Math.floor(Math.random() * (i + 1))
	    const swapped = items[i]
	    items[i] = items[j]
	    items[j] = swapped
	  }
	}

	/**
	 * Lower-cases and trims every tag, dropping the ones that end up empty.
	 */
	function normalizeTags(tags: string[]): string[] {
	  return tags.map(tag => tag.toLowerCase().trim()).filter(tag => tag)
	}

	/**
	 * Tells whether at least one of `tags` (compared case-insensitively and
	 * trimmed) is present in `wantedTags`, which must already be normalized.
	 */
	function hasAnyTag(tags: string[], wantedTags: string[]): boolean {
	  return tags.some(tag => wantedTags.includes(tag.toLowerCase().trim()))
	}

	/**
	 * Returns published videos, filtered, ordered and capped according to the options.
	 *
	 * Processing order:
	 * 1. load the index of published videos and drop the ids in `ignoreVideoIds`
	 * 2. order them: by search relevance ("match"), by `updatedAt` descending
	 *    ("date"), or randomly (any other value, i.e. "random")
	 * 3. keep only videos having at least one of the `mandatoryTags` (if any)
	 * 4. if `niceToHaveTags` are given, put the videos matching them first and
	 *    fill the remaining slots with other videos that passed step 3
	 * 5. drop antisocial videos, cap the list to `min(HARD_LIMIT, maxNbMedias)`,
	 *    attach stats, and list high quality videos before the other ones
	 *
	 * @returns the selected videos (possibly an empty array)
	 * @throws rethrows any error raised while building the list, unless
	 *   `neverThrow` is true, in which case the error is logged and `[]` is returned
	 */
	export async function getVideos({
	  query = "",
	  mandatoryTags = [],
	  niceToHaveTags = [],
	  sortBy = "date",
	  ignoreVideoIds = [],
	  maxNbMedias = HARD_LIMIT,
	  neverThrow = false,
	  renewCache = true,
	}: {
	  // optional search query (only used when sortBy is "match")
	  query?: string

	  // the videos MUST include at least one of those tags
	  mandatoryTags?: string[]

	  // tags that we should try to use to filter the videos,
	  // but it isn't a hard limit - TODO: use some semantic search here?
	  niceToHaveTags?: string[]

	  sortBy?:
	    | "random" // for the home
	    | "date" // most recent first
	    | "match" // how close we are from the query

	  // ignore some ids - this is used to not show the same videos again
	  // eg. videos already watched, or disliked etc
	  ignoreVideoIds?: string[]

	  // maximum number of videos to return (never more than HARD_LIMIT)
	  maxNbMedias?: number

	  // when true, errors are logged and an empty list is returned instead
	  neverThrow?: boolean

	  // forwarded as-is to getVideoIndex()
	  renewCache?: boolean
	}): Promise<MediaInfo[]> {
	  try {
	    // the index is gonna grow more and more,
	    // but in the future we will use some DB eg. Prisma or sqlite
	    const published = await getVideoIndex({
	      status: "published",
	      renewCache,
	    })

	    let candidates = Object.values(published)

	    if (ignoreVideoIds.length) {
	      // a Set keeps the exclusion linear instead of O(videos x ignored ids)
	      const ignoredIds = new Set(ignoreVideoIds)
	      candidates = candidates.filter(video => !ignoredIds.has(video.id))
	    }

	    if (sortBy === "match") {
	      // now obviously we are going to migrate to a database search instead,
	      // maybe a bit of vector search too,
	      // but let's say that for now this is good enough
	      const miniSearch = new MiniSearch({
	        fields: ['label', 'description', 'tags'], // fields to index for full-text search
	        storeFields: ['id'] // fields to return with search results
	      })

	      miniSearch.addAll(candidates)

	      // mini search has plenty of options, see:
	      // https://www.npmjs.com/package/minisearch
	      const results = miniSearch.search(query.trim().toLowerCase(), {
	        prefix: true, // "moto" will match "motorcycle"
	        fuzzy: 0.2,
	        // to search within a specific category
	        // filter: (result) => result.category === 'fiction'
	      })

	      // results come back ordered by relevance: remember the position of each
	      // id so the videos can be returned best match first
	      const rankById = new Map<string, number>()
	      results.forEach((result, rank) => {
	        rankById.set(result.id, rank)
	      })

	      candidates = candidates
	        .filter(video => rankById.has(video.id))
	        .sort((a, b) => (rankById.get(a.id) ?? 0) - (rankById.get(b.id) ?? 0))
	    } else if (sortBy === "date") {
	      candidates.sort((a, b) => b.updatedAt.localeCompare(a.updatedAt))
	    } else {
	      shuffleInPlace(candidates)
	    }

	    // filter videos by mandatory tags, or else we keep everything
	    const mandatoryTagsList = normalizeTags(mandatoryTags)
	    const eligibleVideos: MediaInfo[] = mandatoryTagsList.length
	      ? candidates.filter(video => hasAnyTag(video.tags, mandatoryTagsList))
	      : candidates

	    let videosMatchingFilters: MediaInfo[] = eligibleVideos

	    // nice-to-have tags only express a preference: matching videos go first
	    const niceToHaveTagsList = normalizeTags(niceToHaveTags)
	    if (niceToHaveTagsList.length) {
	      const preferredVideos = eligibleVideos.filter(video =>
	        hasAnyTag(video.tags, niceToHaveTagsList)
	      )
	      videosMatchingFilters = preferredVideos

	      // if we don't have enough videos
	      if (preferredVideos.length < maxNbMedias) {
	        // count how many we need
	        const nbMissingVideos = maxNbMedias - preferredVideos.length
	        const preferredIds = new Set(preferredVideos.map(video => video.id))

	        // then we try to fill the gap with videos from other topics; they are
	        // taken from the eligible pool so mandatory tags are still honored
	        const videosToUseAsFiller = eligibleVideos
	          .filter(video => !preferredIds.has(video.id)) // of course we don't reuse the same
	          .slice(0, nbMissingVideos) // and only pick those we need

	        videosMatchingFilters = [
	          ...preferredVideos,
	          ...videosToUseAsFiller,
	        ]
	      }
	    }

	    const sanitizedVideos = videosMatchingFilters.filter(v => !isAntisocial(v))

	    // we enforce the max limit of HARD_LIMIT (eg. 100); the lower clamp stops
	    // a negative maxNbMedias from turning slice() into "all but the last n"
	    const limit = Math.max(0, Math.min(HARD_LIMIT, maxNbMedias))
	    const limitedNumberOfVideos = sanitizedVideos.slice(0, limit)

	    // we ask Redis for the freshest stats
	    const videosWithStats = await extendVideosWithStats(limitedNumberOfVideos)

	    // high quality videos first, relative order preserved within each group
	    const highQuality: MediaInfo[] = []
	    const lowQuality: MediaInfo[] = []
	    for (const video of videosWithStats) {
	      if (isHighQuality(video)) {
	        highQuality.push(video)
	      } else {
	        lowQuality.push(video)
	      }
	    }

	    return [
	      ...highQuality,
	      ...lowQuality
	    ]
	  } catch (err) {
	    if (neverThrow) {
	      console.error("failed to get videos:", err)
	      return []
	    }

	    throw err
	  }
	}