Spaces:

jbilcke-hf
/

ai-tube

Running

File size: 5,343 Bytes

"use server"

// import { distance } from "fastest-levenshtein"
import MiniSearch from "minisearch"

import { MediaInfo } from "@/types/general"

import { getVideoIndex } from "./getVideoIndex"
import { extendVideosWithStats } from "./extendVideosWithStats"
import { isHighQuality } from "../../utils/isHighQuality"
import { isAntisocial } from "../../utils/isAntisocial"

// absolute upper bound on the number of videos a single call may return,
// whatever the caller asks for through `maxNbMedias`
const HARD_LIMIT = 100

// lowercases and trims the tags, dropping the ones that end up empty
function normalizeTags(tags: string[]): string[] {
  return tags.map(tag => tag.toLowerCase().trim()).filter(tag => tag)
}

// returns a uniformly shuffled copy of the array
// (sorting with `() => Math.random() - 0.5` is biased, because the comparator is inconsistent)
function shuffled<T>(items: T[]): T[] {
  return items
    .map(item => ({ item, key: Math.random() }))
    .sort((a, b) => a.key - b.key)
    .map(({ item }) => item)
}

/**
 * Returns the published videos of the platform, filtered, ordered and capped.
 *
 * Steps, in order:
 * 1. load the index of published videos and drop the ids listed in `ignoreVideoIds`
 * 2. order them according to `sortBy` ("match" keeps the relevance order of the full-text search)
 * 3. keep only the videos having at least one of the `mandatoryTags` (when some are given)
 * 4. move the videos having at least one of the `niceToHaveTags` to the front
 * 5. drop the videos flagged by `isAntisocial`, then cap to `min(HARD_LIMIT, maxNbMedias)`
 * 6. extend the remaining videos with their stats, and put the ones
 *    considered high quality first (relative order is otherwise preserved)
 *
 * @returns the selected videos, or an empty array if something failed and `neverThrow` is true
 * @throws whatever the underlying calls throw, unless `neverThrow` is true
 */
export async function getVideos({
  query = "",
  mandatoryTags = [],
  niceToHaveTags = [],
  sortBy = "date",
  ignoreVideoIds = [],
  maxNbMedias = HARD_LIMIT,
  neverThrow = false,
  renewCache = true,
}: {
  // optional search query (only used when sortBy is "match")
  query?: string

  // the videos MUST include at least one of those tags
  mandatoryTags?: string[]

  // tags that we should try to use to prioritize the videos,
  // but it isn't a hard limit - TODO: use some semantic search here?
  niceToHaveTags?: string[]

  sortBy?:
    | "random" // for the home
    | "date" // most recent first
    | "match" // how close we are from the query

  // ignore some ids - this is used to not show the same videos again
  // eg. videos already watched, or disliked etc
  ignoreVideoIds?: string[]

  // maximum number of videos to return (always capped by HARD_LIMIT)
  maxNbMedias?: number

  // if true, errors are logged and an empty array is returned instead of throwing
  neverThrow?: boolean

  // forwarded as-is to getVideoIndex
  renewCache?: boolean
}): Promise<MediaInfo[]> {
  try {
    // the index is gonna grow more and more,
    // but in the future we will use some DB eg. Prisma or sqlite
    const published = await getVideoIndex({
      status: "published",
      renewCache,
    })

    let allPotentiallyValidVideos = Object.values(published)

    if (ignoreVideoIds.length) {
      const ignoredIds = new Set(ignoreVideoIds)
      allPotentiallyValidVideos = allPotentiallyValidVideos.filter(video => !ignoredIds.has(video.id))
    }

    if (sortBy === "match") {
      // MiniSearch lowercases terms and skips empty tokens by default,
      // so searching the normalized query gives the same hits as the raw one
      const q = query.trim().toLowerCase()

      // now obviously we are going to migrate to a database search instead,
      // maybe a bit of vector search too,
      // but let's say that for now this is good enough
      const miniSearch = new MiniSearch({
        fields: ['label', 'description', 'tags'], // fields to index for full-text search
        storeFields: ['id'] // fields to return with search results
      })

      miniSearch.addAll(allPotentiallyValidVideos)

      // mini search has plenty of options, see:
      // https://www.npmjs.com/package/minisearch
      const results = miniSearch.search(q, {
        prefix: true, // "moto" will match "motorcycle"
        fuzzy: 0.2,
        // to search within a specific category
        // filter: (result) => result.category === 'fiction'
      })

      // walk the search results (not the index) so the relevance order is kept
      const videosById = new Map(allPotentiallyValidVideos.map(video => [video.id, video] as const))
      const matchingVideos: typeof allPotentiallyValidVideos = []
      for (const result of results) {
        const video = videosById.get(result.id)
        if (video) {
          matchingVideos.push(video)
        }
      }
      allPotentiallyValidVideos = matchingVideos
    } else if (sortBy === "date") {
      allPotentiallyValidVideos.sort((a, b) => b.updatedAt.localeCompare(a.updatedAt))
    } else {
      allPotentiallyValidVideos = shuffled(allPotentiallyValidVideos)
    }

    // filter videos by mandatory tags, or else we keep everything
    let eligibleVideos: MediaInfo[] = allPotentiallyValidVideos
    const mandatoryTagsSet = new Set(normalizeTags(mandatoryTags))
    if (mandatoryTagsSet.size) {
      eligibleVideos = eligibleVideos.filter(video =>
        video.tags.some(tag => mandatoryTagsSet.has(tag.toLowerCase().trim()))
      )
    }

    // nice-to-have tags never exclude a video: the matching videos simply come first,
    // and the other eligible videos are used to fill the gap up to the limit
    let videosMatchingFilters = eligibleVideos
    const niceToHaveTagsSet = new Set(normalizeTags(niceToHaveTags))
    if (niceToHaveTagsSet.size) {
      const preferredVideos: MediaInfo[] = []
      const fillerVideos: MediaInfo[] = []
      for (const video of eligibleVideos) {
        const isPreferred = video.tags.some(tag => niceToHaveTagsSet.has(tag.toLowerCase().trim()))
        if (isPreferred) {
          preferredVideos.push(video)
        } else {
          fillerVideos.push(video)
        }
      }
      videosMatchingFilters = [
        ...preferredVideos,
        ...fillerVideos,
      ]
    }

    const sanitizedVideos = videosMatchingFilters.filter(v => !isAntisocial(v))

    // we enforce the max limit of HARD_LIMIT (eg. 100)
    // and a negative value must not turn into a "slice from the end"
    const limit = Math.max(0, Math.min(HARD_LIMIT, maxNbMedias))
    const limitedNumberOfVideos = sanitizedVideos.slice(0, limit)

    // we ask Redis for the freshest stats
    const videosWithStats = await extendVideosWithStats(limitedNumberOfVideos)

    // high quality videos go first, each video being evaluated only once
    const highQuality: MediaInfo[] = []
    const lowQuality: MediaInfo[] = []
    for (const video of videosWithStats) {
      if (isHighQuality(video)) {
        highQuality.push(video)
      } else {
        lowQuality.push(video)
      }
    }

    return [
      ...highQuality,
      ...lowQuality
    ]
  } catch (err) {
    if (neverThrow) {
      console.error("failed to get videos:", err)
      return []
    }

    throw err
  }
}