Spaces:
Running
Running
File size: 5,343 Bytes
f27679f b8c4528 ac7030c f27679f 6967c22 f70dd7e 3d4392e f27679f b8c4528 4c34e70 f27679f 4c34e70 3d4392e 8f2b05f b8c4528 f27679f b8c4528 4c34e70 b8c4528 4c34e70 3d4392e 8f2b05f b8c4528 ac7030c 8f2b05f b8c4528 8f2b05f f27679f 29f166e b8c4528 8f2b05f f27679f ac7030c 4c34e70 8f2b05f 4c34e70 8f2b05f 4c34e70 8f2b05f 4c34e70 8f2b05f 3d4392e 8f2b05f 3d4392e 8f2b05f 4c34e70 29f166e 8f2b05f 3d4392e 29f166e 8f2b05f f27679f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
"use server"
// import { distance } from "fastest-levenshtein"
import MiniSearch from "minisearch"
import { MediaInfo } from "@/types/general"
import { getVideoIndex } from "./getVideoIndex"
import { extendVideosWithStats } from "./extendVideosWithStats"
import { isHighQuality } from "../../utils/isHighQuality"
import { isAntisocial } from "../../utils/isAntisocial"
// upper bound on how many videos a single getVideos() call may return;
// it is also the default value of the `maxNbMedias` option
const HARD_LIMIT = 100
/**
 * Returns published videos from the platform index, filtered, ordered and
 * capped according to the given options.
 *
 * Processing order:
 * 1. load the "published" index and drop every id listed in `ignoreVideoIds`
 * 2. order the candidates according to `sortBy`
 * 3. keep only the videos carrying at least one of `mandatoryTags` (if any)
 * 4. put the videos carrying at least one of `niceToHaveTags` first, and top up
 *    with other eligible videos when there are fewer than `maxNbMedias`
 * 5. drop the videos flagged by `isAntisocial`, cap the list, attach stats,
 *    and return high quality videos before the others
 *
 * Tag comparisons are case-insensitive and ignore surrounding whitespace.
 *
 * @returns at most `min(HARD_LIMIT, maxNbMedias)` videos, extended with stats
 * @throws rethrows any underlying error, unless `neverThrow` is true, in which
 *         case the error is logged and an empty array is returned
 */
export async function getVideos({
  query = "",
  mandatoryTags = [],
  niceToHaveTags = [],
  sortBy = "date",
  ignoreVideoIds = [],
  maxNbMedias = HARD_LIMIT,
  neverThrow = false,
  renewCache = true,
}: {
  // optional search query (only used when sortBy is "match")
  query?: string

  // the videos MUST include at least one of those tags
  mandatoryTags?: string[]

  // tags that we should try to use to filter the videos,
  // but it isn't a hard limit - TODO: use some semantic search here?
  niceToHaveTags?: string[]

  sortBy?:
    | "random" // for the home
    | "date" // most recent first
    | "match" // how close we are from the query

  // ignore some ids - this is used to not show the same videos again
  // eg. videos already watched, or disliked etc
  ignoreVideoIds?: string[]

  // maximum number of videos to return (capped by HARD_LIMIT)
  maxNbMedias?: number

  // when true, errors are logged and an empty array is returned instead
  neverThrow?: boolean

  // forwarded as-is to getVideoIndex()
  renewCache?: boolean
}): Promise<MediaInfo[]> {
  try {
    // the index is gonna grow more and more,
    // but in the future we will use some DB eg. Prisma or sqlite
    const published = await getVideoIndex({
      status: "published",
      renewCache,
    })

    let candidates = Object.values(published)

    if (ignoreVideoIds.length) {
      // a Set keeps the exclusion linear, whatever the size of the ignore list
      const ignoredIds = new Set(ignoreVideoIds)
      candidates = candidates.filter(video => !ignoredIds.has(video.id))
    }

    if (sortBy === "match") {
      // now obviously we are going to migrate to a database search instead,
      // maybe a bit of vector search too,
      // but let's say that for now this is good enough
      const miniSearch = new MiniSearch({
        fields: ["label", "description", "tags"], // fields to index for full-text search
        storeFields: ["id"], // fields to return with search results
      })
      miniSearch.addAll(candidates)

      // mini search has plenty of options, see:
      // https://www.npmjs.com/package/minisearch
      const results = miniSearch.search(query.trim().toLowerCase(), {
        prefix: true, // "moto" will match "motorcycle"
        fuzzy: 0.2,
      })

      // search results come back ordered by relevance: we keep that order,
      // so that the closest matches are the first ones in the list
      const videosById = new Map(candidates.map(video => [video.id, video] as const))
      const ranked: typeof candidates = []
      for (const result of results) {
        const video = videosById.get(result.id)
        if (video) {
          ranked.push(video)
        }
      }
      candidates = ranked
    } else if (sortBy === "date") {
      candidates.sort((a, b) => b.updatedAt.localeCompare(a.updatedAt))
    } else {
      shuffleInPlace(candidates)
    }

    // filter videos by mandatory tags, or else we keep everything
    const mandatoryTagSet = normalizeTags(mandatoryTags)
    const eligibleVideos = mandatoryTagSet.size
      ? candidates.filter(video => hasAnyTag(video, mandatoryTagSet))
      : candidates

    let selectedVideos = eligibleVideos

    // nice-to-have tags only influence which videos come first:
    // they never bring back a video rejected by the mandatory tags
    const niceToHaveTagSet = normalizeTags(niceToHaveTags)
    if (niceToHaveTagSet.size) {
      const preferredVideos = eligibleVideos.filter(video => hasAnyTag(video, niceToHaveTagSet))

      // count how many videos we still need to reach the requested amount
      const nbMissingVideos = maxNbMedias - preferredVideos.length

      if (nbMissingVideos > 0) {
        // fill the gap with eligible videos from other topics
        // (of course we don't reuse the same)
        const preferredIds = new Set(preferredVideos.map(video => video.id))
        const videosToUseAsFiller = eligibleVideos
          .filter(video => !preferredIds.has(video.id))
          .slice(0, nbMissingVideos)

        selectedVideos = [...preferredVideos, ...videosToUseAsFiller]
      } else {
        selectedVideos = preferredVideos
      }
    }

    const sanitizedVideos = selectedVideos.filter(video => !isAntisocial(video))

    // we enforce the max limit of HARD_LIMIT (eg. 100);
    // the lower bound of 0 prevents a negative maxNbMedias from being
    // interpreted by slice() as "everything but the last N items"
    const limit = Math.max(0, Math.min(HARD_LIMIT, maxNbMedias))
    const limitedNumberOfVideos = sanitizedVideos.slice(0, limit)

    // we ask for the freshest stats
    const videosWithStats = await extendVideosWithStats(limitedNumberOfVideos)

    // high quality videos first, relative order preserved within each group
    const highQuality: typeof videosWithStats = []
    const lowQuality: typeof videosWithStats = []
    for (const video of videosWithStats) {
      if (isHighQuality(video)) {
        highQuality.push(video)
      } else {
        lowQuality.push(video)
      }
    }

    return [...highQuality, ...lowQuality]
  } catch (err) {
    if (neverThrow) {
      console.error("failed to get videos:", err)
      return []
    }
    throw err
  }
}

// lowercases and trims the tags, dropping the ones that end up empty
function normalizeTags(tags: readonly string[]): Set<string> {
  return new Set(tags.map(tag => tag.toLowerCase().trim()).filter(tag => tag))
}

// tells whether the video carries at least one of the (already normalized) tags
function hasAnyTag(video: { tags: readonly string[] }, wantedTags: ReadonlySet<string>): boolean {
  return video.tags.some(tag => wantedTags.has(tag.toLowerCase().trim()))
}

// unbiased Fisher-Yates shuffle (a random sort() comparator is inconsistent)
function shuffleInPlace<T>(items: T[]): void {
  for (let i = items.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1))
    const swapped = items[i]
    items[i] = items[j]
    items[j] = swapped
  }
}