File size: 2,865 Bytes
53aa97a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import puppeteer from "puppeteer"

import { sleep } from "./sleep.mts"
import { ImageSegment } from "../types.mts"
import { downloadImageAsBase64 } from "./downloadFileAsBase64.mts"
import { resizeBase64Image } from "./resizeBase64Image.mts"

// Base URLs of the segmentation Spaces; the list is rotated by its consumer
// so that requests are spread across instances (round-robin).
// We don't use replicas yet, because it isn't easy to get their hostnames.
// Unset or blank environment variables are dropped, so the rotation can
// never land on an empty URL.
const instances: string[] = [
  process.env.VC_SEGMENTATION_MODULE_SPACE_API_URL_1,
  process.env.VC_SEGMENTATION_MODULE_SPACE_API_URL_2,
  // process.env.VC_SEGMENTATION_MODULE_SPACE_API_URL_3,
]
  .map(url => (url ?? "").trim())
  .filter(url => url !== "")

// TODO we should use an inference endpoint instead

// Returns the next usable instance URL and rotates the shared list
// (the picked entry is moved from the head to the tail).
function pickNextInstance(): string {
  // at most one full rotation: every iteration moves the head to the tail,
  // so the list length stays constant while we skip over blank entries
  for (let attempt = 0; attempt < instances.length; attempt++) {
    const candidate = instances.shift()
    if (candidate === undefined) {
      break
    }
    instances.push(candidate)
    if (candidate.trim() !== "") {
      return candidate
    }
  }

  throw new Error(`no segmentation instance is configured (the VC_SEGMENTATION_MODULE_SPACE_API_URL_* variables are empty)`)
}

/**
 * Segments an image by driving the web UI of a remote segmentation Space
 * with a headless browser.
 *
 * The image is uploaded through the page's file input, the concepts are typed
 * into the first textbox, the form is submitted, and the resulting mask image
 * and JSON output are read back from the page.
 *
 * Note: on a large T4 (8 vCPU) it takes about 30 seconds to compute.
 *
 * @param inputImageFilePath - path of the image file to upload
 * @param actionnables - concepts to detect; they are joined with " . " before being typed
 * @param width - width passed to `resizeBase64Image` for the returned mask
 * @param height - height passed to `resizeBase64Image` for the returned mask
 * @returns the resized mask (`pngInBase64`) and the parsed segments; `segments`
 * is an empty array when the page's JSON output is missing or invalid
 * @throws Error when no instance URL is configured, when an expected page
 * element is missing, or when the mask does not show up before the timeout
 */
export async function segmentImage(
  inputImageFilePath: string,
  actionnables: string[],
  width: number,
  height: number,
): Promise<{
  pngInBase64: string
  segments: ImageSegment[]
}> {

  console.log(`segmenting image..`)

  const instance = pickNextInstance()

  // we keep it tight, to fail early
  const timeoutInMs = 40000

  const browser = await puppeteer.launch({
    headless: true,
    protocolTimeout: timeoutInMs,
  })

  try {
    const page = await browser.newPage()
    await page.goto(instance, { waitUntil: 'networkidle2' })

    // give the UI some extra time to settle once the network is idle
    await sleep(3000)

    const fileField = await page.$('input[type="file"]')
    if (!fileField) {
      throw new Error(`segmentation page has no file input`)
    }

    // console.log(`uploading file..`)
    await fileField.uploadFile(inputImageFilePath)

    const firstTextarea = await page.$('textarea[data-testid="textbox"]')
    if (!firstTextarea) {
      throw new Error(`segmentation page has no textbox for the concepts to detect`)
    }

    const conceptsToDetect = actionnables.join(" . ")
    await firstTextarea.type(conceptsToDetect)

    // console.log('looking for the button to submit')
    const submitButton = await page.$('button.lg')
    if (!submitButton) {
      throw new Error(`segmentation page has no submit button`)
    }

    await sleep(300)

    // console.log('clicking on the button')
    await submitButton.click()

    await page.waitForSelector('img[data-testid="detailed-image"]', {
      timeout: timeoutInMs,
    })

    const maskUrl = await page.$$eval('img[data-testid="detailed-image"]', el => el.map(x => x.getAttribute("src"))[0])
    if (!maskUrl) {
      throw new Error(`segmentation page returned a mask image without a source URL`)
    }

    let segments: ImageSegment[] = []

    try {
      // the JSON output is read from the second textarea of the page
      const rawSegments = await page.$$eval('textarea', el => el.map(x => x.value)[1])
      const parsed: unknown = JSON.parse(rawSegments)
      if (Array.isArray(parsed)) {
        segments = parsed as ImageSegment[]
      } else {
        console.log(`failed to parse JSON: expected an array of segments`)
      }
    } catch (err) {
      // best-effort: a missing or invalid JSON output yields no segments
      console.log(`failed to parse JSON: ${err}`)
      segments = []
    }

    // const tmpMaskFileName = `${uuidv4()}.png`
    // await downloadFileToTmp(maskUrl, tmpMaskFileName)

    const rawPngInBase64 = await downloadImageAsBase64(maskUrl)

    const pngInBase64 = await resizeBase64Image(rawPngInBase64, width, height)

    return {
      pngInBase64,
      segments,
    }
  } finally {
    // always release the browser, whether we succeeded or failed
    await browser.close()
  }
}

/*

If you want to try:

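// note: the input file must be a .jpg and not a .jpeg, it seems
// (probably a bug in the file upload of the browser automation library)
// the last two arguments are the width and height of the returned mask
const results = await segmentImage("./barn.jpg", [
  "roof",
  "door",
  "window"
], 1024, 576)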

console.log("results:", results)
*/