jbilcke-hf HF staff committed on
Commit
f6f0c40
·
1 Parent(s): 8ce416b
src/analysis/{analyzeImage.mts → analyzeImageWithIDEFICS.mts} RENAMED
@@ -1,15 +1,10 @@
1
 
2
  import { client } from "@gradio/client"
3
 
4
-
5
- // we don't use replicas yet, because it ain't easy to get their hostname
6
  const instances: string[] = [
7
  `${process.env.VC_ANALYSIS_SPACE_API_URL || ""}`,
8
- // `${process.env.VC_UPSCALING_SPACE_API_URL_2 || ""}`,
9
- // `${process.env.VC_UPSCALING_SPACE_API_URL_3 || ""}`,
10
  ].filter(instance => instance?.length > 0)
11
 
12
-
13
  export async function analyzeImage(src: string, prompt: string): Promise<string> {
14
 
15
  const instance = instances.shift()
@@ -18,11 +13,37 @@ export async function analyzeImage(src: string, prompt: string): Promise<string>
18
  const api = await client(instance, {
19
  hf_token: `${process.env.VC_HF_API_TOKEN}` as any
20
  })
21
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  const result = await api.predict(6, [
23
  "HuggingFaceM4/idefics-80b-instruct", // string (Option from: ['HuggingFaceM4/idefics-80b-instruct']) in 'Model' Dropdown component
24
  prompt, // string in 'Text input' Textbox component
25
- "null", // any (any valid json) in 'IDEFICS' Chatbot component
26
  src, // blob in 'Image input' Image component
27
 
28
  // the following values come from the source code at:
@@ -37,7 +58,7 @@ export async function analyzeImage(src: string, prompt: string): Promise<string>
37
 
38
  const rawResponse = result as any
39
 
40
- console.log("rawResponse:", rawResponse)
41
 
42
  return rawResponse?.data?.[0] as string
43
  }
 
1
 
2
  import { client } from "@gradio/client"
3
 
 
 
4
  const instances: string[] = [
5
  `${process.env.VC_ANALYSIS_SPACE_API_URL || ""}`,
 
 
6
  ].filter(instance => instance?.length > 0)
7
 
 
8
  export async function analyzeImage(src: string, prompt: string): Promise<string> {
9
 
10
  const instance = instances.shift()
 
13
  const api = await client(instance, {
14
  hf_token: `${process.env.VC_HF_API_TOKEN}` as any
15
  })
16
+
17
+ console.log("/analyzeImage: calling api.predict(6, ...)")
18
+
19
+ /*
20
+ the chat history has this format:
21
+ [
22
+ [
23
+ '![](/file=/tmp/gradio/2ee0577f810cba5c50d0a7f047a9e6557f4e269f/image.png)What do you see in the following image?',
24
+ 'I'
25
+ ]
26
+ ]
27
+ */
28
+ const chat_history = [
29
+ // ['', '']
30
+ ]
31
+
32
+ // unfortunately the Gradio client doesn't support streaming, and will crash here with a nasty error
33
+ /*
34
+ node_modules/@gradio/client/dist/index.js:705
35
+ return data.map((d, i) => {
36
+ ^
37
+ TypeError: Cannot read properties of null (reading 'is_file')
38
+ at node_modules/@gradio/client/dist/index.js:713:43
39
+ at Array.map (<anonymous>)
40
+ at transform_output (node_modules/@gradio/client/dist/index.js:705:15)
41
+ */
42
+
43
  const result = await api.predict(6, [
44
  "HuggingFaceM4/idefics-80b-instruct", // string (Option from: ['HuggingFaceM4/idefics-80b-instruct']) in 'Model' Dropdown component
45
  prompt, // string in 'Text input' Textbox component
46
+ chat_history, // any (any valid json) in 'IDEFICS' Chatbot component
47
  src, // blob in 'Image input' Image component
48
 
49
  // the following values come from the source code at:
 
58
 
59
  const rawResponse = result as any
60
 
61
+ console.log("got a response!:", rawResponse)
62
 
63
  return rawResponse?.data?.[0] as string
64
  }
src/analysis/analyzeImageWithIDEFICSAndNastyHack.mts ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ /*
3
+ unfortunately the Gradio client doesn't support streaming:
4
+ it will crash here with a nasty error
5
+
6
+ node_modules/@gradio/client/dist/index.js:705
7
+ return data.map((d, i) => {
8
+ ^
9
+ TypeError: Cannot read properties of null (reading 'is_file')
10
+ at node_modules/@gradio/client/dist/index.js:713:43
11
+ at Array.map (<anonymous>)
12
+ at transform_output (node_modules/@gradio/client/dist/index.js:705:15)
13
+
14
+
15
+ This prevents us from using IDEFICS through the Gradio API,
16
+ so the only solution is to hack our way in using puppeteer.
17
+ */
18
+
19
+
20
+ import path from "node:path"
21
+
22
+ import { v4 as uuidv4 } from "uuid"
23
+ import tmpDir from "temp-dir"
24
+ import puppeteer from "puppeteer"
25
+
26
+ import { writeBase64ToFile } from "../utils/writeBase64ToFile.mts"
27
+ import { sleep } from "../utils/sleep.mts"
28
+ import { deleteFileIfExists } from "../utils/deleteFileIfExists.mts"
29
+
30
+ const instances: string[] = [
31
+ `${process.env.VC_ANALYSIS_SPACE_API_URL || ""}`,
32
+ ].filter(instance => instance?.length > 0)
33
+
34
+ // There is no easy to use public API for IDEFICS
35
+ // (something where we can just push text + file and get a response without handling history, upload etc)
36
+ // So let's hack our way in
37
+ export async function analyzeImage(image: string, prompt: string) {
38
+ const instance = instances.shift()
39
+ instances.push(instance)
40
+
41
+ // wait.. is that really a jpg we have?
42
+ // well, let's hope so.
43
+ const tmpImageFilePath = path.join(tmpDir, `${uuidv4()}.jpg`)
44
+
45
+ await writeBase64ToFile(image, tmpImageFilePath)
46
+ // console.log("wrote the image to ", tmpImageFilePath)
47
+
48
+ const browser = await puppeteer.launch({
49
+ headless: true,
50
+ protocolTimeout: 30000,
51
+ })
52
+
53
+ try {
54
+ const page = await browser.newPage()
55
+
56
+ await page.goto(instance, {
57
+ waitUntil: 'networkidle2',
58
+ })
59
+
60
+ // console.log("filling in the prompt..")
61
+ const promptField = await page.$('textarea')
62
+ await promptField.type(prompt)
63
+
64
+ // console.log("beginning:", imageBase64.slice(0, 100))
65
+
66
+ // await new Promise(r => setTimeout(r, 1000))
67
+
68
+ const fileField = await page.$('input[type=file]')
69
+
70
+ console.log(`uploading file..`)
71
+ await fileField.uploadFile(tmpImageFilePath)
72
+ // console.log(`did it work? did it do something?`)
73
+
74
+ // console.log('looking for the button to submit')
75
+ const submitButton = await page.$('button.lg')
76
+
77
+ // console.log('clicking on the submit')
78
+ // await submitButton.click()
79
+
80
+ console.log("waiting for bot response..")
81
+ await page.$('.message.bot')
82
+
83
+ // note: we are going to receive the response in streaming
84
+
85
+ // TODO we should use a different approach here, like perhaps something to detect when the element
86
+ // has stopped receiving updates
87
+ await sleep(12000)
88
+
89
+ const message = await page.$$eval(".message.bot p", el => el.map(x => x.innerText)[0])
90
+ console.log("message:", message)
91
+
92
+ return message
93
+ } catch (err) {
94
+ throw err
95
+ } finally {
96
+ await browser.close()
97
+ await deleteFileIfExists(tmpImageFilePath)
98
+ }
99
+ }
src/index.mts CHANGED
@@ -23,7 +23,7 @@ import { sortVideosByYoungestFirst } from "./utils/sortVideosByYoungestFirst.mts
23
  import { getRenderedScene, renderScene } from "./production/renderScene.mts"
24
  import { parseRenderRequest } from "./utils/parseRenderRequest.mts"
25
  import { loadRenderedSceneFromCache } from "./utils/loadRenderedSceneFromCache.mts"
26
- import { analyzeImage } from "./analysis/analyzeImage.mts"
27
 
28
  initFolders()
29
  // to disable all processing (eg. to debug)
@@ -40,8 +40,6 @@ let isRendering = false
40
  // an image analyzing pipeline
41
  app.post("/analyze", async (req, res) => {
42
 
43
- console.log(req.body)
44
-
45
  const request = req.body as ImageAnalysisRequest
46
 
47
  if (!request.prompt) {
@@ -60,6 +58,11 @@ app.post("/analyze", async (req, res) => {
60
  return
61
  }
62
 
 
 
 
 
 
63
  const response: ImageAnalysisResponse = {
64
  result: "",
65
  error: ""
@@ -68,7 +71,8 @@ app.post("/analyze", async (req, res) => {
68
  try {
69
  response.result = await analyzeImage(request.image, request.prompt)
70
  } catch (err) {
71
- // console.log("failed to render scene!")
 
72
  response.error = `failed to render scene: ${err}`
73
  }
74
 
 
23
  import { getRenderedScene, renderScene } from "./production/renderScene.mts"
24
  import { parseRenderRequest } from "./utils/parseRenderRequest.mts"
25
  import { loadRenderedSceneFromCache } from "./utils/loadRenderedSceneFromCache.mts"
26
+ import { analyzeImage } from "./analysis/analyzeImageWithIDEFICSAndNastyHack.mts"
27
 
28
  initFolders()
29
  // to disable all processing (eg. to debug)
 
40
  // an image analyzing pipeline
41
  app.post("/analyze", async (req, res) => {
42
 
 
 
43
  const request = req.body as ImageAnalysisRequest
44
 
45
  if (!request.prompt) {
 
58
  return
59
  }
60
 
61
+ console.log("/analyze called with: ", {
62
+ prompt: request.prompt,
63
+ image: request.image.slice(0, 50)
64
+ })
65
+
66
  const response: ImageAnalysisResponse = {
67
  result: "",
68
  error: ""
 
71
  try {
72
  response.result = await analyzeImage(request.image, request.prompt)
73
  } catch (err) {
74
+ console.log("failed to render scene!")
75
+ console.log(err)
76
  response.error = `failed to render scene: ${err}`
77
  }
78
 
src/utils/deleteFileIfExists.mts CHANGED
@@ -1,6 +1,11 @@
1
  import { existsSync, promises as fs } from "node:fs"
2
 
3
  export const deleteFileIfExists = async (filePath: string) => {
 
 
 
 
 
4
  if (existsSync(filePath)) {
5
  try {
6
  await fs.unlink(filePath)
 
1
  import { existsSync, promises as fs } from "node:fs"
2
 
3
  export const deleteFileIfExists = async (filePath: string) => {
4
+ // this function scares me a bit,
5
+ if (filePath === "/" || filePath === "~" || filePath === ".") {
6
+ throw new Error(`lol, no.`)
7
+ }
8
+
9
  if (existsSync(filePath)) {
10
  try {
11
  await fs.unlink(filePath)
src/utils/writeBase64ToFile.mts CHANGED
@@ -11,7 +11,7 @@ export async function writeBase64ToFile(content: string, filePath: string): Prom
11
  // Write binary data to file
12
  try {
13
  await fs.writeFile(filePath, data)
14
- console.log("File written successfully")
15
  } catch (error) {
16
  console.error("An error occurred:", error)
17
  }
 
11
  // Write binary data to file
12
  try {
13
  await fs.writeFile(filePath, data)
14
+ // console.log("File written successfully")
15
  } catch (error) {
16
  console.error("An error occurred:", error)
17
  }