File size: 3,902 Bytes
6348944
 
 
 
 
64dea8b
 
6348944
 
 
 
 
6b981ec
6348944
6b981ec
6348944
 
64dea8b
6348944
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64dea8b
 
 
 
 
 
 
 
6348944
 
 
 
 
 
 
 
 
 
 
d0524ae
6348944
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b981ec
6348944
 
64dea8b
6348944
6b981ec
6348944
 
64dea8b
02b4e01
6348944
d0524ae
6348944
 
 
 
 
6b981ec
6348944
 
 
 
 
 
 
 
 
 
fd74d53
6b981ec
 
 
6348944
 
 
 
fd74d53
6348944
 
 
 
 
 
 
 
 
 
 
109f3e6
6348944
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import express from "express"
import { HfInference } from '@huggingface/inference'

import { daisy } from "./daisy.mts"

// Hugging Face inference client, constructed with the access token from HF_API_TOKEN.
const hfi = new HfInference(process.env.HF_API_TOKEN)
// Handle bound to the dedicated endpoint given by HF_ENDPOINT_URL; all generation goes through it.
const hf = hfi.endpoint(process.env.HF_ENDPOINT_URL)

const app = express()
const port = 7860

// Minimum accepted prompt length, in characters.
// If you change this, you also need to change it in public/index.html.
const minPromptSize = 16

// Delay (in seconds) after which a request is removed from the pending queue.
const timeoutInSec = 30 * 60

// Derived from the constant so the message cannot drift from the actual timeout.
console.log(`timeout set to ${timeoutInSec / 60} minutes`)

// Serve static files from the "public" directory.
app.use(express.static("public"))

/**
 * Shape of the request bookkeeping shared by the routes:
 * `total` is a running counter used to mint request ids,
 * `queue` holds the ids of the requests that are still in progress.
 */
type PendingRequests = {
  total: number
  queue: string[]
}

// Starts empty: no request has been accepted yet.
const pending: PendingRequests = { total: 0, queue: [] }
 
/**
 * Removes a request from the pending queue and logs why it ended.
 *
 * Does nothing when `id` is empty or is not currently queued, so calling it
 * several times for the same request only logs once.
 *
 * @param id - identifier of the request to end
 * @param reason - human-readable explanation, included in the log line
 */
const endRequest = (id: string, reason: string) => {
  const isQueued = Boolean(id) && pending.queue.includes(id)
  if (!isQueued) {
    return
  }

  pending.queue = pending.queue.filter((queuedId) => queuedId !== id)
  console.log(`request ${id} ended (${reason})`)
}

// GET /debug — writes a JSON snapshot of the request counter and of the ids still pending.
app.get("/debug", (_req, res) => {
  const { total, queue } = pending
  const snapshot = {
    nbTotal: total,
    nbPending: queue.length,
    queue,
  }

  res.write(JSON.stringify(snapshot))
  res.end()
})

/**
 * GET /app — streams an LLM-generated HTML page back to the client.
 *
 * Query parameters:
 * - `prompt`: description of the page to generate; must be a string of at least
 *   `minPromptSize` characters, otherwise a short error message is returned.
 * - `model`, `endpoint`: only logged here; generation always goes through the
 *   module-level `hf` endpoint.
 *
 * The request id stays in `pending.queue` while tokens are streamed. Removing the id
 * (client disconnect, timeout) makes the streaming loop stop at the next token.
 */
app.get("/app", async (req, res) => {

  const model = `${req.query.model || 'OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5'}`

  console.log('model:', model)

  const endpoint = `${req.query.endpoint || ''}`

  console.log('endpoint:', endpoint)

  // Only accept a plain string: a missing prompt would otherwise be interpolated as
  // "undefined", and an object-valued query parameter as "[object Object]".
  const prompt = typeof req.query.prompt === 'string' ? req.query.prompt : ''

  if (prompt.length < minPromptSize) {
    res.write(`prompt too short, please enter at least ${minPromptSize} characters`)
    res.end()
    return
  }

  const id = `${pending.total++}`
  console.log(`new request ${id}`)

  pending.queue.push(id)

  // The prefix deliberately stops at an unterminated `<body`: the model output is appended right after it.
  // NOTE(review): "[email protected]" in the two CDN URLs looks like an email-obfuscation artifact
  // that replaced the "package@version" part — confirm and restore the pinned versions.
  const prefix = `<html><head><link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/full.css" rel="stylesheet" type="text/css" /><script defer src="https://cdn.jsdelivr.net/npm/[email protected]/dist/cdn.min.js"></script><script src="https://cdn.tailwindcss.com?plugins=forms,typography,aspect-ratio,line-clamp"></script><title>Generated content</title><body`
  res.write(prefix)

  // Listen on the response rather than the request: the response's "close" event covers a
  // connection that is terminated before the response completes. After a normal end the
  // id is already gone from the queue, so endRequest is a no-op.
  res.on("close", function() {
    endRequest(id, "browser asked to end the connection")
  })

  // Safety net: stop generating once the request has been running for too long.
  const timeout = setTimeout(() => {
    endRequest(id, `timed out after ${timeoutInSec}s`)
  }, timeoutInSec * 1000)


  const finalPrompt = `# Task
Generate the following: ${prompt}
# API Documentation
${daisy}
# Guidelines
- Never repeat the instruction, instead directly write the final code
- Use a color scheme consistent with the brief and theme
- You must use Tailwind CSS and Daisy UI for the CSS classes, vanilla JS and Alpine.js for the JS.
- All the JS code will be written directly inside the page, using <script type="text/javascript">...</script>
- You MUST use English, not Latin! (I repeat: do NOT write lorem ipsum!)
- No need to write code comments, so please make the code compact (short function names etc)
- Use a central layout by wrapping everything in a \`<div class="flex flex-col items-center">\`
# HTML output
<html><head></head><body`

  try {
    let result = ''
    for await (const output of hf.textGenerationStream({
      inputs: finalPrompt,
      parameters: { max_new_tokens: 1024 }
    })) {
      // the request was ended elsewhere (client gone or timeout): stop streaming
      if (!pending.queue.includes(id)) {
        break
      }
      result += output.token.text
      process.stdout.write(output.token.text)
      res.write(output.token.text)
      // the page is complete
      if (result.includes('</html>')) {
        break
      }
      if (result.includes('<|end|>') || result.includes('<|assistant|>')) {
        // it ended, but we probably don't have a valid HTML
        break
      }
    }

    endRequest(id, `normal end of the LLM stream for request ${id}`)
  } catch (e) {
    console.log(e)
    endRequest(id, `premature end of the LLM stream for request ${id} (${e})`)
  } finally {
    // Without this, every finished request would keep its timer (and closure) alive
    // for the full timeout duration.
    clearTimeout(timeout)
  }

  try {
    res.end()
  } catch (err) {
    console.log(`couldn't end the HTTP stream for request ${id} (${err})`)
  }

})

// Start the HTTP server; the callback prints the local URL once the server is listening.
app.listen(port, () => {
  console.log(`Open http://localhost:${port}`)
})