Spaces:
Running
Running
File size: 4,410 Bytes
b39afbe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
/**
* Copyright (c) 2023 MERCENARIES.AI PTE. LTD.
* All rights reserved.
*/
//@ts-check
import { is_valid, console_log, clean_string } from './utils.js';
import { user_db_put, user_db_get, user_db_delete } from './database.js';
/**
 * Upload a piece of text to the CDN as a temporary plain-text (UTF-8) file.
 *
 * @param {any} ctx - Execution context; `ctx.app.cdn.putTemp`, `ctx.userId` and `ctx.jobId` are read.
 * @param {any} text - Content handed to `Buffer.from` before upload (presumably a string — confirm with callers).
 * @returns {Promise<any>} Whatever `ctx.app.cdn.putTemp` resolves to.
 */
// @ts-ignore
async function save_text_to_cdn(ctx, text) {
  //@ts-ignore
  const payload = Buffer.from(text);
  const { cdn } = ctx.app;
  const stored = await cdn.putTemp(payload, {
    mimeType: 'text/plain; charset=utf-8',
    userId: ctx.userId,
    jobId: ctx.jobId,
  });
  // Trace the CDN answer so the upload can be followed in the logs.
  console_log(`cdn_response = ${JSON.stringify(stored)}`);
  return stored;
}
/**
 * Serialize a value as pretty-printed JSON (2-space indent, trimmed) and
 * upload it to the CDN as a temporary plain-text (UTF-8) file.
 *
 * @param {any} ctx - Execution context; `ctx.app.cdn.putTemp`, `ctx.userId` and `ctx.jobId` are read.
 * @param {any} json - Value to serialize. It must be JSON-serializable: when
 *   `JSON.stringify` yields `undefined`, the `.trim()` call throws a TypeError.
 * @returns {Promise<any>} Whatever `ctx.app.cdn.putTemp` resolves to.
 */
// @ts-ignore
async function save_json_to_cdn(ctx, json) {
  const pretty_json = JSON.stringify(json, null, 2);
  //@ts-ignore
  const payload = Buffer.from(pretty_json.trim());
  const { cdn } = ctx.app;
  const stored = await cdn.putTemp(payload, {
    mimeType: 'text/plain; charset=utf-8',
    userId: ctx.userId,
    jobId: ctx.jobId,
  });
  // Trace the CDN answer so the upload can be followed in the logs.
  console_log(`cdn_response = ${JSON.stringify(stored)}`);
  return stored;
}
/**
 * Fetch a JSON document from the CDN and parse it.
 *
 * The document is requested with the 'asBase64' option; its `data` is
 * stringified, base64-decoded to UTF-8 text and then parsed as JSON.
 *
 * @param {any} ctx - Execution context; `ctx.app.cdn.get` is called.
 * @param {any} cdn_response - Object carrying a `ticket` property, which is
 *   passed to `ctx.app.cdn.get` (presumably the result of an earlier upload — confirm with callers).
 * @returns {Promise<any>} The parsed JSON value.
 * @throws {Error} When `cdn_response` is not an object with a `ticket`, when
 *   the CDN returns null/undefined, or when decoding/parsing fails (the
 *   underlying error is attached as `cause`).
 */
// @ts-ignore
async function get_json_from_cdn(ctx, cdn_response) {
  // The `in` operator throws a bare TypeError on null/undefined/primitives,
  // so rule those out first and report them with the descriptive error below.
  const is_object_like = Object(cdn_response) === cdn_response;
  if (!is_object_like || !('ticket' in cdn_response)) {
    throw new Error(`get_json_from_cdn: cdn_response = ${JSON.stringify(cdn_response)} is invalid`);
  }

  const response_from_cdn = await ctx.app.cdn.get(cdn_response.ticket, null, 'asBase64');
  if (response_from_cdn == null) {
    throw new Error(`get_json_from_cdn: document = ${JSON.stringify(response_from_cdn)} is invalid`);
  }

  try {
    const base64_text = response_from_cdn.data.toString();
    //@ts-ignore
    const json_string = Buffer.from(base64_text, 'base64').toString('utf8');
    return JSON.parse(json_string);
  } catch (e) {
    // Keep the original failure reachable through `cause` so its stack is not lost.
    // @ts-ignore: the `cause` option needs ES2022 lib typings
    throw new Error(`get_json_from_cdn: error converting response_from_cdn.data to utf-8, error = ${e}`, { cause: e });
  }
}
/**
 * Serialize a value as pretty-printed JSON (2-space indent, trimmed) and
 * upload the resulting buffer to the CDN as a temporary file. No mimeType is
 * passed to the CDN, only the user and job ids.
 *
 * @param {any} ctx - Execution context; `ctx.app.cdn.putTemp`, `ctx.userId` and `ctx.jobId` are read.
 * @param {any} json - Value to serialize. It must be JSON-serializable: when
 *   `JSON.stringify` yields `undefined`, the `.trim()` call throws a TypeError.
 * @returns {Promise<any>} Whatever `ctx.app.cdn.putTemp` resolves to.
 */
// @ts-ignore
async function save_json_to_cdn_as_buffer(ctx, json) {
  const pretty_json = JSON.stringify(json, null, 2);
  //@ts-ignore
  const payload = Buffer.from(pretty_json.trim());
  const { cdn } = ctx.app;
  const stored = await cdn.putTemp(payload, {
    userId: ctx.userId,
    jobId: ctx.jobId,
  });
  // Trace the CDN answer so the upload can be followed in the logs.
  console_log(`cdn_response = ${JSON.stringify(stored)}`);
  return stored;
}
/**
 * Load a JSON document from the CDN and return its `chunks` property.
 *
 * @param {any} ctx - Execution context, forwarded to `get_json_from_cdn`.
 * @param {any} chunks_cdn - CDN reference of the document, forwarded to `get_json_from_cdn`.
 * @returns {Promise<any>} The `chunks` value of the downloaded document.
 * @throws {Error} When `is_valid` rejects the `chunks` value.
 */
// @ts-ignore
async function get_chunks_from_cdn(ctx, chunks_cdn) {
  const document = await get_json_from_cdn(ctx, chunks_cdn);
  const found_chunks = document.chunks;
  if (is_valid(found_chunks)) {
    return found_chunks;
  }
  throw new Error(`[get_chunks_from_cdn] Error getting chunks from database with cdn= ${JSON.stringify(chunks_cdn)}`);
}
/**
 * Look up the value stored under `object_id` in the user database.
 *
 * When `overwrite` is truthy the stored entry is deleted instead and `null`
 * is returned.
 *
 * @param {any} ctx - Execution context, forwarded to the database helpers.
 * @param {any} object_id - Key of the entry in the user database.
 * @param {boolean} [overwrite=false] - Delete the entry rather than reading it.
 * @returns {Promise<any>} The result of `user_db_get`, or `null` when overwriting.
 */
// @ts-ignore
async function get_cached_cdn(ctx, object_id, overwrite = false) {
  let cached;
  if (!overwrite) {
    cached = await user_db_get(ctx, object_id);
  } else {
    // Drop the stale entry; the function then reports "nothing cached".
    await user_db_delete(ctx, object_id);
    cached = null;
  }
  console_log(`[get_cached_cdn] cdn = ${JSON.stringify(cached)}, typeof cdn = ${typeof cached}`);
  return cached;
}
/**
 * Store `chunks_cdn` in the user database under `chunks_id`.
 *
 * @param {any} ctx - Execution context, forwarded to `user_db_put`.
 * @param {any} chunks_cdn - Value to store.
 * @param {any} chunks_id - Key to store it under.
 * @returns {Promise<any>} The truthy result of `user_db_put`.
 * @throws {Error} When `user_db_put` resolves to a falsy value.
 */
// @ts-ignore
async function save_chunks_cdn_to_db(ctx, chunks_cdn, chunks_id) {
  const saved = await user_db_put(ctx, chunks_cdn, chunks_id);
  if (saved) {
    return saved;
  }
  throw new Error('ERROR: could not save chunks_cdn to db');
}
/**
 * Download the documents referenced by `documents_cdns` and return their
 * cleaned texts, one entry per document that could be read.
 *
 * Documents are fetched one after another. A document that cannot be
 * processed (download, text conversion or cleaning throws) or whose text is
 * rejected by `is_valid` is skipped with a logged warning rather than
 * aborting the whole batch.
 *
 * @param {any} ctx - Execution context; `ctx.app.cdn.get` is called once per document.
 * @param {any} documents_cdns - Indexable list of CDN references; each entry's `ticket` is read.
 * @returns {Promise<string[]>} The cleaned texts, in input order.
 * @throws {Error} When `is_valid` rejects `documents_cdns`, or rejects the collected texts.
 */
// @ts-ignore
async function downloadTextsFromCdn(ctx, documents_cdns) {
  if (!is_valid(documents_cdns)) {
    throw new Error(`ERROR: documents is invalid. documents = ${JSON.stringify(documents_cdns)}`);
  }

  const texts = [];
  for (let i = 0; i < documents_cdns.length; i++) {
    const document_cdn = documents_cdns[i];
    // TODO: convert docs files to text when necessary
    try {
      const document = await ctx.app.cdn.get(document_cdn.ticket);
      const text = document.data.toString() || '';
      if (!is_valid(text)) {
        console_log(`WARNING: text is null or undefined or empty for document = ${JSON.stringify(document)}`);
        continue;
      }
      texts.push(clean_string(text));
    } catch (error) {
      // Best effort: skip this document, but report why it failed so the
      // problem can be diagnosed from the logs.
      console_log(`WARNING: document ${JSON.stringify(document_cdn)} cannot be retrieved from cdn, error = ${error}`);
    }
  }

  if (!is_valid(texts)) throw new Error('ERROR: texts is invalid');
  return texts;
}
// Public API of this module: helpers for storing and retrieving text, JSON
// and chunk documents through the CDN and the user database.
export {
save_text_to_cdn,
save_json_to_cdn,
get_json_from_cdn,
save_json_to_cdn_as_buffer,
get_chunks_from_cdn,
get_cached_cdn,
save_chunks_cdn_to_db,
downloadTextsFromCdn
};
|