Utilisateur:Yopyop456/Brouillon/transformers-js
import * as import1 from './dist/v3/transformers.v3.js'
import * as import2 from './dist/transformers.js'
const module = {...import1}
const v2tasks = ['Text2TextGenerationPipeline', 'TextGenerationPipeline', 'AutomaticSpeechRecognitionPipeline', 'ImageToTextPipeline', 'DocumentQuestionAnsweringPipeline', 'TextToAudioPipeline']
/*
let module2 = ({Text2TextGenerationPipeline, TextGenerationPipeline, AutomaticSpeechRecognitionPipeline, ImageToTextPipeline, DocumentQuestionAnsweringPipeline, TextToAudioPipeline} = import2)
Object.assign(module, import2)
*/
/** --- HELPER --- */
let {Pipeline, pipeline: v3_pipeline, AutoProcessor, AutoTokenizer, env, ImageFeatureExtractionPipeline, read_audio, softmax, RawImage, Tensor, ImageSegmentationPipeline, ObjectDetectionPipeline, TextClassificationPipeline, dot} = module
var apis = {};
function setApis(newApis){
apis = newApis
}
function setLocalEnv(){
if(module.env.allowRemoteModels){
module.env.backends.onnx.wasm.proxy = false
module.env.backends.onnx.wasm.wasmPaths = module.env.version[0] == 2 ? 'dist/' : 'dist/v3/'
module.env.backends.onnx.logLevel = 'error'
module.env.localModelPath = '_models/'
module.env.useBrowserCache = false
module.env.allowRemoteModels = false
module.env.allowLocalModels = true
}
}
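// Example (sketch): `setLocalEnv()` switches the library to fully offline mode,
// expecting models under `_models/` and the ONNX wasm files under `dist/` (v2)
// or `dist/v3/` (v3). The `allowRemoteModels` guard makes it a one-shot toggle.
//
//   setLocalEnv()
//   // module.env.allowRemoteModels === false
//   // module.env.localModelPath === '_models/'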
const MODEL_NAME_TO_CLASS_MAPPING = new Map();
const MODEL_CLASS_TO_NAME_MAPPING = new Map();
const SUPPORTED_TASKS = {};
function populateMapping(modelsExport){
for (let name in modelsExport){
MODEL_CLASS_TO_NAME_MAPPING.set(modelsExport[name], name);
MODEL_NAME_TO_CLASS_MAPPING.set(name, modelsExport[name]);
}
}
async function pipeline(task, model, options = {session_options: {}}){
if(options.dispose_pipe){
await options.dispose_pipe.dispose()
}
const pipelineInfo = SUPPORTED_TASKS[task.split('_', 1)[0]];
if (!pipelineInfo) {
let ret
if(v2tasks.join(' ').toLowerCase().includes(task.split('_', 1)[0].replaceAll('-', ''))){
let tmp
if(module.env.backends.onnx.logLevel != 'warning'){
tmp = console.warn
console.warn = ()=>{}
}
ret = await module.v2_pipeline(...arguments)
if(module.env.backends.onnx.logLevel != 'warning'){
console.warn = tmp
}
}
else{
ret = await v3_pipeline(...arguments)
}
return ret
}
if(!('session_options' in options)) options.session_options = {}
fixInferenceSession(options.session_options)
const classes = {
'tokenizer': pipelineInfo.tokenizer,
'model': pipelineInfo.model,
'processor': pipelineInfo.processor,
}
const modelName = model || pipelineInfo.default.model
for(let key in classes){
if(classes[key]) {
if(!Array.isArray(classes[key])) classes[key] = [classes[key]]
let loaded
for(let cls of classes[key]){
try{
loaded = await cls.from_pretrained(modelName, options)
break
}
catch(e){
console.warn(e)
}
}
classes[key] = loaded
}
}
classes.task = task
classes.model.config._name_or_path = modelName
let pipe = new pipelineInfo.pipeline(classes)
return pipe
}
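// Example (sketch): loading tasks through the patched `pipeline` helper. The
// `dispose_pipe` option is specific to this wrapper and frees a previously
// created pipeline before loading the next one (model names reused from the
// tests at the end of this file).
//
//   let pipe = await module.pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad')
//   pipe = await module.pipeline('token-classification', 'tarekziade/distilbert-NER', {dispose_pipe: pipe})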
function PreTrainedTokenizer_call(text, options = {}){
let result = this.v2_call(...arguments)
if(options.return_offsets_mapping){
result.offset_mapping = get_offsets_mapping.call(this, result, text)
}
return result
}
async function TokenClassificationPipeline_call(text, options = {}){
let result = await this.v2_call(...arguments)
if(!Array.isArray(result[0])) result = [result]
if(!Array.isArray(text)) text = [text]
get_aggregate_words.call(this, result, options.aggregation_strategy, text)
return result.length == 1 ? result[0] : result
}
async function QuestionAnsweringPipeline_call(question, context, options = {}){
let result = await this.v2_call(...arguments)
if(!Array.isArray(result)) result = [result]
if(!Array.isArray(context)) context = [context]
get_qa_offsets.call(this, result, context)
return result.length == 1 ? result[0] : result
}
/** --- RAWAUDIO --- */
/**
* Save a Blob as a downloaded file in the browser.
* @param {string} path filename to suggest for the download
* @param {Blob} blob data to save
*/
function saveBlob(path, blob){
// Create an object URL pointing at the blob
const dataURL = URL.createObjectURL(blob);
// Create an anchor element with the object URL as the href attribute
const downloadLink = document.createElement('a');
downloadLink.href = dataURL;
// Set the download attribute to specify the desired filename for the downloaded file
downloadLink.download = path;
// Trigger the download
downloadLink.click();
// Clean up: release the anchor element
downloadLink.remove();
}
class RawAudio {
/**
* Create a new `RawAudio` object.
* Handles only Float32Array data, with 1 or 2 audio channels.
* @param {Array|Float32Array} audio Float32Array or Array of Float32Array (one per channel)
* @param {number} sampling_rate sampling rate in Hz
*/
constructor(audio, sampling_rate) {
if (!(
typeof audio == 'object' &&
((audio.constructor.name == 'Array' && audio[0]?.constructor.name == 'Float32Array') ||
(audio.constructor.name == 'Float32Array')) &&
typeof sampling_rate == 'number'
)) {
throw Error('TypeError. Expected audio as Float32Array or [Float32Array, Float32Array], and sampling_rate as number')
}
if (audio.constructor.name != 'Array') audio = [audio]
this.audio = audio
this.sampling_rate = sampling_rate
this.interleaved = false
}
/**
* Interleaves the two audio channels, alternating left / right samples (the result stays split across two Float32Array buffers).
* @param {boolean} keepOriginalValues whether to keep the original non-interleaved audio data
* @returns {Array} Array of Float32Array
*/
interleave(keepOriginalValues = false) {
if (this.audio.length != 2 || this.interleaved == true) {
console.warn('Could not interleave audio data')
return
}
let audio, res, res2, len, i, offset
audio = this.audio
len = audio[0].length
res = new audio[0].constructor(len)
res2 = keepOriginalValues ? (new audio[0].constructor(len)) : audio[1]
for (i = 0; i < len; i++) {
res[i] = audio[i % 2][i >> 1]
}
for (offset = i, i = 0; i < len; i++) {
res2[i] = audio[(offset + i) % 2][(offset + i) >> 1]
}
if (keepOriginalValues) {
return [res, res2]
} else {
this.interleaved = true
this.audio[0].set(res)
return this.audio
}
}
/**
* Convert the audio to a wav blob.
* WAV file specs : https://en.wikipedia.org/wiki/Waveform_Audio_File_Format
* @returns {Blob}
*/
toBlob() {
let audio, sampling_rate, wav_header, buf_size, nums
({
audio,
sampling_rate
} = this)
buf_size = audio[0].buffer.byteLength * audio.length
wav_header = new Uint8Array([
82, 73, 70, 70, // 0: 'RIFF'
0, 0, 0, 0, // 4: RIFF size (file size - 8)
87, 65, 86, 69, // 8: 'WAVE'
102, 109, 116, 32, // 12: 'fmt '
16, 0, 0, 0, // 16: fmt chunk size
3, 0, // 20: format tag (1 = int PCM, 3 = IEEE float)
audio.length, 0, // 22: number of channels
0, 0, 0, 0, // 24: samples per second
0, 0, 0, 0, // 28: bytes per second (bytes per block * sample rate)
4 * audio.length, 0, // 32: bytes per block (channels * bytes per sample)
32, 0, // 34: bits per sample (16 for int, 32 for float)
100, 97, 116, 97, // 38: 'data'
0, 0, 0, 0 // 42: data size
])
nums = [
[4, buf_size + wav_header.length - 8],
[24, sampling_rate],
[28, 4 * audio.length * sampling_rate],
[40, buf_size]
]
nums.forEach(([offset, num]) => {
do {
wav_header[offset++] = (num & 255)
num >>= 8
} while (num > 0)
})
if (audio.length == 2 && !this.interleaved) {
audio = this.interleave(true)
}
return new Blob([wav_header, ...audio])
}
/**
* Save the audio to a wav file.
* @param {string} path
*/
async save(path = 'audio.wav') {
let fn
if (apis.IS_BROWSER_ENV) {
if (apis.IS_WEBWORKER_ENV) {
throw new Error('Unable to save a file from a Web Worker.')
}
fn = saveBlob
} else if (apis.IS_FS_AVAILABLE) {
fn = async (path, blob) => {
let buf = await blob.arrayBuffer()
buf = new Uint8Array(buf)
fs.writeFile(path, buf, (err) => {
throw new Error(err)
})
}
} else {
throw new Error('Unable to save because filesystem is disabled in this environment.')
}
if (!(/\.wav$/.test(path))) {
console.warn('Change filename extension to .wav')
path = path.replace(/\.\w{0,4}$/, '') + '.wav'
}
await fn(path, this.toBlob())
}
}
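// Example (sketch): wrapping generated audio and exporting it as a WAV blob, as
// done in the text-to-speech test near the end of this file.
//
//   let audio = new RawAudio(new Float32Array(16000), 16000) // 1 second of silence
//   let blob = audio.toBlob()
//   // In a browser page (not a Web Worker): await audio.save('silence.wav')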
async function _call_text_to_waveform(){
let out = await this.v2_call_text_to_waveform(...arguments);
return new RawAudio(out.audio, out.sampling_rate);
}
async function _call_text_to_spectrogram(){
let out = await this.v2_call_text_to_spectrogram(...arguments);
return new RawAudio(out.audio, out.sampling_rate);
}
/** --- REGISTER PIPELINE --- */
/**
* Get a model class from its name.
* @param {string} name class name, e.g. 'AutoModel'
* @returns {*} the model class, or undefined if unknown
*/
function getModelClassFromName(name){
let cls = MODEL_NAME_TO_CLASS_MAPPING.get(name);
if (!cls) console.warn(name + ' undefined');
return cls;
}
/**
* Get a class name from a class (or an instance of it).
* @param {*} cls class or instance
* @returns {string|undefined} the registered class name
*/
function getClassNameFromClass(cls){
let name = MODEL_CLASS_TO_NAME_MAPPING.get(cls);
if (!name) name = MODEL_CLASS_TO_NAME_MAPPING.get(cls.constructor);
if (!name) console.warn(cls + ' undefined');
return name;
}
/**
* Register a custom task pipeline.
* @param {string} task Task name to register (or update, if it already exists).
* @param {Object} options `{ tokenizer, pipeline, model, processor, default_model, type }`; `model` and `processor` may be given as class names (strings) or as classes.
*
* **Example:** Custom task: audio-feature-extraction.
* ```javascript
* import {
* Pipeline,
* read_audio,
* register_pipeline,
* pipeline
* } from '@xenova/transformers';
*
* class AudioFeatureExtractionPipeline extends Pipeline {
* async _call(input, kwargs = {}) {
* input = await read_audio(input).then(input=>this.processor(input))
* let { audio_embeds } = await this.model(input)
* return audio_embeds
* }
* }
*
* register_pipeline('audio-feature-extraction', {
* pipeline: AudioFeatureExtractionPipeline,
* model: 'ClapAudioModelWithProjection',
* processor: 'AutoProcessor',
* default_model: 'Xenova/larger_clap_music_and_speech'
* })
*
* let pipe = await pipeline('audio-feature-extraction');
* let out = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav')
* console.log(out)
* ```
*/
function register_pipeline(
task, {
tokenizer,
pipeline: pipelineClass,
model,
processor,
default_model = '',
type = ''
} = {}
) {
if (!(
('prototype' in pipelineClass) &&
(pipelineClass.prototype instanceof Pipeline) &&
("_call" in pipelineClass.prototype)
)) {
throw Error('pipeline class must inherit from Pipeline and implement _call')
}
if(typeof model == 'string') model = getModelClassFromName(model)
else if(Array.isArray(model) && typeof model[0] == 'string') model = model.map(x=>getModelClassFromName(x))
const custom = {
tokenizer: tokenizer == 'AutoTokenizer' ? AutoTokenizer : tokenizer,
pipeline: pipelineClass,
model,
processor: processor == 'AutoProcessor' ? AutoProcessor : processor,
'default': (!default_model ? '' : {
model: default_model
}),
type
};
if (task in SUPPORTED_TASKS) {
for (let key in custom) {
if (custom[key]) SUPPORTED_TASKS[task][key] = custom[key];
}
}
else SUPPORTED_TASKS[task] = custom;
}
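// Example (sketch): when the task already exists, only the provided keys are
// overridden, so a second call can swap the pipeline class while keeping the
// registered model, processor and default model (this behaviour is exercised in
// the "register_pipeline updating task" test below).
//
//   register_pipeline('image-feature-extraction', {
//   pipeline: ImageFeatureExtractionPipeline // replace the class only
//   })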
/** --- FIX INFERENCE --- */
function fixInferenceSession(session_options){
// fix wasm relative path
if (apis.IS_BROWSER_ENV) {
let url = env.backends.onnx.wasm.wasmPaths;
if (!url.startsWith('http')) {
env.backends.onnx.wasm.wasmPaths = url[0] == '/' ? location.origin + url : location.href.replace(/[^\/]+$/, '') + url;
}
}
// set log level
let logLevel = ['verbose', 'info', 'warning', 'error', 'fatal'].indexOf(env.backends.onnx.logLevel);
if (logLevel >= 0) {
session_options.logVerbosityLevel = session_options.logSeverityLevel = logLevel;
}
}
/** --- TOKENIZER OFFSETS MAPPING --- */
/**
* Estimate offsets mapping from the original context string.
* @param {BatchEncoding|string|string[]|string[][]} search object with input_ids from the tokenizer, a string, or arrays of token strings / token ids
* @param {string|string[]} context
* @param {string} strategy 'none', 'tokenize' or 'closest'
* @returns {any[][]} per input, a list of [char_start, char_end, token] triples
*/
function get_offsets_mapping(search, context, strategy = 'none') {
let toReturn, idx, lastIdx, len, contextEncodings;
if (typeof search == 'object' && 'input_ids' in search) {
search = search.input_ids.tolist();
}
else {
if (!(Array.isArray(search) && Array.isArray(search[0]))) search = [search];
if (typeof search[0] == 'string') {
search = this._call(search, {add_special_tokens: false})
search = search.input_ids.tolist()
}
}
if (typeof context == 'string') context = [context];
if(!'none tokenize closest'.includes(strategy)) strategy = 'none'
if(strategy == 'closest' || strategy == 'tokenize'){
contextEncodings = this._call(context, {
add_special_tokens: false,
return_offsets_mapping: true
})
context = []
contextEncodings.offset_mapping.forEach(offsets => {
context.push([])
offsets.forEach(offset => {
context.at(-1)[offset[0]] = offset[2]
})
})
}
else{
context.forEach((val, key) => {
context[key] = val.toLowerCase()
})
}
toReturn = []
search.forEach((tokens, i) => {
toReturn.push([]);
lastIdx = 0;
if (typeof tokens[0] != 'string') {
tokens = this.model.convert_ids_to_tokens(tokens);
}
tokens.forEach(token => {
let token2 = token.replace(this.model.config.continuing_subword_prefix, '').toLowerCase()
idx = context[i].indexOf(strategy == 'none' ? token2 : token, lastIdx);
// look behind and find closest match
if (strategy == 'closest' && idx >= 0) {
let a, substrStart, substrSearch, substrIdx, lastIdx;
lastIdx = idx;
for (a = toReturn.at(-1).length - 1; a >= 0; a--) {
substrStart = a > 0 ? lastIdx - 1 - (toReturn[i][a][0] - toReturn[i][a - 1][1] + toReturn[i][a][2].length) : 1;
if(substrStart <= toReturn[i][a][0]) break;
substrSearch = context[i].slice(substrStart, idx);
substrIdx = substrSearch.lastIndexOf(toReturn[i][a][2]);
if (substrIdx > 0) {
lastIdx = substrStart + substrIdx;
toReturn[i][a] = [lastIdx, lastIdx + toReturn[i][a][2].length, toReturn[i][a][2]];
} else break;
}
}
if (idx < 0) {
idx = lastIdx;
len = 0;
}
else len = token2.length;
toReturn.at(-1).push([idx, idx + len, token]);
lastIdx = idx + len;
})
})
return toReturn;
}
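// Example (sketch): mapping tokens back to character positions in the original
// context, mirroring the tokenizer tests below (`pipe` stands for any loaded
// pipeline exposing its tokenizer).
//
//   let ctx = 'hello world, hello world, hello world user, user'
//   let offsets = pipe.tokenizer.get_offsets_mapping('hello world user', ctx, 'closest')
//   // offsets[0] is a list of [char_start, char_end, token] triples
//   console.log(ctx.substring(offsets[0].at(0)[0], offsets[0].at(-1)[1]))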
function get_aggregate_words(toReturn, aggregation_strategy = 'none', texts = ''){
// start end tokens
this.tokenizer.get_offsets_mapping(toReturn.map(x=>x.map(x=>x.word)), texts, 'tokenize').forEach((offsets, i) => {
offsets.forEach((offset, j) => {
toReturn[i][j].start = offset[0]
toReturn[i][j].end = offset[1]
})
})
// aggregation_strategy
if (!['none', 'simple', 'first', 'max', 'average'].includes(aggregation_strategy)) {
console.warn('Unknown aggregation_strategy.');
aggregation_strategy = 'none';
}
let toReturn2 = [], prefix;
prefix = this.tokenizer.model.config.continuing_subword_prefix
if (aggregation_strategy != 'none') {
toReturn2 = Array.from(toReturn);
toReturn.length = 0;
}
// Tagging schemes in NER
// I => “inside”, O => “outside”, B => “beginning”, E => “end”, S => “single token entity”.
// Convert to BIO
toReturn2.forEach(tokens => {
let tags = '';
tokens.forEach((token, i) => {
tags += token.entity[0];
})
if (tags.includes('E')) {
tags = tags.replaceAll(/I(I*)E/g, 'B$1I').replaceAll(/E/g, 'B');
}
if (tags.includes('S')) {
tags = tags.replaceAll(/S/g, 'B');
}
tokens.forEach((token, i) => {
tokens[i].entity = tags[i] + tokens[i].entity.substring(1);
})
})
// Aggregate
toReturn2.forEach(tokens => {
let agg_token = {};
toReturn.push([]);
tokens.forEach((token, i) => {
if (!agg_token.entity) {
agg_token = {
entity: [token.entity],
score: [token.score],
index: [token.index],
word: token.word,
start: [token.start],
end: [token.end],
};
} else {
agg_token.entity.push(token.entity);
agg_token.score.push(token.score);
agg_token.index.push(token.index);
agg_token.word += (token.word.includes(prefix) ? '' : ' ') + token.word.replaceAll(prefix, '');
agg_token.start.push(token.start);
agg_token.end.push(token.end);
}
if (
i == tokens.length - 1 ||
(tokens[i + 1].index - token.index > 1) ||
(tokens[i + 1].entity[0] != 'I' && tokens[i + 1].entity[0] != token.entity[0]) ||
(aggregation_strategy == 'simple' && tokens[i + 1].entity[0] == 'B')
) {
if (aggregation_strategy == 'simple' || aggregation_strategy == 'first') {
agg_token.entity = agg_token.entity[0].substring(2);
agg_token.score = agg_token.score[0];
} else {
const _max = Math.max(...agg_token.score);
agg_token.entity = agg_token.entity[agg_token.score.indexOf(_max)].substring(2);
if (aggregation_strategy == 'max') {
agg_token.score = _max;
} else if (aggregation_strategy == 'average') {
agg_token.score = (arr => arr.reduce((a, b, c, d) => (a + b / d.length), 0))(agg_token.score);
}
}
delete agg_token.index;
agg_token.start = agg_token.start.at(0)
agg_token.end = agg_token.end.at(-1)
toReturn.at(-1).push(agg_token);
agg_token = {};
}
})
})
return toReturn
}
function get_qa_offsets(toReturn, context){
let tmp, input_ids
input_ids = []
toReturn.forEach((val, key)=>{
tmp = this.tokenizer(val.answer, {add_special_tokens: false})
input_ids.push(tmp.input_ids.tolist()[0])
})
this.tokenizer.get_offsets_mapping(input_ids, context, 'closest').forEach((offsets, i) => {
toReturn[i].start = offsets.at(0)[0];
toReturn[i].end = offsets.at(-1)[1];
})
return toReturn
}
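// Example (sketch): after patching, question-answering results also carry
// `start` / `end` character offsets into the context (see the tests below).
//
//   let pipe = await pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad')
//   let res = await pipe('how old am I?', 'My name is Thomas. I am 30 years old.')
//   // res.answer, res.score, plus res.start / res.end into the context string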
/** --- CUSTOM PIPELINES --- */
function register_custom_pipelines(module){
class CustomImageFeatureExtractionPipeline extends ImageFeatureExtractionPipeline {
constructor(options) {
super(options)
this.textModel = undefined
}
/**
* Get text embeddings
* @param {string|string[]} texts
* @returns
*/
async get_text_embeddings(texts) {
if (!this.textModel) {
let modelClassName = getClassNameFromClass(this.model)
if (!['CLIPVisionModelWithProjection'].includes(modelClassName)) {
throw new Error('modelText not supported for this model.');
}
this.textModel = await (getModelClassFromName(modelClassName.replace('Vision', 'Text'))).from_pretrained(this.model.config._name_or_path);
this.tokenizer = await AutoTokenizer.from_pretrained(this.model.config._name_or_path);
}
const text_inputs = this.tokenizer(texts, {
padding: 'max_length',
truncation: true
});
const {
text_embeds
} = await this.textModel(text_inputs);
return text_embeds;
}
/**
* Get similarities between image and text embeddings.
* @param {*} object_embeds
* @param {*} text_embeds
* @returns {Array} array of [probability, index] pairs, sorted by descending probability
*/
get_similarities(object_embeds, text_embeds) {
if (object_embeds.normalize) object_embeds = object_embeds.normalize().tolist();
if (text_embeds.normalize) text_embeds = text_embeds.normalize().tolist();
if (object_embeds.length > text_embeds.length) {
[object_embeds, text_embeds] = [text_embeds, object_embeds];
}
let similarities = text_embeds.map(
x => object_embeds.map(y => 100 * dot(x, y))
)
similarities = softmax(similarities).map((a, b) => [a, b]).sort((a, b) => b[0] - a[0]);
return similarities;
}
}
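// Example (sketch): CLIP-style image / text similarity with the custom
// image-feature-extraction pipeline (model and inputs taken from the tests below).
//
//   let pipe = await pipeline('image-feature-extraction', 'Xenova/1_mobileclip_s0', {quantized: false})
//   let img = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg')
//   let txt = await pipe.get_text_embeddings(['cats', 'dogs', 'birds'])
//   console.log(pipe.get_similarities(img, txt)) // [[probability, index], ...] sorted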
class CustomAudioFeatureExtractionPipeline extends CustomImageFeatureExtractionPipeline {
// get audio feature from audio
async _call(url){
let inputs = await read_audio(url).then(inputs=>this.processor(inputs))
let {audio_embeds} = await this.model(inputs)
return audio_embeds
}
// get audio feature from text
async get_text_embeddings(texts){
if (!this.textModel) {
let modelClassName = getClassNameFromClass(this.model)
if (!['ClapAudioModelWithProjection'].includes(modelClassName)) {
throw new Error('modelText not supported for this model.');
}
this.textModel = await (getModelClassFromName(modelClassName.replace('Audio', 'Text'))).from_pretrained(this.model.config._name_or_path);
this.tokenizer = await AutoTokenizer.from_pretrained(this.model.config._name_or_path);
}
let text_inputs = this.tokenizer(texts, { padding: true, truncation: true })
let {text_embeds} = await this.textModel(text_inputs)
return text_embeds
}
}
class CustomMaskGenerationPipeline extends Pipeline {
constructor(options){
super(options)
}
async _call(inputs, coords){
if(typeof coords != 'undefined' && coords.constructor.name == 'Array'){
if(coords[0].constructor.name == 'Number'){
coords = [coords]
}
}
else {
throw Error('TypeError. Expected coords as [x, y] or an array of [x, y] points')
}
let outputs
// from image url
if(typeof inputs == 'string'){
if(coords[0][0].constructor.name == 'Number') coords = [coords]
// [[[x, y], [x, y]]]
inputs = await RawImage.read(inputs);
try{
let tmp = await this.processor(inputs, coords)
if(!tmp.input_points) throw new Error('no input_points')
inputs = tmp
}
catch(e){
inputs = await this.processor(inputs, {input_points: coords})
}
// inputs_points pixel_values original_sizes reshaped_input_sizes
outputs = await this.model(inputs);
}
// from image embeddings
else {
let data = []
// this.processor.reshape_input_points(input_points, original_sizes, reshaped_input_sizes)
// this.processor.feature_extractor.add_input_labels(input_labels, input_points)
for(let i=0; i<coords.length; i++){
data.push({point: [
coords[i][0] / inputs.original_sizes[0][1],
coords[i][1] / inputs.original_sizes[0][0]
], label: 1})
}
const reshaped = inputs.reshaped_input_sizes[0]
const points = data.map(x => [x.point[0] * reshaped[1], x.point[1] * reshaped[0]])
const labels = data.map(x => BigInt(x.label));
const input_points = new Tensor(
'float32',
points.flat(Infinity),
[1, 1, points.length, 2],
)
const input_labels = new Tensor(
'int64',
labels.flat(Infinity),
[1, 1, labels.length],
)
outputs = await this.model({
...inputs,
input_points,
input_labels,
})
}
const masks = await this.processor.post_process_masks(outputs.pred_masks, inputs.original_sizes, inputs.reshaped_input_sizes);
masks[0].original_sizes = inputs.original_sizes
masks[0].iou_scores = outputs.iou_scores
// masks[0].image = RawImage.fromTensor(masks[0][0].mul(255));
masks[0].image = RawImage.fromTensor(masks[0][0]);
// image.save('mask.png');
return masks
}
async get_image_embeddings(url){
let inputs = await RawImage.read(url).then(inputs=>this.processor(inputs));
let image_embeddings = await this.model.get_image_embeddings(inputs)
image_embeddings.original_sizes = inputs.original_sizes
image_embeddings.reshaped_input_sizes = inputs.reshaped_input_sizes
return image_embeddings
}
static to_canvas(masks, canvasId){
let maskCanvas
if(typeof canvasId == 'undefined'){
maskCanvas = document.createElement("canvas")
} else {
maskCanvas = document.getElementById(canvasId)
}
masks = masks[0]
maskCanvas.width = masks.original_sizes[0][1];
maskCanvas.height = masks.original_sizes[0][0];
const context = maskCanvas.getContext('2d');
const imageData = context.createImageData(maskCanvas.width, maskCanvas.height);
// Select best mask
let scores = masks.iou_scores.data
const numMasks = scores.length; // 3
let bestIndex = 0;
for (let i = 1; i < numMasks; ++i) {
if (scores[i] > scores[bestIndex]) {
bestIndex = i;
}
}
// Fill mask with colour
const pixelData = imageData.data;
for (let i = 0; i < pixelData.length; ++i) {
if (masks.image.data[numMasks * i + bestIndex] === 1) {
const offset = 4 * i;
pixelData[offset] = 0; // red
pixelData[offset + 1] = 114; // green
pixelData[offset + 2] = 189; // blue
pixelData[offset + 3] = 255; // alpha
}
}
// Draw image data to context
context.putImageData(imageData, 0, 0);
if(typeof canvasId == 'undefined'){
document.body.appendChild(maskCanvas)
}
}
}
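// Example (sketch): SAM-style mask generation from a point prompt, then drawing
// the best mask to a canvas (mirrors the mask-generation test below).
//
//   let pipe = await pipeline('mask-generation') // default: Xenova/1_slimsam-77-uniform
//   let masks = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/corgi.jpg', [[340, 250]])
//   CustomMaskGenerationPipeline.to_canvas(masks) // appends a <canvas> to the page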
class CustomImageSegmentationPipeline extends ImageSegmentationPipeline {
constructor(options){
super(options)
}
async _call(url){
if(getClassNameFromClass(this.model) != 'PreTrainedModel'){
return await super._call(...arguments)
}
/*
if(!this.processor){
this.processor = await AutoProcessor.from_pretrained(this.modelName)
// config: { model_type: 'custom' },
}
if(!this.model){
this.model = await AutoModel.from_pretrained(this.modelName)
/*
config: {
do_normalize: true,
do_pad: false,
do_rescale: true,
do_resize: true,
image_mean: [0.5, 0.5, 0.5],
feature_extractor_type: "ImageFeatureExtractor",
image_std: [1, 1, 1],
resample: 2,
rescale_factor: 0.00392156862745098,
size: { width: 1024, height: 1024 },
}
//
}
*/
let image = await RawImage.read(url)
let inputs = await this.processor(image)
let outputs = await this.model({ input: inputs.pixel_values })
let mask = await RawImage.fromTensor(outputs.output[0].mul(255).to('uint8')).resize(image.width, image.height);
return [{label: 'foreground', mask}]
}
}
class CustomObjectDetectionPipeline extends ObjectDetectionPipeline {
constructor(options){
super(options)
}
async _call(url){
if(getClassNameFromClass(this.model) != 'PreTrainedModel'){
return await super._call(...arguments)
}
/*
if(!this.processor){
this.processor = await AutoProcessor.from_pretrained(this.modelName)
// processor.feature_extractor.size = { shortest_edge: 128 } // (Optional) Update resize value
}
if(!this.model){
this.model = await AutoModel.from_pretrained(this.modelName)
}
*/
/*
const context = canvas.getContext('2d', { willReadFrequently: true });
context.drawImage(video, 0, 0, width, height);
const pixelData = context.getImageData(0, 0, width, height).data;
const image = new RawImage(pixelData, width, height, 4);
*/
let image = await RawImage.read(url);
let inputs = await this.processor(image)
const threshold = 0.3;
const { outputs } = await this.model(inputs);
const predictions = outputs.tolist();
const sizes = inputs.reshaped_input_sizes[0].reverse();
const res = []
for (const [xmin, ymin, xmax, ymax, score, id] of predictions) {
if (score < threshold) break;
const bbox = [xmin, ymin, xmax, ymax].map(x => x.toFixed(2)).join(', ')
// console.log(`Found "${this.model.config.id2label[id]}" at [${bbox}] with score ${score.toFixed(2)}.`)
res.push({score, label: this.model.config.id2label[id], box: {xmin, ymin, xmax, ymax}})
}
return res
}
}
class CustomZeroShotImageSegmentationPipeline extends Pipeline {
constructor(options){
super(options)
}
async _call(url, txt){
// Run tokenization
const texts = txt;
const text_inputs = this.tokenizer(texts, { padding: true, truncation: true });
// Read image and run processor
const image = await RawImage.read(url);
const image_inputs = await this.processor(image);
// Run model with both text and pixel inputs
const { logits } = await this.model({ ...text_inputs, ...image_inputs });
// logits: Tensor {
// dims: [4, 352, 352],
// type: 'float32',
// data: Float32Array(495616)[ ... ],
// size: 495616
// }
// Visualize images
const preds = logits
.unsqueeze_(1)
.sigmoid_()
.mul_(255)
.round_()
.to('uint8');
preds.imgs = []
for (let i = 0; i < preds.dims[0]; ++i) {
preds.imgs.push(RawImage.fromTensor(preds[i]));
}
preds.labels = texts
return preds
}
}
/**
* Performs reranking with the cross-encoder on the given query and documents (used by the text-classification pipeline below when a *rerank* model is loaded). Returns a sorted list with the document indices and scores.
* @param {string} query A single query
* @param {string[]} documents A list of documents
* @param {Object} options Options for ranking
* @param {number} [options.top_k=undefined] Return the top-k documents. If undefined, all documents are returned.
* @param {boolean} [options.return_documents=false] If true, also returns the documents. If false, only returns the indices and scores.
*/
class CustomTextClassificationPipeline extends TextClassificationPipeline {
constructor(options){
super(options)
}
async _call(){
if(!this.model.config._name_or_path.includes('rerank')){
return super._call(...arguments)
}
let {model, tokenizer} = this
async function rank(query, documents, {
top_k = undefined,
return_documents = false,
} = {}) {
const inputs = tokenizer(
new Array(documents.length).fill(query),
{
text_pair: documents,
padding: true,
truncation: true,
}
)
const { logits } = await model(inputs);
return logits
.sigmoid()
.tolist()
.map(([score], i) => ({
corpus_id: i,
score,
...(return_documents ? { text: documents[i] } : {})
}))
.sort((a, b) => b.score - a.score)
.slice(0, top_k);
}
return rank(...arguments)
}
}
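// Example (sketch): reranking documents for a query; the custom behaviour only
// kicks in when the loaded model name contains "rerank" (query and documents
// below are illustrative).
//
//   let pipe = await pipeline('text-classification', 'mixedbread-ai/mxbai-rerank-xsmall-v1')
//   let ranked = await pipe('who wrote Hamlet?', ['Shakespeare wrote Hamlet.', 'Paris is the capital of France.'], {top_k: 1, return_documents: true})
//   // -> [{corpus_id: 0, score: ..., text: 'Shakespeare wrote Hamlet.'}]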
register_pipeline('image-feature-extraction', {
pipeline: CustomImageFeatureExtractionPipeline,
model: 'AutoModelForImageFeatureExtraction',
processor: 'AutoProcessor',
default_model: 'Xenova/vit-base-patch16-224-in21k'
})
register_pipeline('audio-feature-extraction', {
pipeline: CustomAudioFeatureExtractionPipeline,
model: 'ClapAudioModelWithProjection',
processor: 'AutoProcessor',
default_model: 'Xenova/larger_clap_music_and_speech'
})
register_pipeline('mask-generation', {
pipeline: CustomMaskGenerationPipeline,
model: 'SamModel',
processor: 'AutoProcessor',
default_model: 'Xenova/1_slimsam-77-uniform'
})
register_pipeline('image-segmentation', {
pipeline: CustomImageSegmentationPipeline,
model: ['AutoModelForImageSegmentation', 'AutoModelForSemanticSegmentation', 'AutoModel'],
processor: 'AutoProcessor',
default_model: 'briaai/RMBG-1.4'
})
register_pipeline('object-detection', {
pipeline: CustomObjectDetectionPipeline,
model: ['AutoModelForObjectDetection', 'AutoModel'],
processor: 'AutoProcessor',
default_model: 'Xenova/1_gelan-c_all'
})
register_pipeline('zero-shot-image-segmentation', {
pipeline: CustomZeroShotImageSegmentationPipeline,
model: 'CLIPSegForImageSegmentation',
processor: 'AutoProcessor',
tokenizer: 'AutoTokenizer',
default_model: 'Xenova/1_clipseg-rd64-refined'
})
register_pipeline('text-classification', {
pipeline: CustomTextClassificationPipeline,
model: 'AutoModelForSequenceClassification',
tokenizer: 'AutoTokenizer',
default_model: 'mixedbread-ai/mxbai-rerank-xsmall-v1'
})
module.CustomImageFeatureExtractionPipeline = CustomImageFeatureExtractionPipeline
module.CustomAudioFeatureExtractionPipeline = CustomAudioFeatureExtractionPipeline
module.CustomMaskGenerationPipeline = CustomMaskGenerationPipeline
module.CustomImageSegmentationPipeline = CustomImageSegmentationPipeline
module.CustomObjectDetectionPipeline = CustomObjectDetectionPipeline
module.CustomZeroShotImageSegmentationPipeline = CustomZeroShotImageSegmentationPipeline
module.CustomTextClassificationPipeline = CustomTextClassificationPipeline
}
/** --- UNIT TEST --- */
async function runTests(local = true){
let pipe, res, ctx
if(local) setLocalEnv()
/*
console.log('%c ** test wav generation', 'background: blue; color: white');
pipe = await module.pipeline("text-to-speech", "Xenova/mms-tts-eng", {dispose_pipe: pipe})
res = await pipe('your dog is really cute')
res = new module.RawAudio(res.audio, res.sampling_rate)
console.log(res.toBlob())
// await res.save('test.wav')
console.log('%c ** test wav interleave', 'background: blue; color: white');
res = new module.RawAudio([new Float32Array([1, 2, 3]), new Float32Array([1, 2, 3])], 16000)
console.log(res.interleave(true).toString())
console.log('%c ** test getModelClassFromName', 'background: blue; color: white');
res = module.getModelClassFromName('AutoModel')
console.log(res)
console.log('%c ** test register_pipeline', 'background: blue; color: white');
class AudioFeatureExtractionPipeline extends module.Pipeline {
async _call(input, kwargs = {}) {
input = await module.read_audio(input).then(input=>this.processor(input))
let { audio_embeds } = await this.model(input)
return audio_embeds
}
}
module.register_pipeline('testing', {
pipeline: AudioFeatureExtractionPipeline,
model: 'ClapAudioModelWithProjection',
processor: 'AutoProcessor',
default_model: 'Xenova/larger_clap_music_and_speech'
})
pipe = await module.pipeline('testing', '', {dispose_pipe: pipe})
res = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav')
console.log(res)
console.log('%c ** test no warn log', 'background: blue; color: white');
if(module.env.version[0] == 3){
module.env.backends.onnx.logLevel = 'warning'
pipe = await module.pipeline("text-generation", "Xenova/distilgpt2", {model_file_name: "decoder_model_merged", dispose_pipe: pipe})
module.env.backends.onnx.logLevel = 'error'
pipe = await module.pipeline("text-generation", "Xenova/distilgpt2", {model_file_name: "decoder_model_merged", dispose_pipe: pipe})
}
console.log('%c ** test token classification aggregation_strategy / start end char', 'background: blue; color: white');
pipe = await module.pipeline("token-classification", "tarekziade/distilbert-NER", {dispose_pipe: pipe})
ctx = 'Hugging Face is a technology company that was founded in 2016 by Clément Delangue, Julien Chaumond, and Thomas Wolf.'
res = await pipe(ctx)
console.log(res)
res = await pipe(ctx, {aggregation_strategy: 'simple'})
console.log(res)
res = await pipe(ctx, {aggregation_strategy: 'average'})
console.log(res)
console.log(ctx.substring(res[1].start, res[1].end))
console.log('%c ** test question answering start end char / tokenizer get_offsets_mapping', 'background: blue; color: white');
pipe = await module.pipeline("question-answering", "Xenova/distilbert-base-uncased-distilled-squad", {dispose_pipe: pipe})
res = await pipe('how old am I?', 'My name is Thomas. I am 30 years old.')
console.log(res)
ctx = 'hello world, hello world, hello world user, user'
res = pipe.tokenizer(ctx, {return_offsets_mapping: true})
console.log(res)
res = pipe.tokenizer.get_offsets_mapping('hello world user', ctx)
console.log(res)
res = pipe.tokenizer.get_offsets_mapping('hello world user', ctx, 'closest')
console.log(res)
console.log(ctx.substring(res[0].at(0)[0], res[0].at(-1)[1]))
console.log('%c ** test register_pipeline updating task', 'background: blue; color: white');
module.register_pipeline('testing', {
pipeline: module.ImageFeatureExtractionPipeline,
model: 'AutoModelForImageFeatureExtraction',
processor: 'AutoProcessor',
default_model: 'Xenova/1_mobileclip_s0'
})
pipe = await module.pipeline('testing', '', {dispose_pipe: pipe})
console.log(typeof pipe.get_text_embeddings)
module.register_pipeline('testing', {
pipeline: module.CustomImageFeatureExtractionPipeline,
})
pipe = await module.pipeline('testing', '', {dispose_pipe: pipe})
console.log(typeof pipe.get_text_embeddings)
console.log('%c ** test custom image feature extraction', 'background: blue; color: white');
pipe = await pipeline('image-feature-extraction', 'Xenova/1_mobileclip_s0', {quantized: false, dispose_pipe: pipe})
res = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg')
ctx = await pipe.get_text_embeddings(['cats', 'dogs', 'birds'])
console.log(pipe.get_similarities(res, ctx))
console.log('%c ** test custom audio feature extraction', 'background: blue; color: white');
pipe = await pipeline('audio-feature-extraction', '', {dispose_pipe: pipe})
res = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav')
ctx = await pipe.get_text_embeddings(['cats', 'dogs', 'birds', 'man', 'woman'])
console.log(pipe.get_similarities(res, ctx))
console.log('%c ** test custom mask generation', 'background: blue; color: white');
pipe = await pipeline('mask-generation', '', {dispose_pipe: pipe})
res = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/corgi.jpg', [[340, 250]])
console.log(res)
module.CustomMaskGenerationPipeline.to_canvas(res)
*/
console.log('%c ** test custom image segmentation', 'background: blue; color: white');
pipe = await pipeline('image-segmentation', '', {dispose_pipe: pipe})
res = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/corgi.jpg')
console.log(res)
pipe = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic', {dispose_pipe: pipe})
res = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg')
console.log(res)
console.log('%c ** test custom object detection', 'background: blue; color: white');
pipe = await pipeline('object-detection', '', {dispose_pipe: pipe})
res = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg')
console.log(res)
pipe = await pipeline('object-detection', 'Xenova/0_detr-resnet-50', {dispose_pipe: pipe})
res = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg')
console.log(res)
console.log('%c ** test custom zero-shot-image-segmentation', 'background: blue; color: white');
pipe = await pipeline('zero-shot-image-segmentation', '', {dispose_pipe: pipe})
res = await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg', ['cat', 'remote', 'wood', 'a jar'])
console.log(res)
}
/** --- MODULE --- */
(()=>{
setApis({IS_BROWSER_ENV: true});
module.TextToAudioPipeline.prototype.v2_call_text_to_waveform = module.TextToAudioPipeline.prototype._call_text_to_waveform
module.TextToAudioPipeline.prototype.v2_call_text_to_spectrogram = module.TextToAudioPipeline.prototype._call_text_to_spectrogram
module.TextToAudioPipeline.prototype._call_text_to_waveform = _call_text_to_waveform
module.TextToAudioPipeline.prototype._call_text_to_spectrogram = _call_text_to_spectrogram
module.saveBlob = saveBlob
module.RawAudio = RawAudio
populateMapping(module)
module.getModelClassFromName = getModelClassFromName
module.getClassNameFromClass = getClassNameFromClass
module.register_pipeline = register_pipeline
module.v3_pipeline = module.pipeline
module.pipeline = pipeline
module.PreTrainedTokenizer.prototype.get_offsets_mapping = get_offsets_mapping
module.PreTrainedTokenizer.prototype.v2_call = module.PreTrainedTokenizer.prototype._call
module.PreTrainedTokenizer.prototype._call = PreTrainedTokenizer_call
module.TokenClassificationPipeline.prototype.v2_call = module.TokenClassificationPipeline.prototype._call
module.TokenClassificationPipeline.prototype._call = TokenClassificationPipeline_call
module.QuestionAnsweringPipeline.prototype.v2_call = module.QuestionAnsweringPipeline.prototype._call
module.QuestionAnsweringPipeline.prototype._call = QuestionAnsweringPipeline_call
register_custom_pipelines(module)
// import2.register_v3_pipelines(module)
if(typeof import2 == 'object'){
Object.assign(
module,
...v2tasks.map(key => ({
[key]: import2[key]
}))
);
module.v2_pipeline = import2.pipeline
}
module.runTests = runTests
})()
export default module
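// Example (sketch): consuming this wrapper from another ES module. The import
// path is hypothetical; adjust it to wherever this file is served from.
//
//   import transformers from './transformers-patch.js'
//   const pipe = await transformers.pipeline('object-detection')
//   console.log(await pipe('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg'))
//   // or run the built-in checks: await transformers.runTests()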