| | import fs from 'fs'; |
| | import Ajv from 'ajv'; |
| |
|
| | export class ModelInfo { |
| | |
| | |
| | |
| | key: string; |
| | etag: string; |
| | lastModified: Date; |
| | size: number; |
| | modelId: string; |
| | author?: string; |
| | siblings: any[]; |
| | config: any; |
| | configTxt |
| |
|
| | ?: string; |
| | downloads?: number; |
| | naturalIdx: number; |
| | cardSource?: string; |
| | cardData?: any; |
| |
|
| | constructor(o: Partial<ModelInfo>) { |
| | Object.assign(this, o); |
| | this.config = this._loadConfig('ai_config.json'); |
| | } |
| |
|
| | private _loadConfig(filePath: string): any { |
| | try { |
| | const configData = fs.readFileSync(filePath, 'utf8'); |
| | return JSON.parse(configData); |
| | } catch (error) { |
| | console.error(`Failed to load config from ${filePath}:`, error); |
| | return { |
| | "model_name": "mistralai/Mistral-7B-Instruct-v0.2", |
| | "max_input_length": 4096, |
| | "safety_thresholds": { |
| | "memory": 85, |
| | "cpu": 90 |
| | } |
| | }; |
| | } |
| | } |
| |
|
| | get jsonUrl(): string { |
| | return `https://your-bucket-url/${this.key}`; |
| | } |
| |
|
| | get cdnJsonUrl(): string { |
| | return `https://cdn.your-bucket-url/${this.key}`; |
| | } |
| |
|
| | async validate(): Promise<Ajv.ErrorObject[] | undefined> { |
| | const jsonSchema = JSON.parse( |
| | await fs.promises.readFile('path/to/your/schema.json', 'utf8') |
| | ); |
| | const ajv = new Ajv(); |
| | ajv.validate(jsonSchema, this |
| |
|
| | .config); |
| | return ajv.errors ?? undefined; |
| | } |
| |
|
| | |
| | |
| | |
| | get readmeKey(): string { |
| | return this.key.replace("config.json", "README.md"); |
| | } |
| |
|
| | get readmeTrimmedKey(): string { |
| | return this.readmeKey.replace("S3_MODELS_PREFIX", ""); |
| | } |
| |
|
| | |
| | |
| | |
| | get mlFrameworks(): string[] { |
| | return Object.keys(FileType).filter(k => { |
| | const filename = FileType[k]; |
| | const isExtension = filename.startsWith("."); |
| | return isExtension |
| | ? this.siblings.some(sibling => sibling.rfilename.endsWith(filename)) |
| | : this.siblings.some(sibling => sibling.rfilename === filename); |
| | }); |
| | } |
| |
|
| | |
| | |
| | |
| | get autoArchitecture(): string { |
| | const useTF = this.mlFrameworks.includes("tf") && !this.mlFrameworks.includes("pytorch"); |
| | const arch = this.autoArchType[0]; |
| | return |
| |
|
| | useTF ? `TF${arch}` : arch; |
| | } |
| |
|
| | get autoArchType(): [string, string | undefined] { |
| | const architectures = this.config.architectures; |
| | if (!architectures || architectures.length === 0) { |
| | return ["AutoModel", undefined]; |
| | } |
| | const architecture = architectures[0].toString() as string; |
| | if (architecture.endsWith("ForQuestionAnswering")) { |
| | return ["AutoModelForQuestionAnswering", "question-answering"]; |
| | } |
| | else if (architecture.endsWith("ForTokenClassification")) { |
| | return ["AutoModelForTokenClassification", "token-classification"]; |
| | } |
| | else if (architecture endsWith("ForSequenceClassification")) { |
| | return ["AutoModelForSequenceClassification", "text-classification"]; |
| | } |
| | else if (architecture endsWith("ForMultipleChoice")) { |
| | return ["AutoModelForMultipleChoice", "multiple-choice"]; |
| | } |
| | else if (architecture endsWith("ForPreTraining")) { |
| | return ["AutoModelForPreTraining", "pretraining"]; |
| | } |
| | else if (architecture endsWith("ForMaskedLM")) { |
| | return ["AutoModelForMaskedLM", "masked-lm"]; |
| | } |
| | else if (architecture endsWith("ForCausalLM")) { |
| | return ["AutoModelForCausalLM", "causal-lm"]; |
| | } |
| | else if ( |
| | architecture endsWith("ForConditionalGeneration") |
| | || architecture endsWith("MTModel") |
| | || architecture == "EncoderDecoderModel" |
| | ) { |
| | return ["AutoModelForSeq2SeqLM", "seq2seq"]; |
| | } |
| | else if (architecture includes("LMHead")) { |
| | return ["AutoModelWithLMHead", "lm-head"]; |
| | } |
| | else if (architecture endsWith("Model")) { |
| | return ["AutoModel", undefined]; |
| | } |
| | else { |
| | return [architecture, undefined]; |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | get tags(): string[] { |
| | const x = [ |
| | ...this.mlFrameworks, |
| | ]; |
| | if (this.config.model_type) { |
| | x.push(this.config.model_type); |
| | } |
| | const arch = this.autoArchType[1]; |
| | if (arch) { |
| | x.push(arch); |
| | } |
| | if (arch === "lm-head" && this.config.model_type) { |
| | if ( |
| | ["t5", "bart", "marian"].includes(this.config.model_type)) { |
| |
|
| |
|
| | x.push("seq2seq"); |
| | } |
| | else if (["gpt2", "ctrl", "openai-gpt", "xlnet", "transfo-xl", "reformer"].includes(this.config.model_type)) { |
| | x.push("causal-lm"); |
| | } |
| | else { |
| | x.push("masked-lm"); |
| | } |
| | } |
| | x.push(...this.languages() ?? []); |
| | x.push(...this.datasets().map(k => `dataset:${k}`)); |
| | for (let [k, v] of Object.entries(this.cardData ?? {})) { |
| | if (!['tags', 'license'].includes(k)) { |
| | |
| | continue; |
| | } |
| | if (typeof v === 'string') { |
| | v = [ v ]; |
| | } else if (Utils.isStrArray(v)) { |
| | |
| | } else { |
| | c.error(`Invalid ${k} tag type`, v); |
| | c.debug(this.modelId); |
| | |
| |
|
| | continue; |
| | } |
| | if (k === 'license') { |
| | x.push(...v.map(x => `license:${x.toLowerCase()}`)); |
| | } else { |
| | x.push(...v); |
| | } |
| | } |
| | if (this.config.task_specific_params) { |
| | const keys = Object.keys(this.config.task_specific_params); |
| | for (const key of keys) { |
| | x.push(`pipeline:${key}`); |
| | } |
| | } |
| | const explicit_ptag = this.cardData?.pipeline_tag; |
| | if (explicit_ptag) { |
| | if (typeof explicit_ptag === 'string') { |
| | x.push(`pipeline_tag:${explicit_ptag}`); |
| | } else { |
| | x.push(`pipeline_tag:invalid`); |
| | } |
| | } |
| | return [...new Set(x)]; |
| | } |
| |
|
| | get pipeline_tag(): (keyof typeof PipelineType) | undefined { |
| | if (isBlacklisted(this.modelId) || this.cardData?.inference === false) { |
| | return undefined; |
| | } |
| | |
| | const explicit_ptag = this.cardData?.pipeline_tag; |
| | if (explicit_ptag) { |
| | if (typeof explicit_ptag == 'string') { |
| | return explicit_ptag as keyof typeof PipelineType; |
| | } else { |
| | c.error(`Invalid explicit pipeline_tag`, explicit_ptag); |
| | return undefined; |
| | } |
| | } |
| | |
| | const tags = this.tags; |
| | |
| | |
| | const EXPLICIT_PREFIX = "pipeline:"; |
| | const explicit_tag = tags find(x => x.startsWith(EXPLICIT_PREFIX + `translation`)); |
| | if (!!explicit_tag) { |
| | return "translation"; |
| | } |
| | |
| | for (const ptag of ALL_PIPELINE_TYPES) { |
| | if (tags includes(ptag)) { |
| | return ptag; |
| | } |
| | } |
| | |
| | const mapping = new Map<string, keyof typeof PipelineType>([ |
| | ["seq2seq", "text-generation"], |
| | ["causal-lm", "text-generation"], |
| | ["masked-lm", "fill-mask"], |
| | ]); |
| | for (const [tag, ptag] of mapping) { |
| | if (tags includes(tag)) { |
| | return ptag; |
| | } |
| | } |
| | } |
| | } |