@@ -284,20 +284,67 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
284
284
########################
285
285
if model_card is not None :
286
286
287
- if "model_name" in model_card and metadata .name is None :
288
- # Not part of huggingface model card standard but notice some model creator using it
289
- # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
290
- metadata .name = model_card .get ("model_name" )
287
+ def use_model_card_metadata (metadata_key : str , model_card_key : str ):
288
+ if model_card_key in model_card and getattr (metadata , metadata_key , None ) is None :
289
+ setattr (metadata , metadata_key , model_card .get (model_card_key ))
291
290
292
- if "model_creator" in model_card and metadata .author is None :
293
- # Not part of huggingface model card standard but notice some model creator using it
294
- # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
295
- metadata .author = model_card .get ("model_creator" )
291
+ def use_array_model_card_metadata (metadata_key : str , model_card_key : str ):
292
+ # Note: Appends to the existing value rather than replacing it if one already exists
293
+ tags_value = model_card .get (model_card_key , None )
294
+ if tags_value is None :
295
+ return
296
296
297
- if "model_type" in model_card and metadata .basename is None :
298
- # Not part of huggingface model card standard but notice some model creator using it
299
- # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
300
- metadata .basename = model_card .get ("model_type" )
297
+ current_value = getattr (metadata , metadata_key , None )
298
+ if current_value is None :
299
+ current_value = []
300
+
301
+ if isinstance (tags_value , str ):
302
+ current_value .append (tags_value )
303
+ elif isinstance (tags_value , list ):
304
+ current_value .extend (tags_value )
305
+
306
+ setattr (metadata , metadata_key , current_value )
307
+
308
+ # LLAMA.cpp's direct internal convention
309
+ # (Definitely not part of hugging face formal/informal standard)
310
+ #########################################
311
+ use_model_card_metadata ("name" , "name" )
312
+ use_model_card_metadata ("author" , "author" )
313
+ use_model_card_metadata ("version" , "version" )
314
+ use_model_card_metadata ("organization" , "organization" )
315
+ use_model_card_metadata ("description" , "description" )
316
+ use_model_card_metadata ("finetune" , "finetune" )
317
+ use_model_card_metadata ("basename" , "basename" )
318
+ use_model_card_metadata ("size_label" , "size_label" )
319
+ use_model_card_metadata ("source_url" , "url" )
320
+ use_model_card_metadata ("source_doi" , "doi" )
321
+ use_model_card_metadata ("source_uuid" , "uuid" )
322
+ use_model_card_metadata ("source_repo_url" , "repo_url" )
323
+
324
+ # LLAMA.cpp's huggingface style convention
325
+ # (Definitely not part of hugging face formal/informal standard... but with a model_ prefix added to match their style)
326
+ ###########################################
327
+ use_model_card_metadata ("name" , "model_name" )
328
+ use_model_card_metadata ("author" , "model_author" )
329
+ use_model_card_metadata ("version" , "model_version" )
330
+ use_model_card_metadata ("organization" , "model_organization" )
331
+ use_model_card_metadata ("description" , "model_description" )
332
+ use_model_card_metadata ("finetune" , "model_finetune" )
333
+ use_model_card_metadata ("basename" , "model_basename" )
334
+ use_model_card_metadata ("size_label" , "model_size_label" )
335
+ use_model_card_metadata ("source_url" , "model_url" )
336
+ use_model_card_metadata ("source_doi" , "model_doi" )
337
+ use_model_card_metadata ("source_uuid" , "model_uuid" )
338
+ use_model_card_metadata ("source_repo_url" , "model_repo_url" )
339
+
340
+ # Hugging Face Direct Convention
341
+ #################################
342
+
343
+ # Not part of the huggingface model card standard, but some model creators have been noticed using it
344
+ # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
345
+ use_model_card_metadata ("name" , "model_name" )
346
+ use_model_card_metadata ("author" , "model_creator" )
347
+ use_model_card_metadata ("basename" , "model_type" )
301
348
302
349
if "base_model" in model_card :
303
350
# This represents the parent models that this is based on
@@ -329,58 +376,18 @@ def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = No
329
376
base_model ["repo_url" ] = f"https://huggingface.co/{ org_component } /{ model_full_name_component } "
330
377
metadata .base_models .append (base_model )
331
378
332
- if "license" in model_card and metadata .license is None :
333
- metadata .license = model_card .get ("license" )
334
-
335
- if "license_name" in model_card and metadata .license_name is None :
336
- metadata .license_name = model_card .get ("license_name" )
337
-
338
- if "license_link" in model_card and metadata .license_link is None :
339
- metadata .license_link = model_card .get ("license_link" )
340
-
341
- tags_value = model_card .get ("tags" , None )
342
- if tags_value is not None :
343
-
344
- if metadata .tags is None :
345
- metadata .tags = []
346
-
347
- if isinstance (tags_value , str ):
348
- metadata .tags .append (tags_value )
349
- elif isinstance (tags_value , list ):
350
- metadata .tags .extend (tags_value )
351
-
352
- pipeline_tags_value = model_card .get ("pipeline_tag" , None )
353
- if pipeline_tags_value is not None :
354
-
355
- if metadata .tags is None :
356
- metadata .tags = []
357
-
358
- if isinstance (pipeline_tags_value , str ):
359
- metadata .tags .append (pipeline_tags_value )
360
- elif isinstance (pipeline_tags_value , list ):
361
- metadata .tags .extend (pipeline_tags_value )
362
-
363
- language_value = model_card .get ("languages" , model_card .get ("language" , None ))
364
- if language_value is not None :
365
-
366
- if metadata .languages is None :
367
- metadata .languages = []
368
-
369
- if isinstance (language_value , str ):
370
- metadata .languages .append (language_value )
371
- elif isinstance (language_value , list ):
372
- metadata .languages .extend (language_value )
379
+ use_model_card_metadata ("license" , "license" )
380
+ use_model_card_metadata ("license_name" , "license_name" )
381
+ use_model_card_metadata ("license_link" , "license_link" )
373
382
374
- dataset_value = model_card . get ( "datasets " , model_card . get ( "dataset" , None ) )
375
- if dataset_value is not None :
383
+ use_array_model_card_metadata ( "tags " , "tags" )
384
+ use_array_model_card_metadata ( "tags" , "pipeline_tag" )
376
385
377
- if metadata . datasets is None :
378
- metadata . datasets = []
386
+ use_array_model_card_metadata ( "languages" , "languages" )
387
+ use_array_model_card_metadata ( "languages" , "language" )
379
388
380
- if isinstance (dataset_value , str ):
381
- metadata .datasets .append (dataset_value )
382
- elif isinstance (dataset_value , list ):
383
- metadata .datasets .extend (dataset_value )
389
+ use_array_model_card_metadata ("datasets" , "datasets" )
390
+ use_array_model_card_metadata ("datasets" , "dataset" )
384
391
385
392
# Hugging Face Parameter Heuristics
386
393
####################################
0 commit comments