@@ -1175,11 +1175,11 @@ class LargeList:
1175
1175
It is backed by `pyarrow.LargeListType`, which is like `pyarrow.ListType` but with 64-bit rather than 32-bit offsets.
1176
1176
1177
1177
Args:
1178
- dtype ([`FeatureType`]):
1178
+ feature ([`FeatureType`]):
1179
1179
Child feature data type of each item within the large list.
1180
1180
"""
1181
1181
1182
- dtype : Any
1182
+ feature : Any
1183
1183
id : Optional [str ] = None
1184
1184
# Automatically constructed
1185
1185
pa_type : ClassVar [Any ] = None
@@ -1218,8 +1218,6 @@ def _check_non_null_non_empty_recursive(obj, schema: Optional[FeatureType] = Non
1218
1218
pass
1219
1219
elif isinstance (schema , (list , tuple )):
1220
1220
schema = schema [0 ]
1221
- elif isinstance (schema , LargeList ):
1222
- schema = schema .dtype
1223
1221
else :
1224
1222
schema = schema .feature
1225
1223
return _check_non_null_non_empty_recursive (obj [0 ], schema )
@@ -1252,7 +1250,7 @@ def get_nested_type(schema: FeatureType) -> pa.DataType:
1252
1250
value_type = get_nested_type (schema [0 ])
1253
1251
return pa .list_ (value_type )
1254
1252
elif isinstance (schema , LargeList ):
1255
- value_type = get_nested_type (schema .dtype )
1253
+ value_type = get_nested_type (schema .feature )
1256
1254
return pa .large_list (value_type )
1257
1255
elif isinstance (schema , Sequence ):
1258
1256
value_type = get_nested_type (schema .feature )
@@ -1303,7 +1301,7 @@ def encode_nested_example(schema, obj, level=0):
1303
1301
return None
1304
1302
else :
1305
1303
if len (obj ) > 0 :
1306
- sub_schema = schema .dtype
1304
+ sub_schema = schema .feature
1307
1305
for first_elmt in obj :
1308
1306
if _check_non_null_non_empty_recursive (first_elmt , sub_schema ):
1309
1307
break
@@ -1384,7 +1382,7 @@ def decode_nested_example(schema, obj, token_per_repo_id: Optional[Dict[str, Uni
1384
1382
if obj is None :
1385
1383
return None
1386
1384
else :
1387
- sub_schema = schema .dtype
1385
+ sub_schema = schema .feature
1388
1386
if len (obj ) > 0 :
1389
1387
for first_elmt in obj :
1390
1388
if _check_non_null_non_empty_recursive (first_elmt , sub_schema ):
@@ -1463,8 +1461,8 @@ def generate_from_dict(obj: Any):
1463
1461
raise ValueError (f"Feature type '{ _type } ' not found. Available feature types: { list (_FEATURE_TYPES .keys ())} " )
1464
1462
1465
1463
if class_type == LargeList :
1466
- dtype = obj .pop ("dtype " )
1467
- return LargeList (generate_from_dict (dtype ), ** obj )
1464
+ feature = obj .pop ("feature " )
1465
+ return LargeList (feature = generate_from_dict (feature ), ** obj )
1468
1466
if class_type == Sequence :
1469
1467
feature = obj .pop ("feature" )
1470
1468
return Sequence (feature = generate_from_dict (feature ), ** obj )
@@ -1493,8 +1491,8 @@ def generate_from_arrow_type(pa_type: pa.DataType) -> FeatureType:
1493
1491
return [feature ]
1494
1492
return Sequence (feature = feature )
1495
1493
elif isinstance (pa_type , pa .LargeListType ):
1496
- dtype = generate_from_arrow_type (pa_type .value_type )
1497
- return LargeList (dtype )
1494
+ feature = generate_from_arrow_type (pa_type .value_type )
1495
+ return LargeList (feature = feature )
1498
1496
elif isinstance (pa_type , _ArrayXDExtensionType ):
1499
1497
array_feature = [None , None , Array2D , Array3D , Array4D , Array5D ][pa_type .ndims ]
1500
1498
return array_feature (shape = pa_type .shape , dtype = pa_type .value_type )
@@ -1601,7 +1599,7 @@ def _visit(feature: FeatureType, func: Callable[[FeatureType], Optional[FeatureT
1601
1599
elif isinstance (feature , (list , tuple )):
1602
1600
out = func ([_visit (feature [0 ], func )])
1603
1601
elif isinstance (feature , LargeList ):
1604
- out = func (LargeList (_visit (feature .dtype , func )))
1602
+ out = func (LargeList (_visit (feature .feature , func )))
1605
1603
elif isinstance (feature , Sequence ):
1606
1604
out = func (Sequence (_visit (feature .feature , func ), length = feature .length ))
1607
1605
else :
@@ -1624,7 +1622,7 @@ def require_decoding(feature: FeatureType, ignore_decode_attribute: bool = False
1624
1622
elif isinstance (feature , (list , tuple )):
1625
1623
return require_decoding (feature [0 ])
1626
1624
elif isinstance (feature , LargeList ):
1627
- return require_decoding (feature .dtype )
1625
+ return require_decoding (feature .feature )
1628
1626
elif isinstance (feature , Sequence ):
1629
1627
return require_decoding (feature .feature )
1630
1628
else :
@@ -1644,7 +1642,7 @@ def require_storage_cast(feature: FeatureType) -> bool:
1644
1642
elif isinstance (feature , (list , tuple )):
1645
1643
return require_storage_cast (feature [0 ])
1646
1644
elif isinstance (feature , LargeList ):
1647
- return require_storage_cast (feature .dtype )
1645
+ return require_storage_cast (feature .feature )
1648
1646
elif isinstance (feature , Sequence ):
1649
1647
return require_storage_cast (feature .feature )
1650
1648
else :
@@ -1664,7 +1662,7 @@ def require_storage_embed(feature: FeatureType) -> bool:
1664
1662
elif isinstance (feature , (list , tuple )):
1665
1663
return require_storage_cast (feature [0 ])
1666
1664
elif isinstance (feature , LargeList ):
1667
- return require_storage_cast (feature .dtype )
1665
+ return require_storage_cast (feature .feature )
1668
1666
elif isinstance (feature , Sequence ):
1669
1667
return require_storage_cast (feature .feature )
1670
1668
else :
@@ -1876,8 +1874,8 @@ def to_yaml_inner(obj: Union[dict, list]) -> dict:
1876
1874
if isinstance (obj , dict ):
1877
1875
_type = obj .pop ("_type" , None )
1878
1876
if _type == "LargeList" :
1879
- value_type = obj .pop ("dtype " )
1880
- return simplify ({"large_list" : to_yaml_inner (value_type ), ** obj })
1877
+ _feature = obj .pop ("feature " )
1878
+ return simplify ({"large_list" : to_yaml_inner (_feature ), ** obj })
1881
1879
elif _type == "Sequence" :
1882
1880
_feature = obj .pop ("feature" )
1883
1881
return simplify ({"sequence" : to_yaml_inner (_feature ), ** obj })
@@ -1947,8 +1945,8 @@ def from_yaml_inner(obj: Union[dict, list]) -> Union[dict, list]:
1947
1945
return {}
1948
1946
_type = next (iter (obj ))
1949
1947
if _type == "large_list" :
1950
- _dtype = unsimplify (obj ).pop (_type )
1951
- return {"dtype " : from_yaml_inner (_dtype ), ** obj , "_type" : "LargeList" }
1948
+ _feature = unsimplify (obj ).pop (_type )
1949
+ return {"feature " : from_yaml_inner (_feature ), ** obj , "_type" : "LargeList" }
1952
1950
if _type == "sequence" :
1953
1951
_feature = unsimplify (obj ).pop (_type )
1954
1952
return {"feature" : from_yaml_inner (_feature ), ** obj , "_type" : "Sequence" }
@@ -2180,7 +2178,7 @@ def recursive_reorder(source, target, stack=""):
2180
2178
elif isinstance (source , LargeList ):
2181
2179
if not isinstance (target , LargeList ):
2182
2180
raise ValueError (f"Type mismatch: between { source } and { target } " + stack_position )
2183
- return LargeList (recursive_reorder (source .dtype , target .dtype , stack ))
2181
+ return LargeList (recursive_reorder (source .feature , target .feature , stack ))
2184
2182
else :
2185
2183
return source
2186
2184
0 commit comments