@@ -482,7 +482,6 @@ def _find_dr_record_by_name(vd, path, encoding):
482
482
return root_dir_record
483
483
484
484
splitpath = utils .split_path (path )
485
-
486
485
currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
487
486
488
487
entry = root_dir_record
@@ -505,7 +504,6 @@ def _find_dr_record_by_name(vd, path, encoding):
505
504
index = lo
506
505
if index != len (thelist ) and thelist [index ].file_ident == currpath :
507
506
child = thelist [index ]
508
-
509
507
if child is None :
510
508
# We failed to find this component of the path, so break out of the
511
509
# loop and fail.
@@ -520,7 +518,6 @@ def _find_dr_record_by_name(vd, path, encoding):
520
518
# We found the last child we are looking for; return it.
521
519
if not splitpath :
522
520
return child
523
-
524
521
if not child .is_dir ():
525
522
break
526
523
entry = child
@@ -705,8 +702,8 @@ def _seek_to_extent(self, extent):
705
702
self ._cdfp .seek (extent * self .logical_block_size )
706
703
707
704
@functools .lru_cache (maxsize = 256 )
708
- def _find_iso_record (self , iso_path ):
709
- # type: (bytes) -> dr.DirectoryRecord
705
+ def _find_iso_record (self , iso_path , encoding = 'utf-8' ):
706
+ # type: (bytes, str ) -> dr.DirectoryRecord
710
707
"""
711
708
An internal method to find a directory record on the ISO given an ISO
712
709
path. If the entry is found, it returns the directory record object
@@ -718,11 +715,11 @@ def _find_iso_record(self, iso_path):
718
715
Returns:
719
716
The directory record entry representing the entry on the ISO.
720
717
"""
721
- return _find_dr_record_by_name (self .pvd , iso_path , 'utf-8' )
718
+ return _find_dr_record_by_name (self .pvd , iso_path , encoding )
722
719
723
720
@functools .lru_cache (maxsize = 256 )
724
- def _find_rr_record (self , rr_path ):
725
- # type: (bytes) -> dr.DirectoryRecord
721
+ def _find_rr_record (self , rr_path , encoding = 'utf-8' ):
722
+ # type: (bytes, str ) -> dr.DirectoryRecord
726
723
"""
727
724
An internal method to find a directory record on the ISO given a Rock
728
725
Ridge path. If the entry is found, it returns the directory record
@@ -742,7 +739,7 @@ def _find_rr_record(self, rr_path):
742
739
743
740
splitpath = utils .split_path (rr_path )
744
741
745
- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
742
+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
746
743
747
744
entry = root_dir_record
748
745
@@ -793,13 +790,13 @@ def _find_rr_record(self, rr_path):
793
790
if not child .is_dir ():
794
791
break
795
792
entry = child
796
- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
793
+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
797
794
798
795
raise pycdlibexception .PyCdlibInvalidInput ('Could not find path' )
799
796
800
797
@functools .lru_cache (maxsize = 256 )
801
- def _find_joliet_record (self , joliet_path ):
802
- # type: (bytes) -> dr.DirectoryRecord
798
+ def _find_joliet_record (self , joliet_path , encoding = 'utf-16_be' ):
799
+ # type: (bytes, str ) -> dr.DirectoryRecord
803
800
"""
804
801
An internal method to find a directory record on the ISO given a Joliet
805
802
path. If the entry is found, it returns the directory record object
@@ -813,7 +810,7 @@ def _find_joliet_record(self, joliet_path):
813
810
"""
814
811
if self .joliet_vd is None :
815
812
raise pycdlibexception .PyCdlibInternalError ('Joliet path requested on non-Joliet ISO' )
816
- return _find_dr_record_by_name (self .joliet_vd , joliet_path , 'utf-16_be' )
813
+ return _find_dr_record_by_name (self .joliet_vd , joliet_path , encoding )
817
814
818
815
@functools .lru_cache (maxsize = 256 )
819
816
def _find_udf_record (self , udf_path ):
@@ -2412,8 +2409,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
2412
2409
utils .copy_data (data_len , blocksize , data_fp , outfp )
2413
2410
2414
2411
def _get_file_from_iso_fp (self , outfp , blocksize , iso_path , rr_path ,
2415
- joliet_path ):
2416
- # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes]) -> None
2412
+ joliet_path , encoding = None ):
2413
+ # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], Optional[str]) -> None
2417
2414
"""
2418
2415
An internal method to fetch a single file from the ISO and write it out
2419
2416
to the file object.
@@ -2433,13 +2430,16 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
2433
2430
if joliet_path is not None :
2434
2431
if self .joliet_vd is None :
2435
2432
raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a joliet_path from a non-Joliet ISO' )
2436
- found_record = self ._find_joliet_record (joliet_path )
2433
+ encoding = encoding or 'utf-16_be'
2434
+ found_record = self ._find_joliet_record (joliet_path , encoding )
2437
2435
elif rr_path is not None :
2438
2436
if not self .rock_ridge :
2439
2437
raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a rr_path from a non-Rock Ridge ISO' )
2440
- found_record = self ._find_rr_record (rr_path )
2438
+ encoding = encoding or 'utf-8'
2439
+ found_record = self ._find_rr_record (rr_path , encoding )
2441
2440
elif iso_path is not None :
2442
- found_record = self ._find_iso_record (iso_path )
2441
+ encoding = encoding or 'utf-8'
2442
+ found_record = self ._find_iso_record (iso_path , encoding )
2443
2443
else :
2444
2444
raise pycdlibexception .PyCdlibInternalError ('Invalid path passed to get_file_from_iso_fp' )
2445
2445
@@ -3471,8 +3471,8 @@ def _rm_joliet_dir(self, joliet_path):
3471
3471
3472
3472
return num_bytes_to_remove
3473
3473
3474
- def _get_iso_entry (self , iso_path ):
3475
- # type: (bytes) -> dr.DirectoryRecord
3474
+ def _get_iso_entry (self , iso_path , encoding = 'utf-8' ):
3475
+ # type: (bytes, str ) -> dr.DirectoryRecord
3476
3476
"""
3477
3477
Internal method to get the directory record for an ISO path.
3478
3478
@@ -3484,10 +3484,10 @@ def _get_iso_entry(self, iso_path):
3484
3484
if self ._needs_reshuffle :
3485
3485
self ._reshuffle_extents ()
3486
3486
3487
- return self ._find_iso_record (iso_path )
3487
+ return self ._find_iso_record (iso_path , encoding )
3488
3488
3489
- def _get_rr_entry (self , rr_path ):
3490
- # type: (bytes) -> dr.DirectoryRecord
3489
+ def _get_rr_entry (self , rr_path , encoding = 'utf-8' ):
3490
+ # type: (bytes, str ) -> dr.DirectoryRecord
3491
3491
"""
3492
3492
Internal method to get the directory record for a Rock Ridge path.
3493
3493
@@ -3500,10 +3500,10 @@ def _get_rr_entry(self, rr_path):
3500
3500
if self ._needs_reshuffle :
3501
3501
self ._reshuffle_extents ()
3502
3502
3503
- return self ._find_rr_record (rr_path )
3503
+ return self ._find_rr_record (rr_path , encoding )
3504
3504
3505
- def _get_joliet_entry (self , joliet_path ):
3506
- # type: (bytes) -> dr.DirectoryRecord
3505
+ def _get_joliet_entry (self , joliet_path , encoding = 'utf-16_be' ):
3506
+ # type: (bytes, str ) -> dr.DirectoryRecord
3507
3507
"""
3508
3508
Internal method to get the directory record for a Joliet path.
3509
3509
@@ -3516,7 +3516,7 @@ def _get_joliet_entry(self, joliet_path):
3516
3516
if self ._needs_reshuffle :
3517
3517
self ._reshuffle_extents ()
3518
3518
3519
- return self ._find_joliet_record (joliet_path )
3519
+ return self ._find_joliet_record (joliet_path , encoding )
3520
3520
3521
3521
def _get_udf_entry (self , udf_path ):
3522
3522
# type: (str) -> udfmod.UDFFileEntry
@@ -4183,6 +4183,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
4183
4183
iso_path = None
4184
4184
rr_path = None
4185
4185
udf_path = None
4186
+ encoding = None
4186
4187
num_paths = 0
4187
4188
for key , value in kwargs .items ():
4188
4189
if key == 'blocksize' :
@@ -4213,6 +4214,8 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
4213
4214
num_paths += 1
4214
4215
elif value is not None :
4215
4216
raise pycdlibexception .PyCdlibInvalidInput ('udf_path must be a string' )
4217
+ elif key == 'encoding' :
4218
+ encoding = value
4216
4219
else :
4217
4220
raise pycdlibexception .PyCdlibInvalidInput ('Unknown keyword %s' % (key ))
4218
4221
@@ -4223,7 +4226,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
4223
4226
self ._udf_get_file_from_iso_fp (outfp , blocksize , udf_path )
4224
4227
else :
4225
4228
self ._get_file_from_iso_fp (outfp , blocksize , iso_path , rr_path ,
4226
- joliet_path )
4229
+ joliet_path , encoding )
4227
4230
4228
4231
def get_and_write (self , iso_path , local_path , blocksize = 8192 ):
4229
4232
# type: (str, str, int) -> None
@@ -5459,6 +5462,8 @@ def list_children(self, **kwargs):
5459
5462
if key in ('joliet_path' , 'rr_path' , 'iso_path' , 'udf_path' ):
5460
5463
if value is not None :
5461
5464
num_paths += 1
5465
+ elif key == 'encoding' :  # exact match: ('encoding') is a plain str, so `in` was a substring test
5466
+ continue
5462
5467
else :
5463
5468
raise pycdlibexception .PyCdlibInvalidInput ("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
5464
5469
@@ -5476,12 +5481,15 @@ def list_children(self, **kwargs):
5476
5481
else :
5477
5482
use_rr = False
5478
5483
if 'joliet_path' in kwargs :
5479
- rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]))
5484
+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-16_be'
5485
+ rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]), kwargs ['encoding' ])
5480
5486
elif 'rr_path' in kwargs :
5481
- rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]))
5487
+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5488
+ rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]), kwargs ['encoding' ])
5482
5489
use_rr = True
5483
5490
else :
5484
- rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]))
5491
+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5492
+ rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]), kwargs ['encoding' ])
5485
5493
5486
5494
for c in _yield_children (rec , use_rr ):
5487
5495
yield c
@@ -5626,8 +5634,8 @@ def rm_isohybrid(self):
5626
5634
5627
5635
self .isohybrid_mbr = None
5628
5636
5629
- def full_path_from_dirrecord (self , rec , rockridge = False ):
5630
- # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool) -> str
5637
+ def full_path_from_dirrecord (self , rec , rockridge = False , user_encoding = None ):
5638
+ # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool, Optional[str]) -> str
5631
5639
"""
5632
5640
Get the absolute path of a directory record.
5633
5641
@@ -5646,6 +5654,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
5646
5654
if self .joliet_vd is not None and id (rec .vd ) == id (self .joliet_vd ):
5647
5655
encoding = 'utf-16_be'
5648
5656
5657
+ if user_encoding :
5658
+ encoding = user_encoding
5649
5659
# A root entry has no Rock Ridge entry, even on a Rock Ridge ISO.
5650
5660
# Always return / here.
5651
5661
if rec .is_root :
@@ -5685,6 +5695,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
5685
5695
encoding = rec .file_ident .encoding
5686
5696
else :
5687
5697
encoding = 'utf-8'
5698
+ if user_encoding :
5699
+ encoding = user_encoding
5688
5700
udf_rec = rec # type: Optional[udfmod.UDFFileEntry]
5689
5701
while udf_rec is not None :
5690
5702
ident = udf_rec .file_identifier ()
@@ -5893,13 +5905,13 @@ def walk(self, **kwargs):
5893
5905
while dirs :
5894
5906
dir_record = dirs .popleft ()
5895
5907
5896
- relpath = self .full_path_from_dirrecord (dir_record ,
5897
- rockridge = path_type == 'rr_path' )
5908
+ relpath = self .full_path_from_dirrecord (dir_record , rockridge = path_type == 'rr_path' ,
5909
+ user_encoding = kwargs .get ('encoding' , None ))  # read from kwargs, as the list_children call below does
5898
5910
dirlist = []
5899
5911
filelist = []
5900
5912
dirdict = {}
5901
5913
5902
- for child in reversed (list (self .list_children (** {path_type : relpath }))):
5914
+ for child in reversed (list (self .list_children (** {path_type : relpath , 'encoding' : kwargs . get ( 'encoding' , None ) }))):
5903
5915
if child is None or child .is_dot () or child .is_dotdot ():
5904
5916
continue
5905
5917
0 commit comments