# Source code for dfvfs.file_io.encoded_stream_io

# -*- coding: utf-8 -*-
"""The encoded stream file-like object implementation."""

import os

from dfvfs.encoding import manager as encoding_manager
from dfvfs.file_io import file_io
from dfvfs.lib import errors
from dfvfs.resolver import resolver


class EncodedStream(file_io.FileIO):
  """File input/output (IO) object of an encoded stream.

  The encoded data is read from the parent file-like object in chunks of
  at most _ENCODED_DATA_BUFFER_SIZE bytes and decoded chunk by chunk. The
  decoded data is only produced sequentially, hence a seek is resolved
  lazily: the next read rewinds the encoded data and decodes forward until
  the requested offset is reached.
  """

  # The size of the encoded data buffer.
  _ENCODED_DATA_BUFFER_SIZE = 8 * 1024 * 1024

  def __init__(self, resolver_context, path_spec):
    """Initializes a file input/output (IO) object.

    Args:
      resolver_context (Context): resolver context.
      path_spec (PathSpec): a path specification.
    """
    super().__init__(resolver_context, path_spec)
    # Offset of the next byte to read, relative to the decoded stream.
    self._current_offset = 0
    # The most recently decoded chunk, its size and the read position in it.
    self._decoded_data = b''
    self._decoded_data_offset = 0
    self._decoded_data_size = 0
    # Total size of the decoded stream, None while not yet determined.
    self._decoded_stream_size = None
    self._decoder = None
    # Encoded data that was read but that the decoder handed back undecoded.
    self._encoded_data = b''
    self._encoding_method = None
    self._file_object = None
    # True when the decoded data buffer does not match _current_offset and
    # must be re-established before the next read.
    self._realign_offset = True

  def _Close(self):
    """Closes the file-like object.

    Releases the decoder, the buffered data and the reference to the parent
    file-like object. The parent file-like object is only dereferenced here,
    it is not explicitly closed by this method.
    """
    self._decoder = None
    self._decoded_data = b''
    self._encoded_data = b''
    self._file_object = None

  def _GetDecoder(self):
    """Retrieves the decoder.

    Returns:
      Decoder: decoder for the encoding method of the path specification.
    """
    return encoding_manager.EncodingManager.GetDecoder(self._encoding_method)

  def _RewindEncodedStream(self):
    """Rewinds the encoded stream and discards all buffered data.

    A new decoder is created and both the decoded and the encoded data
    buffers are emptied, so that no data of a previous pass over the stream
    can leak into the next one.
    """
    self._file_object.seek(0, os.SEEK_SET)

    self._decoder = self._GetDecoder()
    self._decoded_data = b''
    self._decoded_data_offset = 0
    self._decoded_data_size = 0
    self._encoded_data = b''

  def _GetDecodedStreamSize(self):
    """Retrieves the decoded stream size.

    The size is determined by decoding the entire encoded stream once.

    Returns:
      int: decoded stream size.
    """
    self._RewindEncodedStream()

    # Scanning the stream moves the parent file-like object and replaces the
    # decoded data buffer, hence the next read has to realign first.
    self._realign_offset = True

    encoded_data_offset = 0
    encoded_data_size = self._file_object.get_size()
    decoded_stream_size = 0

    while encoded_data_offset < encoded_data_size:
      read_count = self._ReadEncodedData(self._ENCODED_DATA_BUFFER_SIZE)
      if read_count == 0:
        break

      encoded_data_offset += read_count
      decoded_stream_size += self._decoded_data_size

    return decoded_stream_size

  def _Open(self, mode='rb'):
    """Opens the file-like object.

    Args:
      mode (Optional[str]): file access mode. Not used by this method.

    Raises:
      AccessError: if the access to open the file was denied.
      IOError: if the file-like object could not be opened.
      OSError: if the file-like object could not be opened.
      PathSpecError: if the path specification is incorrect.
    """
    if not self._path_spec.HasParent():
      raise errors.PathSpecError(
          'Unsupported path specification without parent.')

    self._encoding_method = getattr(self._path_spec, 'encoding_method', None)

    if self._encoding_method is None:
      raise errors.PathSpecError(
          'Path specification missing encoding method.')

    self._file_object = resolver.Resolver.OpenFileObject(
        self._path_spec.parent, resolver_context=self._resolver_context)

  def _AlignDecodedDataOffset(self, decoded_data_offset):
    """Aligns the encoded file with the decoded data offset.

    The encoded stream is decoded from the start until the chunk that
    contains the requested offset is in the decoded data buffer.

    Args:
      decoded_data_offset (int): decoded data offset.
    """
    self._RewindEncodedStream()

    encoded_data_offset = 0
    encoded_data_size = self._file_object.get_size()

    while encoded_data_offset < encoded_data_size:
      read_count = self._ReadEncodedData(self._ENCODED_DATA_BUFFER_SIZE)
      if read_count == 0:
        break

      encoded_data_offset += read_count

      if decoded_data_offset < self._decoded_data_size:
        self._decoded_data_offset = decoded_data_offset
        return

      # The offset lies beyond this chunk, make it relative to the next one.
      decoded_data_offset -= self._decoded_data_size

    # The offset lies beyond the decodable data: mark the buffer as fully
    # consumed instead of leaving a stale position behind.
    self._decoded_data_offset = self._decoded_data_size

  def _ReadEncodedData(self, read_size):
    """Reads encoded data from the file-like object.

    The data read is appended to the encoded data left over from the
    previous call and passed to the decoder. The decoded data buffer is
    replaced by the result, the remainder returned by the decoder is kept
    for the next call.

    Args:
      read_size (int): number of bytes of encoded data to read.

    Returns:
      int: number of bytes of encoded data read.
    """
    encoded_data = self._file_object.read(read_size)

    read_count = len(encoded_data)

    self._encoded_data = b''.join([self._encoded_data, encoded_data])

    self._decoded_data, self._encoded_data = (
        self._decoder.Decode(self._encoded_data))

    self._decoded_data_size = len(self._decoded_data)

    return read_count

  def SetDecodedStreamSize(self, decoded_stream_size):
    """Sets the decoded stream size.

    This function is used to set the decoded stream size if it can be
    determined separately.

    Args:
      decoded_stream_size (int): size of the decoded stream in bytes.

    Raises:
      IOError: if the file-like object is already open.
      OSError: if the file-like object is already open.
      ValueError: if the decoded stream size is invalid.
    """
    if self._is_open:
      raise IOError('Already open.')

    if decoded_stream_size < 0:
      raise ValueError((
          f'Invalid decoded stream size: {decoded_stream_size:d} value out of '
          f'bounds.'))

    self._decoded_stream_size = decoded_stream_size

  # Note: that the following functions do not follow the style guide
  # because they are part of the file-like object interface.
  # pylint: disable=invalid-name

  def read(self, size=None):
    """Reads a byte string from the file-like object at the current offset.

    The function will read a byte string of the specified size or all of the
    remaining data if no size was specified.

    Args:
      size (Optional[int]): number of bytes to read, where None is all
          remaining data.

    Returns:
      bytes: data read.

    Raises:
      IOError: if the read failed.
      OSError: if the read failed.
    """
    if not self._is_open:
      raise IOError('Not opened.')

    if self._current_offset < 0:
      raise IOError((
          f'Invalid current offset: {self._current_offset:d} value less than '
          f'zero.'))

    if self._decoded_stream_size is None:
      self._decoded_stream_size = self._GetDecodedStreamSize()

    if self._decoded_stream_size < 0:
      raise IOError('Invalid decoded stream size.')

    if self._current_offset >= self._decoded_stream_size:
      return b''

    if self._realign_offset:
      self._AlignDecodedDataOffset(self._current_offset)
      self._realign_offset = False

    if size is None:
      size = self._decoded_stream_size

    # Never read beyond the end of the decoded stream.
    remaining_stream_size = self._decoded_stream_size - self._current_offset
    if size > remaining_stream_size:
      size = remaining_stream_size

    decoded_chunks = []
    while size > 0:
      # Only the part of the buffer after the read position is available;
      # comparing against the whole buffer size would cause short reads.
      available_size = self._decoded_data_size - self._decoded_data_offset
      if available_size > 0:
        chunk_size = min(size, available_size)
        chunk_end_offset = self._decoded_data_offset + chunk_size

        decoded_chunks.append(
            self._decoded_data[self._decoded_data_offset:chunk_end_offset])

        # Advance by what was actually copied so that the offsets stay in
        # step with the returned data.
        self._decoded_data_offset = chunk_end_offset
        self._current_offset += chunk_size
        size -= chunk_size
        continue

      # The buffer is exhausted, decode the next chunk of encoded data.
      read_count = self._ReadEncodedData(self._ENCODED_DATA_BUFFER_SIZE)
      self._decoded_data_offset = 0

      if read_count == 0 and self._decoded_data_size == 0:
        # No more encoded data and nothing left to decode.
        break

    return b''.join(decoded_chunks)

  def seek(self, offset, whence=os.SEEK_SET):
    """Seeks to an offset within the file-like object.

    The seek only records the new offset, the decoded data is realigned by
    the next read.

    Args:
      offset (int): offset to seek to.
      whence (Optional(int)): value that indicates whether offset is an
          absolute or relative position within the file.

    Raises:
      IOError: if the seek failed.
      OSError: if the seek failed.
    """
    if not self._is_open:
      raise IOError('Not opened.')

    if self._current_offset < 0:
      raise IOError((
          f'Invalid current offset: {self._current_offset:d} value less than '
          f'zero.'))

    if whence == os.SEEK_CUR:
      offset += self._current_offset

    elif whence == os.SEEK_END:
      if self._decoded_stream_size is None:
        self._decoded_stream_size = self._GetDecodedStreamSize()
        if self._decoded_stream_size is None:
          raise IOError('Invalid decoded stream size.')

      offset += self._decoded_stream_size

    elif whence != os.SEEK_SET:
      raise IOError('Unsupported whence.')

    if offset < 0:
      raise IOError('Invalid offset value less than zero.')

    if offset != self._current_offset:
      self._current_offset = offset
      self._realign_offset = True

  def get_offset(self):
    """Retrieves the current offset into the file-like object.

    Returns:
      int: current offset in the decoded stream.

    Raises:
      IOError: if the file-like object has not been opened.
      OSError: if the file-like object has not been opened.
    """
    if not self._is_open:
      raise IOError('Not opened.')

    return self._current_offset

  def get_size(self):
    """Retrieves the size of the file-like object.

    The size is determined on first use by decoding the entire stream,
    unless it was provided with SetDecodedStreamSize.

    Returns:
      int: size of the decoded stream.

    Raises:
      IOError: if the file-like object has not been opened.
      OSError: if the file-like object has not been opened.
    """
    if not self._is_open:
      raise IOError('Not opened.')

    if self._decoded_stream_size is None:
      self._decoded_stream_size = self._GetDecodedStreamSize()

    return self._decoded_stream_size