Source code for numbers_parser.cell

from __future__ import annotations

import logging
import math
import re
from dataclasses import asdict, dataclass, field, fields
from datetime import datetime, timedelta
from enum import IntEnum
from fractions import Fraction
from hashlib import sha1
from os.path import basename
from struct import pack, unpack
from typing import Any, NamedTuple
from warnings import warn

from sigfig import round as sigfig

from numbers_parser import __name__ as numbers_parser_name
from numbers_parser.constants import (
    CHECKBOX_FALSE_VALUE,
    CHECKBOX_TRUE_VALUE,
    CURRENCY_CELL_TYPE,
    CUSTOM_TEXT_PLACEHOLDER,
    DATETIME_FIELD_MAP,
    DECIMAL128_BIAS,
    DECIMAL_PLACES_AUTO,
    DEFAULT_ALIGNMENT,
    DEFAULT_BORDER_COLOR,
    DEFAULT_BORDER_STYLE,
    DEFAULT_BORDER_WIDTH,
    DEFAULT_DATETIME_FORMAT,
    DEFAULT_FONT,
    DEFAULT_FONT_SIZE,
    DEFAULT_TEXT_INSET,
    DEFAULT_TEXT_WRAP,
    EMPTY_STORAGE_BUFFER,
    EPOCH,
    MAX_BASE,
    MAX_SIGNIFICANT_DIGITS,
    PACKAGE_ID,
    SECONDS_IN_DAY,
    SECONDS_IN_HOUR,
    SECONDS_IN_WEEK,
    STAR_RATING_VALUE,
    CellPadding,
    CellType,
    ControlFormattingType,
    CustomFormattingType,
    DurationStyle,
    DurationUnits,
    FormattingType,
    FormatType,
    FractionAccuracy,
    NegativeNumberStyle,
    PaddingType,
)
from numbers_parser.currencies import CURRENCIES, CURRENCY_SYMBOLS
from numbers_parser.exceptions import UnsupportedError, UnsupportedWarning
from numbers_parser.formula import Formula
from numbers_parser.generated import TSPMessages_pb2 as TSPMessages
from numbers_parser.generated import TSTArchives_pb2 as TSTArchives
from numbers_parser.generated.TSWPArchives_pb2 import (
    ParagraphStylePropertiesArchive as ParagraphStyle,
)
from numbers_parser.numbers_cache import Cacheable, cache
from numbers_parser.numbers_uuid import NumbersUUID
from numbers_parser.xrefs import xl_range

logger = logging.getLogger(numbers_parser_name)
debug = logger.debug


__all__ = [
    "RGB",
    "Alignment",
    "BackgroundImage",
    "BoolCell",
    "Border",
    "BorderType",
    "BulletedTextCell",
    "Cell",
    "CellBorder",
    "CustomFormatting",
    "DateCell",
    "DurationCell",
    "EmptyCell",
    "ErrorCell",
    "Formatting",
    "HorizontalJustification",
    "MergeAnchor",
    "MergeReference",
    "MergedCell",
    "NumberCell",
    "RichTextCell",
    "Style",
    "TextCell",
    "VerticalJustification",
]



[docs]
class BackgroundImage:
    """
    A named document style that can be applied to cells.

    .. code-block:: python

        fh = open("cats.png", mode="rb")
        image_data = fh.read()
        cats_bg = doc.add_style(
            name="Cats",
            bg_image=BackgroundImage(image_data, "cats.png")
        )
        table.write(0, 0, "❤️ cats", style=cats_bg)

    Currently only standard image files and not 'advanced' image fills are
    supported. Tiling and scaling is not reported back and cannot be changed
    when saving new cells.

    Parameters
    ----------
    data: bytes
        Raw image data for a cell background image.
    filename: str
        Path to the image file.

    """

    def __init__(self, data: bytes | None = None, filename: str | None = None) -> None:
        self._data = data
        self._filename = basename(filename)

    @property
    def data(self) -> bytes:
        """bytes: The background image as bytes for a cell, or None if no image."""
        return self._data

    @property
    def filename(self) -> str:
        """str: The image filename for a cell, or None if no image."""
        return self._filename



class HorizontalJustification(IntEnum):
    LEFT = ParagraphStyle.TextAlignmentType.TATvalue0
    RIGHT = ParagraphStyle.TextAlignmentType.TATvalue1
    CENTER = ParagraphStyle.TextAlignmentType.TATvalue2
    JUSTIFIED = ParagraphStyle.TextAlignmentType.TATvalue3
    AUTO = ParagraphStyle.TextAlignmentType.TATvalue4


class VerticalJustification(IntEnum):
    TOP = ParagraphStyle.DeprecatedParagraphBorderType.PBTvalue0
    MIDDLE = ParagraphStyle.DeprecatedParagraphBorderType.PBTvalue1
    BOTTOM = ParagraphStyle.DeprecatedParagraphBorderType.PBTvalue2


HORIZONTAL_MAP = {
    "left": HorizontalJustification.LEFT,
    "right": HorizontalJustification.RIGHT,
    "center": HorizontalJustification.CENTER,
    "justified": HorizontalJustification.JUSTIFIED,
    "auto": HorizontalJustification.AUTO,
}

VERTICAL_MAP = {
    "top": VerticalJustification.TOP,
    "middle": VerticalJustification.MIDDLE,
    "bottom": VerticalJustification.BOTTOM,
}


class _Alignment(NamedTuple):
    """Class for internal alignment type."""

    horizontal: str
    vertical: str


class Alignment(_Alignment):
    def __new__(cls, horizontal=DEFAULT_ALIGNMENT[0], vertical=DEFAULT_ALIGNMENT[1]):
        if isinstance(horizontal, str):
            horizontal = horizontal.lower()
            if horizontal not in HORIZONTAL_MAP:
                msg = "invalid horizontal alignment"
                raise TypeError(msg)
            horizontal = HORIZONTAL_MAP[horizontal]

        if isinstance(vertical, str):
            vertical = vertical.lower()
            if vertical not in VERTICAL_MAP:
                msg = "invalid vertical alignment"
                raise TypeError(msg)
            vertical = VERTICAL_MAP[vertical]

        return super(_Alignment, cls).__new__(cls, (horizontal, vertical))


DEFAULT_ALIGNMENT_CLASS = Alignment(*DEFAULT_ALIGNMENT)


class RGB(NamedTuple):
    """A color in RGB."""

    r: int
    g: int
    b: int


def default_color() -> RGB:
    return RGB(0, 0, 0)



[docs]
@dataclass
class Style:
    """
    A named document style that can be applied to cells.

    Parameters
    ----------
    alignment: Alignment, optional, default: Alignment("auto", "top")
        Horizontal and vertical alignment of the cell
    bg_color: RGB | List[RGB], optional, default: RGB(0, 0, 0)
        Background color or list of colors for gradients
    bold: bool, optional, default: False
        ``True`` if the cell font is bold
    font_color: RGB, optional, default: RGB(0, 0, 0)) – Font color
    font_size: float, optional, default: DEFAULT_FONT_SIZE
        Font size in points
    font_name: str, optional, default: DEFAULT_FONT_SIZE
        Font name
    italic: bool, optional, default: False
        ``True`` if the cell font is italic
    name: str, optional
        Style name
    underline: bool, optional, default: False) – True if the
        cell font is underline
    strikethrough: bool, optional, default: False) – True if
        the cell font is strikethrough
    first_indent: float, optional, default: 0.0) – First line
        indent in points
    left_indent: float, optional, default: 0.0
        Left indent in points
    right_indent: float, optional, default: 0.0
        Right indent in points
    text_inset: float, optional, default: DEFAULT_TEXT_INSET
        Text inset in points
    text_wrap: str, optional, default: True
        ``True`` if text wrapping is enabled

    Raises
    ------
    TypeError:
        If arguments do not match the specified type or for objects have invalid arguments
    IndexError:
        If an image filename already exists in document

    """

    alignment: Alignment = DEFAULT_ALIGNMENT_CLASS  # : horizontal and vertical alignment
    bg_image: object = None  # : backgroung image
    bg_color: RGB | list[RGB] = None
    font_color: RGB = field(default_factory=default_color)
    font_size: float = DEFAULT_FONT_SIZE
    font_name: str = DEFAULT_FONT
    bold: bool = False
    italic: bool = False
    strikethrough: bool = False
    underline: bool = False
    first_indent: float = 0
    left_indent: float = 0
    right_indent: float = 0
    text_inset: float = DEFAULT_TEXT_INSET
    text_wrap: bool = DEFAULT_TEXT_WRAP
    name: str = None
    _text_style_obj_id: int = None
    _cell_style_obj_id: int = None
    _update_cell_style: bool = False
    _update_text_style: bool = False

    @staticmethod
    def _text_attrs():
        return [
            "alignment",
            "bold",
            "first_indent",
            "font_color",
            "font_name",
            "font_size",
            "italic",
            "left_indent",
            "name",
            "right_indent",
            "strikethrough",
            "text_inset",
            "underline",
        ]

    @staticmethod
    def _cell_attrs():
        return [
            "alignment",
            "bg_color",
            "bg_image",
            "first_indent",
            "left_indent",
            "right_indent",
            "text_inset",
            "text_wrap",
        ]

    @classmethod
    def from_storage(cls, cell: object, model: object):
        bg_image = BackgroundImage(*cell._image_data) if cell._image_data is not None else None
        return Style(
            alignment=model.cell_alignment(cell),
            bg_image=bg_image,
            bg_color=model.cell_bg_color(cell),
            font_color=model.cell_font_color(cell),
            font_size=model.cell_font_size(cell),
            font_name=model.cell_font_name(cell),
            bold=model.cell_is_bold(cell),
            italic=model.cell_is_italic(cell),
            strikethrough=model.cell_is_strikethrough(cell),
            underline=model.cell_is_underline(cell),
            name=model.cell_style_name(cell),
            first_indent=model.cell_first_indent(cell),
            left_indent=model.cell_left_indent(cell),
            right_indent=model.cell_right_indent(cell),
            text_inset=model.cell_text_inset(cell),
            text_wrap=model.cell_text_wrap(cell),
            _text_style_obj_id=model.text_style_object_id(cell),
            _cell_style_obj_id=model.cell_style_object_id(cell),
        )

    def __post_init__(self):
        self.bg_color = rgb_color(self.bg_color)
        self.font_color = rgb_color(self.font_color)

        if not isinstance(self.font_size, float):
            msg = "size must be a float number of points"
            raise TypeError(msg)
        if not isinstance(self.font_name, str):
            msg = "font name must be a string"
            raise TypeError(msg)

        for attr in ["bold", "italic", "underline", "strikethrough"]:
            if not isinstance(getattr(self, attr), bool):
                msg = f"{attr} argument must be boolean"
                raise TypeError(msg)

    def __setattr__(self, name: str, value: Any) -> None:
        """
        Detect changes to cell styles and flag the style for
        possible updates when saving the document.
        """
        if name in ["bg_color", "font_color"]:
            value = rgb_color(value)
        if name == "alignment":
            value = alignment(value)
        if name in Style._text_attrs():
            self.__dict__["_update_text_style"] = True
        if name in Style._cell_attrs():
            self.__dict__["_update_cell_style"] = True

        if name not in ["_update_text_style", "_update_cell_style"]:
            self.__dict__[name] = value



def rgb_color(color) -> RGB:
    """Raise a TypeError if a color is not a valid RGB value."""
    if color is None:
        return None
    if isinstance(color, RGB):
        return color
    if isinstance(color, tuple):
        if not (len(color) == 3 and all(isinstance(x, int) for x in color)):
            msg = "RGB color must be an RGB or a tuple of 3 integers"
            raise TypeError(msg)
        return RGB(*color)
    if isinstance(color, list):
        return [rgb_color(c) for c in color]
    msg = "RGB color must be an RGB or a tuple of 3 integers"
    raise TypeError(msg)


def alignment(value) -> Alignment:
    """Raise a TypeError if a alignment is not a valid."""
    if value is None:
        return Alignment()
    if isinstance(value, Alignment):
        return value
    if isinstance(value, tuple):
        if not (len(value) == 2 and all(isinstance(x, (int, str)) for x in value)):
            msg = "Alignment must be an Alignment or a tuple of 2 integers/strings"
            raise TypeError(msg)
        return Alignment(*value)
    msg = "Alignment must be an Alignment or a tuple of 2 integers/strings"
    raise TypeError(msg)


BORDER_STYLE_MAP = {"solid": 0, "dashes": 1, "dots": 2, "none": 3}


class BorderType(IntEnum):
    SOLID = BORDER_STYLE_MAP["solid"]
    DASHES = BORDER_STYLE_MAP["dashes"]
    DOTS = BORDER_STYLE_MAP["dots"]
    NONE = BORDER_STYLE_MAP["none"]



[docs]
class Border:
    """
    Create a cell border to use with the :py:class:`~numbers_parser.Table` method
    :py:meth:`~numbers_parser.Table.set_cell_border`.

    .. code-block:: python

        border_style = Border(8.0, RGB(29, 177, 0), "solid")
        table.set_cell_border("B6", "left", border_style, 3)
        table.set_cell_border(6, 1, "right", border_style)

    Parameters
    ----------
    width: float, optional, default: 0.35
        Number of rows in the first table of a new document.
    color: RGB, optional, default: RGB(0, 0, 0)
        The line color for the border if present
    style: BorderType, optional, default: ``None``
        The type of border to create or ``None`` if there is no border defined. Valid
        border types are:

        * ``"solid"``: a solid line
        * ``"dashes"``: a dashed line
        * ``"dots"``: a dotted line

    Raises
    ------
    TypeError:
        If the width is not a float, or the border type is invalid.

    """

    def __init__(
        self,
        width: float = DEFAULT_BORDER_WIDTH,
        color: RGB = None,
        style: BorderType = None,
        _order: int = 0,
    ) -> None:
        if not isinstance(width, float):
            msg = "width must be a float number of points"
            raise TypeError(msg)
        self.width = width

        if color is None:
            color = RGB(*DEFAULT_BORDER_COLOR)
        self.color = rgb_color(color)

        if style is None:
            style = BorderType(BORDER_STYLE_MAP[DEFAULT_BORDER_STYLE])
        if isinstance(style, str):
            style = style.lower()
            if style not in BORDER_STYLE_MAP:
                msg = "invalid border style"
                raise TypeError(msg)
            self.style = BORDER_STYLE_MAP[style]
        else:
            self.style = style

        self._order = _order

    def __str__(self) -> str:
        style_name = BorderType(self.style).name.lower()
        return f"Border(width={self.width}, color={self.color}, style={style_name})"

    def __eq__(self, value: object) -> bool:
        return all(
            [self.width == value.width, self.color == value.color, self.style == value.style],
        )



class CellBorder:
    def __init__(
        self,
        top_merged: bool = False,
        right_merged: bool = False,
        bottom_merged: bool = False,
        left_merged: bool = False,
    ) -> None:
        self._top = None
        self._right = None
        self._bottom = None
        self._left = None
        self._top_merged = top_merged
        self._right_merged = right_merged
        self._bottom_merged = bottom_merged
        self._left_merged = left_merged

    @property
    def top(self):
        if self._top_merged or self._top is None:
            return None
        return self._top

    @top.setter
    def top(self, value) -> None:
        if self._top is None or value._order > self.top._order:
            self._top = value

    @property
    def right(self):
        if self._right_merged or self._right is None:
            return None
        return self._right

    @right.setter
    def right(self, value) -> None:
        if self._right is None or value._order > self._right._order:
            self._right = value

    @property
    def bottom(self):
        if self._bottom_merged or self._bottom is None:
            return None
        return self._bottom

    @bottom.setter
    def bottom(self, value) -> None:
        if self._bottom is None or value._order > self._bottom._order:
            self._bottom = value

    @property
    def left(self):
        if self._left_merged or self._left is None:
            return None
        return self._left

    @left.setter
    def left(self, value) -> None:
        if self._left is None or value._order > self._left._order:
            self._left = value


class MergeReference:
    """Cell reference for cells eliminated by a merge."""

    def __init__(self, row_start: int, col_start: int, row_end: int, col_end: int) -> None:
        self.rect = (row_start, col_start, row_end, col_end)


class MergeAnchor:
    """Cell reference for the merged cell."""

    def __init__(self, size: tuple) -> None:
        self.size = size


@dataclass
class CellStorageFlags:
    _string_id: int = None
    _rich_id: int = None
    _cell_style_id: int = None
    _text_style_id: int = None
    _formula_id: int = None
    _control_id: int = None
    _formula_error_id: int = None
    _suggest_id: int = None
    _num_format_id: int = None
    _currency_format_id: int = None
    _date_format_id: int = None
    _duration_format_id: int = None
    _text_format_id: int = None
    _bool_format_id: int = None

    def __str__(self) -> str:
        fields = [
            f"{k[1:]}={v}" for k, v in asdict(self).items() if k.endswith("_id") and v is not None
        ]
        return ", ".join([x for x in fields if x if not None])

    def flags(self):
        return [x.name for x in fields(self)]



[docs]
class Cell(CellStorageFlags, Cacheable):
    """
    .. NOTE::

       Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
    """

    def __init__(self, row: int, col: int, value) -> None:
        self._value = value
        self.row = row
        self.col = col
        self._is_bulleted = False
        self._formula_id = None
        self._storage = None
        self._style = None
        self._d128 = None
        self._double = None
        self._seconds = None
        super().__init__()

    def __str__(self) -> str:
        table_name = self._model.table_name(self._table_id)
        sheet_name = self._model.sheet_name(self._model.table_id_to_sheet_id(self._table_id))
        cell_str = f"{sheet_name}@{table_name}[{self.row},{self.col}]:"
        cell_str += f"table_id={self._table_id}, type={self._type.name}, "
        cell_str += f"value={self._value}, flags={self._flags:08x}, extras={self._extras:04x}"
        return ", ".join([cell_str, super().__str__()])

    @property
    def image_filename(self):
        warn(
            "image_filename is deprecated and will be removed in the future. "
            + "Please use the style property",
            DeprecationWarning,
            stacklevel=2,
        )
        if self.style is not None and self.style.bg_image is not None:
            return self.style.bg_image.filename
        return None

    @property
    def image_data(self):
        warn(
            "image_data is deprecated and will be removed in the future. "
            + "Please use the style property",
            DeprecationWarning,
            stacklevel=2,
        )
        if self.style is not None and self.style.bg_image is not None:
            return self.style.bg_image.data
        return None

    @property
    def is_formula(self) -> bool:
        """bool: ``True`` if the cell contains a formula."""
        return self._formula_id is not None

    @property
    def formula(self) -> str:
        """
        str: The formula in a cell.

        Formula evaluation relies on Numbers storing current values which should
        usually be the case. In cells containing a formula, :py:meth:`numbers_parser.Cell.value`
        returns computed value of the formula.

        Returns
        -------
            str:
                The text of the foruma in a cell, or `None` if there is no formula
                present in a cell.

        """
        if self._formula_id is not None:
            table_formulas = self._model.table_formulas(self._table_id)
            return table_formulas.formula(self._formula_id, self.row, self.col)
        return None

    @formula.setter
    def formula(self, value: str) -> None:
        self._formula_id = Formula.from_str(
            self._model,
            self._table_id,
            self.row,
            self.col,
            value,
        )
        self._model.add_formula_dependency(self.row, self.col, self._table_id)

    @property
    def is_bulleted(self) -> bool:
        """bool: ``True`` if the cell contains text bullets."""
        return self._is_bulleted

    @property
    def bullets(self) -> list[str] | None:
        r"""
        List[str] | None: The bullets in a cell, or ``None``.

        Cells that contain bulleted or numbered lists are identified
        by :py:attr:`numbers_parser.Cell.is_bulleted`. For these cells,
        :py:attr:`numbers_parser.Cell.value` returns the whole cell contents.
        Bullets can also be extracted into a list of paragraphs cell without the
        bullet or numbering character. Newlines are not included in the
        bullet list.

        Example
        -------
        .. code-block:: python

            doc = Document("bullets.numbers")
            sheets = doc.sheets
            tables = sheets[0].tables
            table = tables[0]
            if not table.cell(0, 1).is_bulleted:
                print(table.cell(0, 1).value)
            else:
                bullets = ["* " + s for s in table.cell(0, 1).bullets]
                print("\n".join(bullets))
                    return None

        """
        return None

    @property
    def formatted_value(self) -> str:
        """
        str: The formatted value of the cell as it appears in Numbers.

        Interactive elements are converted into a suitable text format where
        supported, or as their number values where there is no suitable
        visual representation. Currently supported mappings are:

        * Checkboxes are U+2610 (Ballow Box) or U+2611 (Ballot Box with Check)
        * Ratings are their star value represented using (U+2605) (Black Star)

        .. code-block:: python

            >>> table = doc.default_table
            >>> table.cell(0,0).value
            False
            >>> table.cell(0,0).formatted_value
            '☐'
            >>> table.cell(0,1).value
            True
            >>> table.cell(0,1).formatted_value
            '☑'
            >>> table.cell(1,1).value
            3.0
            >>> table.cell(1,1).formatted_value
            '★★★'
        """
        if self._duration_format_id is not None and self._double is not None:
            return self._duration_format()
        if self._date_format_id is not None and self._seconds is not None:
            return self._date_format()
        if (
            self._text_format_id is not None
            or self._num_format_id is not None
            or self._currency_format_id is not None
            or self._bool_format_id is not None
        ):
            return self._custom_format()
        return str(self.value).upper() if isinstance(self.value, bool) else str(self.value)

    @property
    def style(self) -> Style | None:
        """
        Style | None: The :class:`Style` associated with the cell or ``None``.

        Warns
        -----
            UnsupportedWarning: On assignment; use
                :py:meth:`numbers_parser.Table.set_cell_style` instead.

        """
        if self._style is None:
            self._style = Style.from_storage(self, self._model)
        return self._style

    @style.setter
    def style(self, _) -> None:
        warn(
            "cell style cannot be set; use Table.set_cell_style() instead",
            UnsupportedWarning,
            stacklevel=2,
        )

    @property
    def border(self) -> CellBorder | None:
        """
        CellBorder| None: The :class:`CellBorder` associated with the cell or ``None``.

        Warns
        -----
            UnsupportedWarning: On assignment; use
                :py:meth:`numbers_parser.Table.set_cell_border` instead.

        """
        self._model.extract_strokes(self._table_id)
        return self._border

    @border.setter
    def border(self, _) -> None:
        warn(
            "cell border values cannot be set; use Table.set_cell_border() instead",
            UnsupportedWarning,
            stacklevel=2,
        )

    @classmethod
    def _empty_cell(cls, table_id: int, row: int, col: int, model: object):
        return Cell._from_storage(table_id, row, col, EMPTY_STORAGE_BUFFER, model)

    @classmethod
    def _merged_cell(cls, table_id: int, row: int, col: int, model: object):
        cell = MergedCell(row, col)
        cell._model = model
        cell._table_id = table_id
        merge_cells = model.merge_cells(table_id)
        cell._set_merge(merge_cells.get((row, col)))
        return cell

    @classmethod
    def _from_value(cls, row: int, col: int, value):
        # TODO: write needs to retain/init the border
        if isinstance(value, str):
            cell = TextCell(row, col, value)
        elif isinstance(value, bool):
            cell = BoolCell(row, col, value)
        elif isinstance(value, int):
            cell = NumberCell(row, col, value)
        elif isinstance(value, float):
            rounded_value = sigfig(value, sigfigs=MAX_SIGNIFICANT_DIGITS, warn=False)
            if rounded_value != value:
                warn(
                    f"'{value}' rounded to {MAX_SIGNIFICANT_DIGITS} significant digits",
                    RuntimeWarning,
                    stacklevel=2,
                )
            cell = NumberCell(row, col, rounded_value)
        elif isinstance(value, datetime):
            cell = DateCell(row, col, value)
        elif isinstance(value, timedelta):
            cell = DurationCell(row, col, value)
        else:
            msg = "Can't determine cell type from type " + type(value).__name__
            raise ValueError(msg)  # noqa: TRY004

        return cell

    @classmethod
    def _from_storage(  # noqa: PLR0912
        cls,
        table_id: int,
        row: int,
        col: int,
        buffer: bytearray,
        model: object,
    ) -> None:
        d128 = None
        double = None
        seconds = None

        version = buffer[0]
        if version != 5:
            msg = f"Cell storage version {version} is unsupported"
            raise UnsupportedError(msg)

        offset = 12
        storage_flags = CellStorageFlags()
        flags = unpack("<i", buffer[8:12])[0]

        if flags & 0x1:
            d128 = _unpack_decimal128(buffer[offset : offset + 16])
            offset += 16
        if flags & 0x2:
            double = unpack("<d", buffer[offset : offset + 8])[0]
            offset += 8
        if flags & 0x4:
            seconds = unpack("<d", buffer[offset : offset + 8])[0]
            offset += 8
        if flags & 0x8:
            storage_flags._string_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        if flags & 0x10:
            storage_flags._rich_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        if flags & 0x20:
            storage_flags._cell_style_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        if flags & 0x40:
            storage_flags._text_style_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        if flags & 0x80:
            # cond_style_id skipped
            offset += 4
        # Skip flag 0x100 (cond_rule_style_id)
        if flags & 0x200:
            storage_flags._formula_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        if flags & 0x400:
            storage_flags._control_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        # Skip flag 0x800 (formula_error_id)
        if flags & 0x1000:
            storage_flags._suggest_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        # Skip unused flags
        offset += 4 * bin(flags & 0x900).count("1")
        if flags & 0x2000:
            storage_flags._num_format_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        if flags & 0x4000:
            storage_flags._currency_format_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        if flags & 0x8000:
            storage_flags._date_format_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        if flags & 0x10000:
            storage_flags._duration_format_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        if flags & 0x20000:
            storage_flags._text_format_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        if flags & 0x40000:
            storage_flags._bool_format_id = unpack("<i", buffer[offset : offset + 4])[0]
            offset += 4
        # Skip 0x80000 (comment_id) and 0x100000 (import_warning_id)

        cell_type = buffer[1]
        if cell_type == TSTArchives.genericCellType:
            cell = EmptyCell(row, col)
        elif cell_type == TSTArchives.numberCellType:
            cell = NumberCell(row, col, d128)
        elif cell_type == TSTArchives.textCellType:
            cell = TextCell(row, col, model.table_string(table_id, storage_flags._string_id))
        elif cell_type == TSTArchives.dateCellType:
            cell = DateCell(row, col, EPOCH + timedelta(seconds=seconds))
            cell._datetime = cell._value
        elif cell_type == TSTArchives.boolCellType:
            cell = BoolCell(row, col, double > 0.0)
        elif cell_type == TSTArchives.durationCellType:
            cell = DurationCell(row, col, timedelta(seconds=double))
        elif cell_type == TSTArchives.formulaErrorCellType:
            cell = ErrorCell(row, col)
        elif cell_type == TSTArchives.automaticCellType:
            cell = RichTextCell(row, col, model.table_rich_text(table_id, storage_flags._rich_id))
        elif cell_type == CURRENCY_CELL_TYPE:
            cell = NumberCell(row, col, d128, cell_type=CellType.CURRENCY)
        else:
            msg = f"Cell type ID {cell_type} is not recognised"
            raise UnsupportedError(msg)

        cell._copy_flags(storage_flags)
        cell._buffer = buffer
        cell._model = model
        cell._table_id = table_id
        cell._d128 = d128
        cell._double = double
        cell._seconds = seconds
        cell._extras = unpack("<H", buffer[6:8])[0]
        cell._flags = flags

        merge_cells = model.merge_cells(table_id)
        cell._set_merge(merge_cells.get((row, col)))

        if logging.getLogger(__package__).level == logging.DEBUG:
            # Guard to reduce expense of computing fields
            debug(str(cell))

        return cell

    def _copy_flags(self, storage_flags: CellStorageFlags) -> None:
        for flag in storage_flags.flags():
            setattr(self, flag, getattr(storage_flags, flag))

    def _set_merge(self, merge_ref) -> None:
        if isinstance(merge_ref, MergeAnchor):
            self.is_merged = True
            self.size = merge_ref.size
            self.merge_range = None
            self.rect = None
            self._border = CellBorder()
        elif isinstance(merge_ref, MergeReference):
            self.is_merged = False
            self.size = None
            self.row_start = merge_ref.rect[0]
            self.col_start = merge_ref.rect[1]
            self.row_end = merge_ref.rect[2]
            self.col_end = merge_ref.rect[3]
            self.merge_range = xl_range(*merge_ref.rect)
            self.rect = merge_ref.rect
            top_merged = self.row > self.row_start
            right_merged = self.col < self.col_end
            bottom_merged = self.row < self.row_end
            left_merged = self.col > self.col_start
            self._border = CellBorder(top_merged, right_merged, bottom_merged, left_merged)
        else:
            self.is_merged = False
            self.size = (1, 1)
            self.merge_range = None
            self.rect = None
            self._border = CellBorder()

    def _to_buffer(self) -> bytearray:  # noqa: PLR0912, PLR0915
        """Create a storage buffer for a cell using v5 (modern) layout."""
        if self._style is not None:
            if self._style._text_style_obj_id is not None:
                self._text_style_id = self._model._table_styles.lookup_key(
                    self._table_id,
                    TSPMessages.Reference(identifier=self._style._text_style_obj_id),
                )
                self._model.add_component_reference(
                    self._style._text_style_obj_id,
                    component_id=self._model._table_styles.id(self._table_id),
                )

            if self._style._cell_style_obj_id is not None:
                self._cell_style_id = self._model._table_styles.lookup_key(
                    self._table_id,
                    TSPMessages.Reference(identifier=self._style._cell_style_obj_id),
                )
                self._model.add_component_reference(
                    self._style._cell_style_obj_id,
                    component_id=self._model._table_styles.id(self._table_id),
                )

        length = 12
        if isinstance(self, NumberCell):
            flags = 1
            length += 16
            if self._type == CellType.CURRENCY:
                cell_type = CURRENCY_CELL_TYPE
            else:
                cell_type = TSTArchives.numberCellType
            value = _pack_decimal128(self.value)
        elif isinstance(self, TextCell):
            flags = 8
            length += 4
            cell_type = TSTArchives.textCellType
            value = pack("<i", self._model.table_string_key(self._table_id, self.value))
        elif isinstance(self, DateCell):
            flags = 4
            length += 8
            cell_type = TSTArchives.dateCellType
            if self._value.tzinfo is None:
                date_delta = self._value - EPOCH
            else:
                date_delta = self._value - EPOCH.astimezone(self._value.tzinfo)
            value = pack("<d", float(date_delta.total_seconds()))
        elif isinstance(self, BoolCell):
            flags = 2
            length += 8
            cell_type = TSTArchives.boolCellType
            value = pack("<d", float(self.value))
        elif isinstance(self, DurationCell):
            flags = 2
            length += 8
            cell_type = TSTArchives.durationCellType
            value = pack("<d", float(self.value.total_seconds()))
        elif isinstance(self, EmptyCell):
            flags = 0
            cell_type = TSTArchives.emptyCellValueType
            value = b""
        elif isinstance(self, MergedCell):
            return None
        elif isinstance(self, RichTextCell):
            flags = 0
            length += 4
            cell_type = TSTArchives.automaticCellType
            value = pack("<i", self._rich_id)
        else:
            data_type = type(self).__name__
            table_name = self._model.table_name(self._table_id)
            table_ref = f"@{table_name}:[{self.row},{self.col}]"
            warn(
                f"{table_ref}: unsupported data type {data_type} for save",
                UnsupportedWarning,
                stacklevel=1,
            )
            return None

        storage = bytearray(12)
        storage[0] = 5
        storage[1] = cell_type
        storage += value

        if self._rich_id is not None:
            flags |= 0x10
            length += 4
            storage += pack("<i", self._rich_id)
        if self._cell_style_id is not None:
            flags |= 0x20
            length += 4
            storage += pack("<i", self._cell_style_id)
        if self._text_style_id is not None:
            flags |= 0x40
            length += 4
            storage += pack("<i", self._text_style_id)
        if self._formula_id is not None:
            flags |= 0x200
            length += 4
            storage += pack("<i", self._formula_id)
        if self._control_id is not None:
            flags |= 0x400
            length += 4
            storage += pack("<i", self._control_id)
        if self._suggest_id is not None:
            flags |= 0x1000
            length += 4
            storage += pack("<i", self._suggest_id)
        if self._num_format_id is not None:
            flags |= 0x2000
            length += 4
            storage += pack("<i", self._num_format_id)
            storage[6] |= 1
        if self._currency_format_id is not None:
            flags |= 0x4000
            length += 4
            storage += pack("<i", self._currency_format_id)
            storage[6] |= 2
        if self._date_format_id is not None:
            flags |= 0x8000
            length += 4
            storage += pack("<i", self._date_format_id)
            storage[6] |= 8
        if self._duration_format_id is not None:
            flags |= 0x10000
            length += 4
            storage += pack("<i", self._duration_format_id)
            storage[6] |= 4
        if self._text_format_id is not None:
            flags |= 0x20000
            length += 4
            storage += pack("<i", self._text_format_id)
        if self._bool_format_id is not None:
            flags |= 0x40000
            length += 4
            storage += pack("<i", self._bool_format_id)
            storage[6] |= 0x20
        if self._string_id is not None:
            storage[6] |= 0x80

        storage[8:12] = pack("<i", flags)
        if len(storage) < 32:
            storage += bytearray(32 - length)

        return storage[0:length]

    def _update_value(self, value, cell: object) -> None:
        if cell._type == CellType.NUMBER:
            self._d128 = value
        elif cell._type == CellType.DATE:
            self._datetime = value
        self._value = value

    @property
    @cache(num_args=0)
    def _image_data(self) -> tuple[bytes, str]:
        """Return the background image data for a cell or None if no image."""
        if self._cell_style_id is None:
            return None
        style = self._model.table_style(self._table_id, self._cell_style_id)
        if not style.cell_properties.cell_fill.HasField("image"):
            return None

        image_id = style.cell_properties.cell_fill.image.imagedata.identifier
        datas = self._model.objects[PACKAGE_ID].datas
        stored_filename = next(
            x.file_name for x in datas if x.identifier == image_id
        )  # pragma: nocover (issue-1333)
        preferred_filename = next(
            x.preferred_file_name for x in datas if x.identifier == image_id
        )  # pragma: nocover (issue-1333)
        all_paths = self._model.objects.file_store.keys()
        image_pathnames = [x for x in all_paths if x == f"Data/{stored_filename}"]

        if len(image_pathnames) == 0:
            warn(
                f"Cannot find file '{preferred_filename}' in Numbers archive",
                RuntimeWarning,
                stacklevel=3,
            )
            return None
        image_data = self._model.objects.file_store[image_pathnames[0]]
        digest = sha1(image_data).digest()  # noqa: S324
        if digest not in self._model._images:
            self._model._images[digest] = image_id

        return (image_data, preferred_filename)

    def _custom_format(self) -> str:  # noqa: PLR0911
        if self._text_format_id is not None and self._type == CellType.TEXT:
            custom_format = self._model.table_format(self._table_id, self._text_format_id)
        elif self._currency_format_id is not None:
            custom_format = self._model.table_format(self._table_id, self._currency_format_id)
        elif self._bool_format_id is not None and self._type == CellType.BOOL:
            custom_format = self._model.table_format(self._table_id, self._bool_format_id)
        elif self._num_format_id is not None:
            custom_format = self._model.table_format(self._table_id, self._num_format_id)
        else:
            return str(self.value)

        debug(
            "custom_format: @[%d,%d]: format_type=%s, ",
            self.row,
            self.col,
            custom_format.format_type,
        )

        if custom_format.HasField("custom_uid"):
            format_uuid = NumbersUUID(custom_format.custom_uid).hex
            format_map = self._model.custom_format_map()
            custom_format = format_map[format_uuid].default_format
            if custom_format.requires_fraction_replacement:
                formatted_value = _format_fraction(self._d128, custom_format)
            elif custom_format.format_type == FormatType.CUSTOM_TEXT:
                formatted_value = _decode_text_format(
                    custom_format,
                    self._model.table_string(self._table_id, self._string_id),
                )
            else:
                formatted_value = _decode_number_format(
                    custom_format,
                    self._d128,
                    format_map[format_uuid].name,
                )
        elif custom_format.format_type == FormatType.DECIMAL:
            return _format_decimal(self._d128, custom_format)
        elif custom_format.format_type == FormatType.CURRENCY:
            return _format_currency(self._d128, custom_format)
        elif custom_format.format_type == FormatType.BOOLEAN:
            return "TRUE" if self.value else "FALSE"
        elif custom_format.format_type == FormatType.PERCENT:
            return _format_decimal(self._d128 * 100, custom_format, percent=True)
        elif custom_format.format_type == FormatType.BASE:
            return _format_base(self._d128, custom_format)
        elif custom_format.format_type == FormatType.FRACTION:
            return _format_fraction(self._d128, custom_format)
        elif custom_format.format_type == FormatType.SCIENTIFIC:
            return _format_scientific(self._d128, custom_format)
        elif custom_format.format_type == FormatType.CHECKBOX:
            return CHECKBOX_TRUE_VALUE if self.value else CHECKBOX_FALSE_VALUE
        elif custom_format.format_type == FormatType.RATING:
            return STAR_RATING_VALUE * int(self._d128)
        else:
            formatted_value = str(self.value)
        return formatted_value

    def _date_format(self) -> str:
        date_format = self._model.table_format(self._table_id, self._date_format_id)
        if date_format.HasField("custom_uid"):
            format_uuid = NumbersUUID(date_format.custom_uid).hex
            format_map = self._model.custom_format_map()
            custom_format = format_map[format_uuid].default_format
            custom_format_string = custom_format.custom_format_string
            if custom_format.format_type == FormatType.CUSTOM_DATE:
                formatted_value = _decode_date_format(custom_format_string, self._datetime)
            else:
                warn(
                    f"Unexpected custom format type {custom_format.format_type}",
                    UnsupportedWarning,
                    stacklevel=3,
                )
                return ""
        else:
            formatted_value = _decode_date_format(date_format.date_time_format, self._datetime)
        return formatted_value

    def _duration_format(self) -> str:
        duration_format = self._model.table_format(self._table_id, self._duration_format_id)
        debug(
            "duration_format: @[%d,%d]: table_id=%d, duration_format_id=%d, duration_style=%s",
            self.row,
            self.col,
            self._table_id,
            self._duration_format_id,
            duration_format.duration_style,
        )

        duration_style = duration_format.duration_style
        unit_largest = duration_format.duration_unit_largest
        unit_smallest = duration_format.duration_unit_smallest
        if duration_format.use_automatic_duration_units:
            unit_smallest, unit_largest = _auto_units(self._double, duration_format)

        d = self._double
        dd = int(self._double)
        dstr = []

        def unit_in_range(largest, smallest, unit_type):
            return largest <= unit_type and smallest >= unit_type

        def pad_digits(d, largest, smallest, unit_type):
            return (largest == unit_type and smallest == unit_type) or d >= 10

        if unit_largest == DurationUnits.WEEK:
            dd = int(d / SECONDS_IN_WEEK)
            if unit_smallest != DurationUnits.WEEK:
                d -= SECONDS_IN_WEEK * dd
            dstr.append(str(dd) + _unit_format("week", dd, duration_style))

        if unit_in_range(unit_largest, unit_smallest, DurationUnits.DAY):
            dd = int(d / SECONDS_IN_DAY)
            if unit_smallest > DurationUnits.DAY:
                d -= SECONDS_IN_DAY * dd
            dstr.append(str(dd) + _unit_format("day", dd, duration_style))

        if unit_in_range(unit_largest, unit_smallest, DurationUnits.HOUR):
            dd = int(d / SECONDS_IN_HOUR)
            if unit_smallest > DurationUnits.HOUR:
                d -= SECONDS_IN_HOUR * dd
            dstr.append(str(dd) + _unit_format("hour", dd, duration_style))

        if unit_in_range(unit_largest, unit_smallest, DurationUnits.MINUTE):
            dd = int(d / 60)
            if unit_smallest > DurationUnits.MINUTE:
                d -= 60 * dd
            if duration_style == DurationStyle.COMPACT:
                pad = pad_digits(dd, unit_smallest, unit_largest, DurationUnits.MINUTE)
                dstr.append(("" if pad else "0") + str(dd))
            else:
                dstr.append(str(dd) + _unit_format("minute", dd, duration_style))

        if unit_in_range(unit_largest, unit_smallest, DurationUnits.SECOND):
            dd = int(d)
            if unit_smallest > DurationUnits.SECOND:
                d -= dd
            if duration_style == DurationStyle.COMPACT:
                pad = pad_digits(dd, unit_smallest, unit_largest, DurationUnits.SECOND)
                dstr.append(("" if pad else "0") + str(dd))
            else:
                dstr.append(str(dd) + _unit_format("second", dd, duration_style))

        if unit_smallest >= DurationUnits.MILLISECOND:
            dd = int(round(1000 * d))
            if duration_style == DurationStyle.COMPACT:
                padding = "0" if dd >= 10 else "00"
                padding = "" if dd >= 100 else padding
                dstr.append(f"{padding}{dd}")
            else:
                dstr.append(str(dd) + _unit_format("millisecond", dd, duration_style, "ms"))
        duration_str = (":" if duration_style == 0 else " ").join(dstr)
        if duration_style == DurationStyle.COMPACT:
            duration_str = re.sub(r":(\d\d\d)$", r".\1", duration_str)

        return duration_str

    def _set_formatting(
        self,
        format_id: int,
        format_type: FormattingType | CustomFormattingType,
        control_id: int | None = None,
        is_currency: bool = False,
    ) -> None:
        self._is_currency = is_currency
        if is_currency:
            self._type = CellType.CURRENCY
        if format_type == FormattingType.CURRENCY:
            self._currency_format_id = format_id
        elif format_type == FormattingType.TICKBOX:
            self._bool_format_id = format_id
            self._control_id = control_id
        elif format_type == FormattingType.RATING:
            self._num_format_id = format_id
            self._control_id = control_id
        elif format_type in [FormattingType.SLIDER, FormattingType.STEPPER]:
            if is_currency:
                self._currency_format_id = format_id
            else:
                self._num_format_id = format_id
            self._control_id = control_id
        elif format_type == FormattingType.POPUP:
            self._text_format_id = format_id
            self._control_id = control_id
        elif format_type in [FormattingType.DATETIME, CustomFormattingType.DATETIME]:
            self._date_format_id = format_id
        elif format_type in [FormattingType.TEXT, CustomFormattingType.TEXT]:
            self._text_format_id = format_id
        else:
            self._num_format_id = format_id



class NumberCell(Cell):
    """
    .. NOTE::

       Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
    """

    def __init__(self, row: int, col: int, value: float, cell_type=CellType.NUMBER) -> None:
        self._type = cell_type
        super().__init__(row, col, value)

    @property
    def value(self) -> int:
        return self._value


class TextCell(Cell):
    def __init__(self, row: int, col: int, value: str) -> None:
        self._type = CellType.TEXT
        super().__init__(row, col, value)

    @property
    def value(self) -> str:
        return self._value



[docs]
class RichTextCell(Cell):
    """
    .. NOTE::

       Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
    """

    def __init__(self, row: int, col: int, value) -> None:
        super().__init__(row, col, value["text"])
        self._type = CellType.RICH_TEXT
        self._bullets = value["bullets"]
        self._hyperlinks = value["hyperlinks"]
        if value["bulleted"]:
            self._formatted_bullets = [
                (
                    value["bullet_chars"][i] + " " + value["bullets"][i]
                    if value["bullet_chars"][i] is not None
                    else value["bullets"][i]
                )
                for i in range(len(self._bullets))
            ]
            self._is_bulleted = True

    @property
    def value(self) -> str:
        return self._value

    @property
    def bullets(self) -> list[str]:
        """List[str]: A list of the text bullets in the cell."""
        return self._bullets

    @property
    def formatted_bullets(self) -> str:
        """str: The bullets as a formatted multi-line string."""
        return self._formatted_bullets

    @property
    def hyperlinks(self) -> list[tuple] | None:
        """
        List[Tuple] | None: the hyperlinks in a cell or ``None``.

        Numbers does not support hyperlinks to cells within a spreadsheet, but does
        allow embedding links in cells. When cells contain hyperlinks,
        `numbers_parser` returns the text version of the cell. The `hyperlinks` property
        of cells where :py:attr:`numbers_parser.Cell.is_bulleted` is ``True`` is a
        list of text and URL tuples.

        Example
        -------
        .. code-block:: python

            cell = table.cell(0, 0)
            (text, url) = cell.hyperlinks[0]

        """
        return self._hyperlinks



# Backwards compatibility to earlier class names
class BulletedTextCell(RichTextCell):
    pass


class EmptyCell(Cell):
    """
    .. NOTE::

       Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
    """

    def __init__(self, row: int, col: int) -> None:
        super().__init__(row, col, None)
        self._type = CellType.EMPTY

    @property
    def value(self) -> None:
        return None

    @property
    def formatted_value(self) -> str:
        return ""


class BoolCell(Cell):
    """
    .. NOTE::

       Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
    """

    def __init__(self, row: int, col: int, value: bool) -> None:
        super().__init__(row, col, value)
        self._type = CellType.BOOL
        self._value = value

    @property
    def value(self) -> bool:
        return self._value


class DateCell(Cell):
    """
    .. NOTE::

       Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
    """

    def __init__(self, row: int, col: int, value: datetime) -> None:
        super().__init__(row, col, value)
        self._type = CellType.DATE

    @property
    def value(self) -> datetime:
        return self._value


class DurationCell(Cell):
    def __init__(self, row: int, col: int, value: timedelta) -> None:
        super().__init__(row, col, value)
        self._type = CellType.DURATION

    @property
    def value(self) -> timedelta:
        return self._value


class ErrorCell(Cell):
    """
    .. NOTE::

       Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
    """

    def __init__(self, row: int, col: int) -> None:
        super().__init__(row, col, None)
        self._type = CellType.ERROR

    @property
    def value(self) -> None:
        return None



[docs]
class MergedCell(Cell):
    """
    .. NOTE::

       Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
    """

    def __init__(self, row: int, col: int) -> None:
        super().__init__(row, col, None)
        self._type = CellType.MERGED

    @property
    def value(self) -> None:
        return None



def _pack_decimal128(value: float) -> bytearray:
    buffer = bytearray(16)
    exp = math.floor(math.log10(math.e) * math.log(abs(value))) if value != 0.0 else 0
    exp += DECIMAL128_BIAS - 16
    mantissa = abs(int(value / math.pow(10, exp - DECIMAL128_BIAS)))
    buffer[15] |= exp >> 7
    buffer[14] |= (exp & 0x7F) << 1
    i = 0
    while mantissa >= 1:
        buffer[i] = mantissa & 0xFF
        i += 1
        mantissa = int(mantissa / 256)
    if value < 0:
        buffer[15] |= 0x80
    return buffer


def _unpack_decimal128(buffer: bytearray) -> float:
    exp = (((buffer[15] & 0x7F) << 7) | (buffer[14] >> 1)) - DECIMAL128_BIAS
    mantissa = buffer[14] & 1
    for i in range(13, -1, -1):
        mantissa = mantissa * 256 + buffer[i]
    sign = 1 if buffer[15] & 0x80 else 0
    if sign == 1:
        mantissa = -mantissa
    value = mantissa * 10**exp
    return float(value)


def _decode_date_format_field(field: str, value: datetime) -> str:
    if field in DATETIME_FIELD_MAP:
        s = DATETIME_FIELD_MAP[field]
        if callable(s):
            return s(value)
        return value.strftime(s)
    warn(f"Unsupported field code '{field}'", UnsupportedWarning, stacklevel=4)
    return ""


def _decode_date_format(date_format, value):
    """Parse a custom date format string and return a formatted datetime value."""
    chars = [*date_format]
    index = 0
    in_string = False
    in_field = False
    result = ""
    field = ""
    while index < len(chars):
        current_char = chars[index]
        next_char = chars[index + 1] if index < len(chars) - 1 else None
        if current_char == "'":
            if next_char is None:
                break
            if chars[index + 1] == "'":
                result += "'"
                index += 2
            elif in_string:
                in_string = False
                index += 1
            else:
                in_string = True
                if in_field:
                    result += _decode_date_format_field(field, value)
                    in_field = False
                index += 1
        elif in_string:
            result += current_char
            index += 1
        elif not current_char.isalpha():
            if in_field:
                result += _decode_date_format_field(field, value)
                in_field = False
            result += current_char
            index += 1
        elif in_field:
            field += current_char
            index += 1
        else:
            in_field = True
            field = current_char
            index += 1
    if in_field:
        result += _decode_date_format_field(field, value)

    return result


def _decode_text_format(text_format, value: str):
    """Parse a custom date format string and return a formatted number value."""
    custom_format_string = text_format.custom_format_string
    return custom_format_string.replace(CUSTOM_TEXT_PLACEHOLDER, value)


def _expand_quotes(value: str) -> str:
    chars = [*value]
    index = 0
    in_string = False
    formatted_value = ""
    while index < len(chars):
        current_char = chars[index]
        next_char = chars[index + 1] if index < len(chars) - 1 else None
        if current_char == "'":
            if next_char is None:
                break
            if chars[index + 1] == "'":
                formatted_value += "'"
                index += 2
            elif in_string:
                in_string = False
                index += 1
            else:
                in_string = True
                index += 1
        else:
            formatted_value += current_char
            index += 1
    return formatted_value


def _decode_number_format(number_format, value, name):  # noqa: PLR0912
    """Parse a custom date format string and return a formatted number value."""
    custom_format_string = number_format.custom_format_string
    value *= number_format.scale_factor
    if "%" in custom_format_string and number_format.scale_factor == 1.0:
        # Per cent scale has 100x but % does not
        value *= 100.0

    if number_format.currency_code != "":
        # Replace currency code with symbol and no-break space
        custom_format_string = custom_format_string.replace(
            "\u00a4",
            number_format.currency_code + "\u00a0",
        )

    if (match := re.search(r"([#0.,]+(E[+]\d+)?)", custom_format_string)) is None:
        warn(
            f"Can't parse format string '{custom_format_string}'; skipping",
            UnsupportedWarning,
            stacklevel=1,
        )
        return custom_format_string
    format_spec = match.group(1)
    scientific_spec = match.group(2)

    if format_spec[0] == ".":
        (int_part, dec_part) = ("", format_spec[1:])
    elif "." in custom_format_string:
        (int_part, dec_part) = format_spec.split(".")
    else:
        (int_part, dec_part) = (format_spec, "")

    if scientific_spec is not None:
        # Scientific notation
        formatted_value = f"{value:.{len(dec_part) - 4}E}"
        formatted_value = custom_format_string.replace(format_spec, formatted_value)
        return _expand_quotes(formatted_value)

    num_decimals = len(dec_part)
    if num_decimals > 0:
        if dec_part[0] == "#":
            dec_pad = None
        elif number_format.num_nonspace_decimal_digits > 0:
            dec_pad = CellPadding.ZERO
        else:
            dec_pad = CellPadding.SPACE
    else:
        dec_pad = None
    dec_width = num_decimals

    (integer, decimal) = str(float(value)).split(".")
    if num_decimals > 0:
        integer = int(integer)
        decimal = round(float(f"0.{decimal}"), num_decimals)
    else:
        integer = round(value)
        decimal = float(f"0.{decimal}")

    num_integers = len(int_part.replace(",", ""))
    if not number_format.show_thousands_separator:
        int_part = int_part.replace(",", "")
    if num_integers > 0:
        if int_part[0] == "#":
            int_pad = None
            int_width = len(int_part)
        elif number_format.num_nonspace_integer_digits > 0:
            int_pad = CellPadding.ZERO
            if number_format.show_thousands_separator:
                num_commas = int(math.floor(math.log10(integer)) / 3) if integer != 0 else 0
                num_commas = max([num_commas, int((num_integers - 1) / 3)])
                int_width = num_integers + num_commas
            else:
                int_width = num_integers
        else:
            int_pad = CellPadding.SPACE
            int_width = len(int_part)
    else:
        int_pad = None
        int_width = num_integers

    int_pad_space_as_zero = False

    # Formatting integer zero:
    #   Blank (padded if needed) if int_pad is SPACE and no decimals
    #   No leading zero if:
    #     int_pad is NONE, dec_pad is SPACE
    #     int_pad is SPACE, dec_pad is SPACE
    #     int_pad is SPACE, dec_pad is ZERO
    #     int_pad is SPACE, dec_pad is NONE if num decimals < decimals length
    if integer == 0 and int_pad == CellPadding.SPACE and num_decimals == 0:
        formatted_value = "".rjust(int_width)
    elif integer == 0 and int_pad is None and dec_pad == CellPadding.SPACE:
        formatted_value = ""
    elif (integer == 0 and int_pad == CellPadding.SPACE and dec_pad is not None) or (
        integer == 0
        and int_pad == CellPadding.SPACE
        and dec_pad is None
        and len(str(decimal)) > num_decimals
    ):
        formatted_value = "".rjust(int_width)
    elif int_pad_space_as_zero or int_pad == CellPadding.ZERO:
        if number_format.show_thousands_separator:
            formatted_value = f"{integer:0{int_width},}"
        else:
            formatted_value = f"{integer:0{int_width}}"
    elif int_pad == CellPadding.SPACE:
        if number_format.show_thousands_separator:
            formatted_value = f"{integer:,}".rjust(int_width)
        else:
            formatted_value = str(integer).rjust(int_width)
    elif number_format.show_thousands_separator:
        formatted_value = f"{integer:,}"
    else:
        formatted_value = str(integer)

    if num_decimals:
        if dec_pad == CellPadding.ZERO or (dec_pad == CellPadding.SPACE and num_integers == 0):
            formatted_value += "." + f"{decimal:,.{dec_width}f}"[2:]
        elif dec_pad == CellPadding.SPACE and decimal == 0 and num_integers > 0:
            formatted_value += ".".ljust(dec_width + 1)
        elif dec_pad == CellPadding.SPACE:
            decimal_str = str(decimal)[2:]
            formatted_value += "." + decimal_str.ljust(dec_width)
        elif decimal or num_integers == 0:
            formatted_value += "." + str(decimal)[2:]

    formatted_value = custom_format_string.replace(format_spec, formatted_value)
    return _expand_quotes(formatted_value)


def _format_decimal(value: float, number_format, percent: bool = False) -> str:
    if value is None:
        return ""
    if value < 0 and number_format.negative_style == 1:
        accounting_style = False
        value = -value
    elif value < 0 and number_format.negative_style >= 2:
        accounting_style = True
        value = -value
    else:
        accounting_style = False
    thousands = "," if number_format.show_thousands_separator else ""

    if value.is_integer() and number_format.decimal_places >= DECIMAL_PLACES_AUTO:
        formatted_value = f"{int(value):{thousands}}"
    else:
        if number_format.decimal_places >= DECIMAL_PLACES_AUTO:
            formatted_value = str(sigfig(value, MAX_SIGNIFICANT_DIGITS, warn=False))
        else:
            formatted_value = sigfig(value, MAX_SIGNIFICANT_DIGITS, type=str, warn=False)
            formatted_value = sigfig(
                formatted_value,
                decimals=number_format.decimal_places,
                type=str,
            )
        if number_format.show_thousands_separator:
            formatted_value = sigfig(formatted_value, spacer=",", spacing=3, type=str)
            try:
                (integer, decimal) = formatted_value.split(".")
                formatted_value = integer + "." + decimal.replace(",", "")
            except ValueError:
                pass

    if percent:
        formatted_value += "%"

    if accounting_style:
        return f"({formatted_value})"
    return formatted_value


def _format_currency(value: float, number_format) -> str:
    formatted_value = _format_decimal(value, number_format)
    if number_format.currency_code in CURRENCY_SYMBOLS:
        symbol = CURRENCY_SYMBOLS[number_format.currency_code]
    else:
        symbol = number_format.currency_code + " "
    if number_format.use_accounting_style and value < 0:
        return f"{symbol}\t({formatted_value[1:]})"
    if number_format.use_accounting_style:
        return f"{symbol}\t{formatted_value}"
    return symbol + formatted_value


INT_TO_BASE_CHAR = [str(x) for x in range(10)] + [chr(x) for x in range(ord("A"), ord("Z") + 1)]


def _invert_bit_str(value: str) -> str:
    """Invert a binary value."""
    return "".join(["0" if b == "1" else "1" for b in value])


def _twos_complement(value: int, base: int) -> str:
    """Calculate the twos complement of a negative integer with minimum 32-bit precision."""
    num_bits = max([32, math.ceil(math.log2(abs(value))) + 1])
    bin_value = bin(abs(value))[2:]
    inverted_bin_value = _invert_bit_str(bin_value).rjust(num_bits, "1")
    twos_complement_dec = int(inverted_bin_value, 2) + 1

    if base == 2:
        return bin(twos_complement_dec)[2:].rjust(num_bits, "1")
    if base == 8:
        return oct(twos_complement_dec)[2:]
    return hex(twos_complement_dec)[2:].upper()


def _format_base(value: float, number_format) -> str:
    if value == 0:
        return "0".zfill(number_format.base_places)

    value = round(value)

    is_negative = False
    if not number_format.base_use_minus_sign and number_format.base in [2, 8, 16]:
        if value < 0:
            return _twos_complement(value, number_format.base)
        value = abs(value)
    elif value < 0:
        is_negative = True
        value = abs(value)

    formatted_value = []
    while value:
        formatted_value.append(int(value % number_format.base))
        value //= number_format.base
    formatted_value = "".join([INT_TO_BASE_CHAR[x] for x in formatted_value[::-1]])

    if is_negative:
        return "-" + formatted_value.zfill(number_format.base_places)
    return formatted_value.zfill(number_format.base_places)


def _format_fraction_parts_to(whole: int, numerator: int, denominator: int):
    if whole > 0:
        if numerator == 0:
            return str(whole)
        return f"{whole} {numerator}/{denominator}"
    if numerator == 0:
        return "0"
    if numerator == denominator:
        return "1"
    return f"{numerator}/{denominator}"


def _float_to_fraction(value: float, denominator: int) -> str:
    """Convert a float to the nearest fraction and return as a string."""
    whole = int(value)
    numerator = round(denominator * (value - whole))
    return _format_fraction_parts_to(whole, numerator, denominator)


def _float_to_n_digit_fraction(value: float, max_digits: int) -> str:
    """
    Convert a float to a fraction of a maxinum number of digits
    and return as a string.
    """
    max_denominator = 10**max_digits - 1
    (numerator, denominator) = (
        Fraction.from_float(value).limit_denominator(max_denominator).as_integer_ratio()
    )
    whole = int(value)
    numerator -= whole * denominator
    return _format_fraction_parts_to(whole, numerator, denominator)


def _format_fraction(value: float, number_format) -> str:
    accuracy = number_format.fraction_accuracy
    if accuracy & 0xFF000000:
        num_digits = 0x100000000 - accuracy
        return _float_to_n_digit_fraction(value, num_digits)
    return _float_to_fraction(value, accuracy)


def _format_scientific(value: float, number_format) -> str:
    formatted_value = sigfig(value, sigfigs=MAX_SIGNIFICANT_DIGITS, warn=False)
    return f"{formatted_value:.{number_format.decimal_places}E}"


def _unit_format(unit: str, value: int, style: int, abbrev: str | None = None):
    plural = "" if value == 1 else "s"
    if abbrev is None:
        abbrev = unit[0]
    if style == DurationStyle.COMPACT:
        return ""
    if style == DurationStyle.SHORT:
        return f"{abbrev}"
    return f" {unit}" + plural


def _auto_units(cell_value, number_format):
    unit_largest = number_format.duration_unit_largest
    unit_smallest = number_format.duration_unit_smallest

    if cell_value == 0:
        unit_largest = DurationUnits.DAY
        unit_smallest = DurationUnits.DAY
    else:
        if cell_value >= SECONDS_IN_WEEK:
            unit_largest = DurationUnits.WEEK
        elif cell_value >= SECONDS_IN_DAY:
            unit_largest = DurationUnits.DAY
        elif cell_value >= SECONDS_IN_HOUR:
            unit_largest = DurationUnits.HOUR
        elif cell_value >= 60:
            unit_largest = DurationUnits.MINUTE
        elif cell_value >= 1:
            unit_largest = DurationUnits.SECOND
        else:
            unit_largest = DurationUnits.MILLISECOND

        if math.floor(cell_value) != cell_value:
            unit_smallest = DurationUnits.MILLISECOND
        elif cell_value % 60:
            unit_smallest = DurationUnits.SECOND
        elif cell_value % SECONDS_IN_HOUR:
            unit_smallest = DurationUnits.MINUTE
        elif cell_value % SECONDS_IN_DAY:
            unit_smallest = DurationUnits.HOUR
        elif cell_value % SECONDS_IN_WEEK:
            unit_smallest = DurationUnits.DAY
        unit_smallest = max(unit_smallest, unit_largest)

    return unit_smallest, unit_largest


@dataclass()
class Formatting:
    allow_none: bool = False
    base_places: int = 0
    base_use_minus_sign: bool = True
    base: int = 10
    control_format: ControlFormattingType = ControlFormattingType.NUMBER
    currency_code: str = "GBP"
    date_time_format: str = DEFAULT_DATETIME_FORMAT
    decimal_places: int = None
    fraction_accuracy: FractionAccuracy = FractionAccuracy.THREE
    increment: float = 1.0
    maximum: float = 100.0
    minimum: float = 1.0
    popup_values: list[str] = field(default_factory=lambda: ["Item 1"])
    negative_style: NegativeNumberStyle = NegativeNumberStyle.MINUS
    show_thousands_separator: bool = False
    type: FormattingType = FormattingType.NUMBER
    use_accounting_style: bool = False
    _format_id = None

    def __post_init__(self):
        if not isinstance(self.type, FormattingType):
            type_name = type(self.type).__name__
            msg = f"Invalid format type '{type_name}'"
            raise TypeError(msg)

        if self.use_accounting_style and self.negative_style != NegativeNumberStyle.MINUS:
            warn(
                "use_accounting_style overriding negative_style",
                RuntimeWarning,
                stacklevel=4,
            )

        if self.type == FormattingType.DATETIME:
            formats = re.sub(r"[^a-zA-Z\s]", " ", self.date_time_format).split()
            for el in formats:
                if el not in DATETIME_FIELD_MAP:
                    msg = f"Invalid format specifier '{el}' in date/time format"
                    raise TypeError(msg)

        if self.type == FormattingType.CURRENCY and self.currency_code not in CURRENCIES:
            msg = f"Unsupported currency code '{self.currency_code}'"
            raise TypeError(msg)

        if self.decimal_places is None:
            if self.type == FormattingType.CURRENCY:
                self.decimal_places = 2
            else:
                self.decimal_places = DECIMAL_PLACES_AUTO

        if (
            self.type == FormattingType.BASE
            and not self.base_use_minus_sign
            and self.base not in (2, 8, 16)
        ):
            msg = f"base_use_minus_sign must be True for base {self.base}"
            raise TypeError(msg)

        if self.type == FormattingType.BASE and (self.base < 2 or self.base > MAX_BASE):
            msg = "base must be in range 2-36"
            raise TypeError(msg)


@dataclass
class CustomFormatting:
    type: CustomFormattingType = CustomFormattingType.NUMBER
    name: str = None
    integer_format: PaddingType = PaddingType.NONE
    decimal_format: PaddingType = PaddingType.NONE
    num_integers: int = 0
    num_decimals: int = 0
    show_thousands_separator: bool = False
    format: str = "%s"

    def __post_init__(self):
        if not isinstance(self.type, CustomFormattingType):
            type_name = type(self.type).__name__
            msg = f"Invalid format type '{type_name}'"
            raise TypeError(msg)

        if self.type == CustomFormattingType.TEXT and self.format.count("%s") > 1:
            msg = "Custom formats only allow one text substitution"
            raise TypeError(msg)

    @classmethod
    def from_archive(cls, archive: object):
        if archive.format_type == FormatType.CUSTOM_DATE:
            format_type = CustomFormattingType.DATETIME
        elif archive.format_type == FormatType.CUSTOM_NUMBER:
            format_type = CustomFormattingType.NUMBER
        else:
            format_type = CustomFormattingType.TEXT

        return CustomFormatting(name=archive.name, type=format_type)