from __future__ import annotations
import logging
import math
import re
from dataclasses import asdict, dataclass, field, fields
from datetime import datetime, timedelta
from enum import IntEnum
from fractions import Fraction
from hashlib import sha1
from os.path import basename
from struct import pack, unpack
from typing import Any, NamedTuple
from warnings import warn
from sigfig import round as sigfig
from numbers_parser import __name__ as numbers_parser_name
from numbers_parser.constants import (
CHECKBOX_FALSE_VALUE,
CHECKBOX_TRUE_VALUE,
CURRENCY_CELL_TYPE,
CUSTOM_TEXT_PLACEHOLDER,
DATETIME_FIELD_MAP,
DECIMAL128_BIAS,
DECIMAL_PLACES_AUTO,
DEFAULT_ALIGNMENT,
DEFAULT_BORDER_COLOR,
DEFAULT_BORDER_STYLE,
DEFAULT_BORDER_WIDTH,
DEFAULT_DATETIME_FORMAT,
DEFAULT_FONT,
DEFAULT_FONT_SIZE,
DEFAULT_TEXT_INSET,
DEFAULT_TEXT_WRAP,
EMPTY_STORAGE_BUFFER,
EPOCH,
MAX_BASE,
MAX_SIGNIFICANT_DIGITS,
PACKAGE_ID,
SECONDS_IN_DAY,
SECONDS_IN_HOUR,
SECONDS_IN_WEEK,
STAR_RATING_VALUE,
CellPadding,
CellType,
ControlFormattingType,
CustomFormattingType,
DurationStyle,
DurationUnits,
FormattingType,
FormatType,
FractionAccuracy,
NegativeNumberStyle,
PaddingType,
)
from numbers_parser.currencies import CURRENCIES, CURRENCY_SYMBOLS
from numbers_parser.exceptions import UnsupportedError, UnsupportedWarning
from numbers_parser.formula import Formula
from numbers_parser.generated import TSPMessages_pb2 as TSPMessages
from numbers_parser.generated import TSTArchives_pb2 as TSTArchives
from numbers_parser.generated.TSWPArchives_pb2 import (
ParagraphStylePropertiesArchive as ParagraphStyle,
)
from numbers_parser.numbers_cache import Cacheable, cache
from numbers_parser.numbers_uuid import NumbersUUID
from numbers_parser.xrefs import xl_range
logger = logging.getLogger(numbers_parser_name)
debug = logger.debug
__all__ = [
"RGB",
"Alignment",
"BackgroundImage",
"BoolCell",
"Border",
"BorderType",
"BulletedTextCell",
"Cell",
"CellBorder",
"CustomFormatting",
"DateCell",
"DurationCell",
"EmptyCell",
"ErrorCell",
"Formatting",
"HorizontalJustification",
"MergeAnchor",
"MergeReference",
"MergedCell",
"NumberCell",
"RichTextCell",
"Style",
"TextCell",
"VerticalJustification",
]
[docs]
class BackgroundImage:
"""
A named document style that can be applied to cells.
.. code-block:: python
fh = open("cats.png", mode="rb")
image_data = fh.read()
cats_bg = doc.add_style(
name="Cats",
bg_image=BackgroundImage(image_data, "cats.png")
)
table.write(0, 0, "❤️ cats", style=cats_bg)
Currently only standard image files and not 'advanced' image fills are
supported. Tiling and scaling is not reported back and cannot be changed
when saving new cells.
Parameters
----------
data: bytes
Raw image data for a cell background image.
filename: str
Path to the image file.
"""
def __init__(self, data: bytes | None = None, filename: str | None = None) -> None:
self._data = data
self._filename = basename(filename)
@property
def data(self) -> bytes:
"""bytes: The background image as bytes for a cell, or None if no image."""
return self._data
@property
def filename(self) -> str:
"""str: The image filename for a cell, or None if no image."""
return self._filename
class HorizontalJustification(IntEnum):
LEFT = ParagraphStyle.TextAlignmentType.TATvalue0
RIGHT = ParagraphStyle.TextAlignmentType.TATvalue1
CENTER = ParagraphStyle.TextAlignmentType.TATvalue2
JUSTIFIED = ParagraphStyle.TextAlignmentType.TATvalue3
AUTO = ParagraphStyle.TextAlignmentType.TATvalue4
class VerticalJustification(IntEnum):
TOP = ParagraphStyle.DeprecatedParagraphBorderType.PBTvalue0
MIDDLE = ParagraphStyle.DeprecatedParagraphBorderType.PBTvalue1
BOTTOM = ParagraphStyle.DeprecatedParagraphBorderType.PBTvalue2
HORIZONTAL_MAP = {
"left": HorizontalJustification.LEFT,
"right": HorizontalJustification.RIGHT,
"center": HorizontalJustification.CENTER,
"justified": HorizontalJustification.JUSTIFIED,
"auto": HorizontalJustification.AUTO,
}
VERTICAL_MAP = {
"top": VerticalJustification.TOP,
"middle": VerticalJustification.MIDDLE,
"bottom": VerticalJustification.BOTTOM,
}
class _Alignment(NamedTuple):
"""Class for internal alignment type."""
horizontal: str
vertical: str
class Alignment(_Alignment):
def __new__(cls, horizontal=DEFAULT_ALIGNMENT[0], vertical=DEFAULT_ALIGNMENT[1]):
if isinstance(horizontal, str):
horizontal = horizontal.lower()
if horizontal not in HORIZONTAL_MAP:
msg = "invalid horizontal alignment"
raise TypeError(msg)
horizontal = HORIZONTAL_MAP[horizontal]
if isinstance(vertical, str):
vertical = vertical.lower()
if vertical not in VERTICAL_MAP:
msg = "invalid vertical alignment"
raise TypeError(msg)
vertical = VERTICAL_MAP[vertical]
return super(_Alignment, cls).__new__(cls, (horizontal, vertical))
DEFAULT_ALIGNMENT_CLASS = Alignment(*DEFAULT_ALIGNMENT)
class RGB(NamedTuple):
"""A color in RGB."""
r: int
g: int
b: int
def default_color() -> RGB:
return RGB(0, 0, 0)
[docs]
@dataclass
class Style:
"""
A named document style that can be applied to cells.
Parameters
----------
alignment: Alignment, optional, default: Alignment("auto", "top")
Horizontal and vertical alignment of the cell
bg_color: RGB | List[RGB], optional, default: RGB(0, 0, 0)
Background color or list of colors for gradients
bold: bool, optional, default: False
``True`` if the cell font is bold
font_color: RGB, optional, default: RGB(0, 0, 0)) – Font color
font_size: float, optional, default: DEFAULT_FONT_SIZE
Font size in points
font_name: str, optional, default: DEFAULT_FONT_SIZE
Font name
italic: bool, optional, default: False
``True`` if the cell font is italic
name: str, optional
Style name
underline: bool, optional, default: False) – True if the
cell font is underline
strikethrough: bool, optional, default: False) – True if
the cell font is strikethrough
first_indent: float, optional, default: 0.0) – First line
indent in points
left_indent: float, optional, default: 0.0
Left indent in points
right_indent: float, optional, default: 0.0
Right indent in points
text_inset: float, optional, default: DEFAULT_TEXT_INSET
Text inset in points
text_wrap: str, optional, default: True
``True`` if text wrapping is enabled
Raises
------
TypeError:
If arguments do not match the specified type or for objects have invalid arguments
IndexError:
If an image filename already exists in document
"""
alignment: Alignment = DEFAULT_ALIGNMENT_CLASS # : horizontal and vertical alignment
bg_image: object = None # : backgroung image
bg_color: RGB | list[RGB] = None
font_color: RGB = field(default_factory=default_color)
font_size: float = DEFAULT_FONT_SIZE
font_name: str = DEFAULT_FONT
bold: bool = False
italic: bool = False
strikethrough: bool = False
underline: bool = False
first_indent: float = 0
left_indent: float = 0
right_indent: float = 0
text_inset: float = DEFAULT_TEXT_INSET
text_wrap: bool = DEFAULT_TEXT_WRAP
name: str = None
_text_style_obj_id: int = None
_cell_style_obj_id: int = None
_update_cell_style: bool = False
_update_text_style: bool = False
@staticmethod
def _text_attrs():
return [
"alignment",
"bold",
"first_indent",
"font_color",
"font_name",
"font_size",
"italic",
"left_indent",
"name",
"right_indent",
"strikethrough",
"text_inset",
"underline",
]
@staticmethod
def _cell_attrs():
return [
"alignment",
"bg_color",
"bg_image",
"first_indent",
"left_indent",
"right_indent",
"text_inset",
"text_wrap",
]
@classmethod
def from_storage(cls, cell: object, model: object):
bg_image = BackgroundImage(*cell._image_data) if cell._image_data is not None else None
return Style(
alignment=model.cell_alignment(cell),
bg_image=bg_image,
bg_color=model.cell_bg_color(cell),
font_color=model.cell_font_color(cell),
font_size=model.cell_font_size(cell),
font_name=model.cell_font_name(cell),
bold=model.cell_is_bold(cell),
italic=model.cell_is_italic(cell),
strikethrough=model.cell_is_strikethrough(cell),
underline=model.cell_is_underline(cell),
name=model.cell_style_name(cell),
first_indent=model.cell_first_indent(cell),
left_indent=model.cell_left_indent(cell),
right_indent=model.cell_right_indent(cell),
text_inset=model.cell_text_inset(cell),
text_wrap=model.cell_text_wrap(cell),
_text_style_obj_id=model.text_style_object_id(cell),
_cell_style_obj_id=model.cell_style_object_id(cell),
)
def __post_init__(self):
self.bg_color = rgb_color(self.bg_color)
self.font_color = rgb_color(self.font_color)
if not isinstance(self.font_size, float):
msg = "size must be a float number of points"
raise TypeError(msg)
if not isinstance(self.font_name, str):
msg = "font name must be a string"
raise TypeError(msg)
for attr in ["bold", "italic", "underline", "strikethrough"]:
if not isinstance(getattr(self, attr), bool):
msg = f"{attr} argument must be boolean"
raise TypeError(msg)
def __setattr__(self, name: str, value: Any) -> None:
"""
Detect changes to cell styles and flag the style for
possible updates when saving the document.
"""
if name in ["bg_color", "font_color"]:
value = rgb_color(value)
if name == "alignment":
value = alignment(value)
if name in Style._text_attrs():
self.__dict__["_update_text_style"] = True
if name in Style._cell_attrs():
self.__dict__["_update_cell_style"] = True
if name not in ["_update_text_style", "_update_cell_style"]:
self.__dict__[name] = value
def rgb_color(color) -> RGB:
"""Raise a TypeError if a color is not a valid RGB value."""
if color is None:
return None
if isinstance(color, RGB):
return color
if isinstance(color, tuple):
if not (len(color) == 3 and all(isinstance(x, int) for x in color)):
msg = "RGB color must be an RGB or a tuple of 3 integers"
raise TypeError(msg)
return RGB(*color)
if isinstance(color, list):
return [rgb_color(c) for c in color]
msg = "RGB color must be an RGB or a tuple of 3 integers"
raise TypeError(msg)
def alignment(value) -> Alignment:
"""Raise a TypeError if a alignment is not a valid."""
if value is None:
return Alignment()
if isinstance(value, Alignment):
return value
if isinstance(value, tuple):
if not (len(value) == 2 and all(isinstance(x, (int, str)) for x in value)):
msg = "Alignment must be an Alignment or a tuple of 2 integers/strings"
raise TypeError(msg)
return Alignment(*value)
msg = "Alignment must be an Alignment or a tuple of 2 integers/strings"
raise TypeError(msg)
BORDER_STYLE_MAP = {"solid": 0, "dashes": 1, "dots": 2, "none": 3}
class BorderType(IntEnum):
SOLID = BORDER_STYLE_MAP["solid"]
DASHES = BORDER_STYLE_MAP["dashes"]
DOTS = BORDER_STYLE_MAP["dots"]
NONE = BORDER_STYLE_MAP["none"]
[docs]
class Border:
"""
Create a cell border to use with the :py:class:`~numbers_parser.Table` method
:py:meth:`~numbers_parser.Table.set_cell_border`.
.. code-block:: python
border_style = Border(8.0, RGB(29, 177, 0), "solid")
table.set_cell_border("B6", "left", border_style, 3)
table.set_cell_border(6, 1, "right", border_style)
Parameters
----------
width: float, optional, default: 0.35
Number of rows in the first table of a new document.
color: RGB, optional, default: RGB(0, 0, 0)
The line color for the border if present
style: BorderType, optional, default: ``None``
The type of border to create or ``None`` if there is no border defined. Valid
border types are:
* ``"solid"``: a solid line
* ``"dashes"``: a dashed line
* ``"dots"``: a dotted line
Raises
------
TypeError:
If the width is not a float, or the border type is invalid.
"""
def __init__(
self,
width: float = DEFAULT_BORDER_WIDTH,
color: RGB = None,
style: BorderType = None,
_order: int = 0,
) -> None:
if not isinstance(width, float):
msg = "width must be a float number of points"
raise TypeError(msg)
self.width = width
if color is None:
color = RGB(*DEFAULT_BORDER_COLOR)
self.color = rgb_color(color)
if style is None:
style = BorderType(BORDER_STYLE_MAP[DEFAULT_BORDER_STYLE])
if isinstance(style, str):
style = style.lower()
if style not in BORDER_STYLE_MAP:
msg = "invalid border style"
raise TypeError(msg)
self.style = BORDER_STYLE_MAP[style]
else:
self.style = style
self._order = _order
def __str__(self) -> str:
style_name = BorderType(self.style).name.lower()
return f"Border(width={self.width}, color={self.color}, style={style_name})"
def __eq__(self, value: object) -> bool:
return all(
[self.width == value.width, self.color == value.color, self.style == value.style],
)
class CellBorder:
def __init__(
self,
top_merged: bool = False,
right_merged: bool = False,
bottom_merged: bool = False,
left_merged: bool = False,
) -> None:
self._top = None
self._right = None
self._bottom = None
self._left = None
self._top_merged = top_merged
self._right_merged = right_merged
self._bottom_merged = bottom_merged
self._left_merged = left_merged
@property
def top(self):
if self._top_merged or self._top is None:
return None
return self._top
@top.setter
def top(self, value) -> None:
if self._top is None or value._order > self.top._order:
self._top = value
@property
def right(self):
if self._right_merged or self._right is None:
return None
return self._right
@right.setter
def right(self, value) -> None:
if self._right is None or value._order > self._right._order:
self._right = value
@property
def bottom(self):
if self._bottom_merged or self._bottom is None:
return None
return self._bottom
@bottom.setter
def bottom(self, value) -> None:
if self._bottom is None or value._order > self._bottom._order:
self._bottom = value
@property
def left(self):
if self._left_merged or self._left is None:
return None
return self._left
@left.setter
def left(self, value) -> None:
if self._left is None or value._order > self._left._order:
self._left = value
class MergeReference:
"""Cell reference for cells eliminated by a merge."""
def __init__(self, row_start: int, col_start: int, row_end: int, col_end: int) -> None:
self.rect = (row_start, col_start, row_end, col_end)
class MergeAnchor:
"""Cell reference for the merged cell."""
def __init__(self, size: tuple) -> None:
self.size = size
@dataclass
class CellStorageFlags:
_string_id: int = None
_rich_id: int = None
_cell_style_id: int = None
_text_style_id: int = None
_formula_id: int = None
_control_id: int = None
_formula_error_id: int = None
_suggest_id: int = None
_num_format_id: int = None
_currency_format_id: int = None
_date_format_id: int = None
_duration_format_id: int = None
_text_format_id: int = None
_bool_format_id: int = None
def __str__(self) -> str:
fields = [
f"{k[1:]}={v}" for k, v in asdict(self).items() if k.endswith("_id") and v is not None
]
return ", ".join([x for x in fields if x if not None])
def flags(self):
return [x.name for x in fields(self)]
[docs]
class Cell(CellStorageFlags, Cacheable):
"""
.. NOTE::
Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
"""
def __init__(self, row: int, col: int, value) -> None:
self._value = value
self.row = row
self.col = col
self._is_bulleted = False
self._formula_id = None
self._storage = None
self._style = None
self._d128 = None
self._double = None
self._seconds = None
super().__init__()
def __str__(self) -> str:
table_name = self._model.table_name(self._table_id)
sheet_name = self._model.sheet_name(self._model.table_id_to_sheet_id(self._table_id))
cell_str = f"{sheet_name}@{table_name}[{self.row},{self.col}]:"
cell_str += f"table_id={self._table_id}, type={self._type.name}, "
cell_str += f"value={self._value}, flags={self._flags:08x}, extras={self._extras:04x}"
return ", ".join([cell_str, super().__str__()])
@property
def image_filename(self):
warn(
"image_filename is deprecated and will be removed in the future. "
+ "Please use the style property",
DeprecationWarning,
stacklevel=2,
)
if self.style is not None and self.style.bg_image is not None:
return self.style.bg_image.filename
return None
@property
def image_data(self):
warn(
"image_data is deprecated and will be removed in the future. "
+ "Please use the style property",
DeprecationWarning,
stacklevel=2,
)
if self.style is not None and self.style.bg_image is not None:
return self.style.bg_image.data
return None
@property
def is_formula(self) -> bool:
"""bool: ``True`` if the cell contains a formula."""
return self._formula_id is not None
@property
def formula(self) -> str:
"""
str: The formula in a cell.
Formula evaluation relies on Numbers storing current values which should
usually be the case. In cells containing a formula, :py:meth:`numbers_parser.Cell.value`
returns computed value of the formula.
Returns
-------
str:
The text of the foruma in a cell, or `None` if there is no formula
present in a cell.
"""
if self._formula_id is not None:
table_formulas = self._model.table_formulas(self._table_id)
return table_formulas.formula(self._formula_id, self.row, self.col)
return None
@formula.setter
def formula(self, value: str) -> None:
self._formula_id = Formula.from_str(
self._model,
self._table_id,
self.row,
self.col,
value,
)
self._model.add_formula_dependency(self.row, self.col, self._table_id)
@property
def is_bulleted(self) -> bool:
"""bool: ``True`` if the cell contains text bullets."""
return self._is_bulleted
@property
def bullets(self) -> list[str] | None:
r"""
List[str] | None: The bullets in a cell, or ``None``.
Cells that contain bulleted or numbered lists are identified
by :py:attr:`numbers_parser.Cell.is_bulleted`. For these cells,
:py:attr:`numbers_parser.Cell.value` returns the whole cell contents.
Bullets can also be extracted into a list of paragraphs cell without the
bullet or numbering character. Newlines are not included in the
bullet list.
Example
-------
.. code-block:: python
doc = Document("bullets.numbers")
sheets = doc.sheets
tables = sheets[0].tables
table = tables[0]
if not table.cell(0, 1).is_bulleted:
print(table.cell(0, 1).value)
else:
bullets = ["* " + s for s in table.cell(0, 1).bullets]
print("\n".join(bullets))
return None
"""
return None
@property
def formatted_value(self) -> str:
"""
str: The formatted value of the cell as it appears in Numbers.
Interactive elements are converted into a suitable text format where
supported, or as their number values where there is no suitable
visual representation. Currently supported mappings are:
* Checkboxes are U+2610 (Ballow Box) or U+2611 (Ballot Box with Check)
* Ratings are their star value represented using (U+2605) (Black Star)
.. code-block:: python
>>> table = doc.default_table
>>> table.cell(0,0).value
False
>>> table.cell(0,0).formatted_value
'☐'
>>> table.cell(0,1).value
True
>>> table.cell(0,1).formatted_value
'☑'
>>> table.cell(1,1).value
3.0
>>> table.cell(1,1).formatted_value
'★★★'
"""
if self._duration_format_id is not None and self._double is not None:
return self._duration_format()
if self._date_format_id is not None and self._seconds is not None:
return self._date_format()
if (
self._text_format_id is not None
or self._num_format_id is not None
or self._currency_format_id is not None
or self._bool_format_id is not None
):
return self._custom_format()
return str(self.value).upper() if isinstance(self.value, bool) else str(self.value)
@property
def style(self) -> Style | None:
"""
Style | None: The :class:`Style` associated with the cell or ``None``.
Warns
-----
UnsupportedWarning: On assignment; use
:py:meth:`numbers_parser.Table.set_cell_style` instead.
"""
if self._style is None:
self._style = Style.from_storage(self, self._model)
return self._style
@style.setter
def style(self, _) -> None:
warn(
"cell style cannot be set; use Table.set_cell_style() instead",
UnsupportedWarning,
stacklevel=2,
)
@property
def border(self) -> CellBorder | None:
"""
CellBorder| None: The :class:`CellBorder` associated with the cell or ``None``.
Warns
-----
UnsupportedWarning: On assignment; use
:py:meth:`numbers_parser.Table.set_cell_border` instead.
"""
self._model.extract_strokes(self._table_id)
return self._border
@border.setter
def border(self, _) -> None:
warn(
"cell border values cannot be set; use Table.set_cell_border() instead",
UnsupportedWarning,
stacklevel=2,
)
@classmethod
def _empty_cell(cls, table_id: int, row: int, col: int, model: object):
return Cell._from_storage(table_id, row, col, EMPTY_STORAGE_BUFFER, model)
@classmethod
def _merged_cell(cls, table_id: int, row: int, col: int, model: object):
cell = MergedCell(row, col)
cell._model = model
cell._table_id = table_id
merge_cells = model.merge_cells(table_id)
cell._set_merge(merge_cells.get((row, col)))
return cell
@classmethod
def _from_value(cls, row: int, col: int, value):
# TODO: write needs to retain/init the border
if isinstance(value, str):
cell = TextCell(row, col, value)
elif isinstance(value, bool):
cell = BoolCell(row, col, value)
elif isinstance(value, int):
cell = NumberCell(row, col, value)
elif isinstance(value, float):
rounded_value = sigfig(value, sigfigs=MAX_SIGNIFICANT_DIGITS, warn=False)
if rounded_value != value:
warn(
f"'{value}' rounded to {MAX_SIGNIFICANT_DIGITS} significant digits",
RuntimeWarning,
stacklevel=2,
)
cell = NumberCell(row, col, rounded_value)
elif isinstance(value, datetime):
cell = DateCell(row, col, value)
elif isinstance(value, timedelta):
cell = DurationCell(row, col, value)
else:
msg = "Can't determine cell type from type " + type(value).__name__
raise ValueError(msg) # noqa: TRY004
return cell
@classmethod
def _from_storage( # noqa: PLR0912
cls,
table_id: int,
row: int,
col: int,
buffer: bytearray,
model: object,
) -> None:
d128 = None
double = None
seconds = None
version = buffer[0]
if version != 5:
msg = f"Cell storage version {version} is unsupported"
raise UnsupportedError(msg)
offset = 12
storage_flags = CellStorageFlags()
flags = unpack("<i", buffer[8:12])[0]
if flags & 0x1:
d128 = _unpack_decimal128(buffer[offset : offset + 16])
offset += 16
if flags & 0x2:
double = unpack("<d", buffer[offset : offset + 8])[0]
offset += 8
if flags & 0x4:
seconds = unpack("<d", buffer[offset : offset + 8])[0]
offset += 8
if flags & 0x8:
storage_flags._string_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
if flags & 0x10:
storage_flags._rich_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
if flags & 0x20:
storage_flags._cell_style_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
if flags & 0x40:
storage_flags._text_style_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
if flags & 0x80:
# cond_style_id skipped
offset += 4
# Skip flag 0x100 (cond_rule_style_id)
if flags & 0x200:
storage_flags._formula_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
if flags & 0x400:
storage_flags._control_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
# Skip flag 0x800 (formula_error_id)
if flags & 0x1000:
storage_flags._suggest_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
# Skip unused flags
offset += 4 * bin(flags & 0x900).count("1")
if flags & 0x2000:
storage_flags._num_format_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
if flags & 0x4000:
storage_flags._currency_format_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
if flags & 0x8000:
storage_flags._date_format_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
if flags & 0x10000:
storage_flags._duration_format_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
if flags & 0x20000:
storage_flags._text_format_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
if flags & 0x40000:
storage_flags._bool_format_id = unpack("<i", buffer[offset : offset + 4])[0]
offset += 4
# Skip 0x80000 (comment_id) and 0x100000 (import_warning_id)
cell_type = buffer[1]
if cell_type == TSTArchives.genericCellType:
cell = EmptyCell(row, col)
elif cell_type == TSTArchives.numberCellType:
cell = NumberCell(row, col, d128)
elif cell_type == TSTArchives.textCellType:
cell = TextCell(row, col, model.table_string(table_id, storage_flags._string_id))
elif cell_type == TSTArchives.dateCellType:
cell = DateCell(row, col, EPOCH + timedelta(seconds=seconds))
cell._datetime = cell._value
elif cell_type == TSTArchives.boolCellType:
cell = BoolCell(row, col, double > 0.0)
elif cell_type == TSTArchives.durationCellType:
cell = DurationCell(row, col, timedelta(seconds=double))
elif cell_type == TSTArchives.formulaErrorCellType:
cell = ErrorCell(row, col)
elif cell_type == TSTArchives.automaticCellType:
cell = RichTextCell(row, col, model.table_rich_text(table_id, storage_flags._rich_id))
elif cell_type == CURRENCY_CELL_TYPE:
cell = NumberCell(row, col, d128, cell_type=CellType.CURRENCY)
else:
msg = f"Cell type ID {cell_type} is not recognised"
raise UnsupportedError(msg)
cell._copy_flags(storage_flags)
cell._buffer = buffer
cell._model = model
cell._table_id = table_id
cell._d128 = d128
cell._double = double
cell._seconds = seconds
cell._extras = unpack("<H", buffer[6:8])[0]
cell._flags = flags
merge_cells = model.merge_cells(table_id)
cell._set_merge(merge_cells.get((row, col)))
if logging.getLogger(__package__).level == logging.DEBUG:
# Guard to reduce expense of computing fields
debug(str(cell))
return cell
def _copy_flags(self, storage_flags: CellStorageFlags) -> None:
for flag in storage_flags.flags():
setattr(self, flag, getattr(storage_flags, flag))
def _set_merge(self, merge_ref) -> None:
if isinstance(merge_ref, MergeAnchor):
self.is_merged = True
self.size = merge_ref.size
self.merge_range = None
self.rect = None
self._border = CellBorder()
elif isinstance(merge_ref, MergeReference):
self.is_merged = False
self.size = None
self.row_start = merge_ref.rect[0]
self.col_start = merge_ref.rect[1]
self.row_end = merge_ref.rect[2]
self.col_end = merge_ref.rect[3]
self.merge_range = xl_range(*merge_ref.rect)
self.rect = merge_ref.rect
top_merged = self.row > self.row_start
right_merged = self.col < self.col_end
bottom_merged = self.row < self.row_end
left_merged = self.col > self.col_start
self._border = CellBorder(top_merged, right_merged, bottom_merged, left_merged)
else:
self.is_merged = False
self.size = (1, 1)
self.merge_range = None
self.rect = None
self._border = CellBorder()
def _to_buffer(self) -> bytearray: # noqa: PLR0912, PLR0915
"""Create a storage buffer for a cell using v5 (modern) layout."""
if self._style is not None:
if self._style._text_style_obj_id is not None:
self._text_style_id = self._model._table_styles.lookup_key(
self._table_id,
TSPMessages.Reference(identifier=self._style._text_style_obj_id),
)
self._model.add_component_reference(
self._style._text_style_obj_id,
component_id=self._model._table_styles.id(self._table_id),
)
if self._style._cell_style_obj_id is not None:
self._cell_style_id = self._model._table_styles.lookup_key(
self._table_id,
TSPMessages.Reference(identifier=self._style._cell_style_obj_id),
)
self._model.add_component_reference(
self._style._cell_style_obj_id,
component_id=self._model._table_styles.id(self._table_id),
)
length = 12
if isinstance(self, NumberCell):
flags = 1
length += 16
if self._type == CellType.CURRENCY:
cell_type = CURRENCY_CELL_TYPE
else:
cell_type = TSTArchives.numberCellType
value = _pack_decimal128(self.value)
elif isinstance(self, TextCell):
flags = 8
length += 4
cell_type = TSTArchives.textCellType
value = pack("<i", self._model.table_string_key(self._table_id, self.value))
elif isinstance(self, DateCell):
flags = 4
length += 8
cell_type = TSTArchives.dateCellType
if self._value.tzinfo is None:
date_delta = self._value - EPOCH
else:
date_delta = self._value - EPOCH.astimezone(self._value.tzinfo)
value = pack("<d", float(date_delta.total_seconds()))
elif isinstance(self, BoolCell):
flags = 2
length += 8
cell_type = TSTArchives.boolCellType
value = pack("<d", float(self.value))
elif isinstance(self, DurationCell):
flags = 2
length += 8
cell_type = TSTArchives.durationCellType
value = pack("<d", float(self.value.total_seconds()))
elif isinstance(self, EmptyCell):
flags = 0
cell_type = TSTArchives.emptyCellValueType
value = b""
elif isinstance(self, MergedCell):
return None
elif isinstance(self, RichTextCell):
flags = 0
length += 4
cell_type = TSTArchives.automaticCellType
value = pack("<i", self._rich_id)
else:
data_type = type(self).__name__
table_name = self._model.table_name(self._table_id)
table_ref = f"@{table_name}:[{self.row},{self.col}]"
warn(
f"{table_ref}: unsupported data type {data_type} for save",
UnsupportedWarning,
stacklevel=1,
)
return None
storage = bytearray(12)
storage[0] = 5
storage[1] = cell_type
storage += value
if self._rich_id is not None:
flags |= 0x10
length += 4
storage += pack("<i", self._rich_id)
if self._cell_style_id is not None:
flags |= 0x20
length += 4
storage += pack("<i", self._cell_style_id)
if self._text_style_id is not None:
flags |= 0x40
length += 4
storage += pack("<i", self._text_style_id)
if self._formula_id is not None:
flags |= 0x200
length += 4
storage += pack("<i", self._formula_id)
if self._control_id is not None:
flags |= 0x400
length += 4
storage += pack("<i", self._control_id)
if self._suggest_id is not None:
flags |= 0x1000
length += 4
storage += pack("<i", self._suggest_id)
if self._num_format_id is not None:
flags |= 0x2000
length += 4
storage += pack("<i", self._num_format_id)
storage[6] |= 1
if self._currency_format_id is not None:
flags |= 0x4000
length += 4
storage += pack("<i", self._currency_format_id)
storage[6] |= 2
if self._date_format_id is not None:
flags |= 0x8000
length += 4
storage += pack("<i", self._date_format_id)
storage[6] |= 8
if self._duration_format_id is not None:
flags |= 0x10000
length += 4
storage += pack("<i", self._duration_format_id)
storage[6] |= 4
if self._text_format_id is not None:
flags |= 0x20000
length += 4
storage += pack("<i", self._text_format_id)
if self._bool_format_id is not None:
flags |= 0x40000
length += 4
storage += pack("<i", self._bool_format_id)
storage[6] |= 0x20
if self._string_id is not None:
storage[6] |= 0x80
storage[8:12] = pack("<i", flags)
if len(storage) < 32:
storage += bytearray(32 - length)
return storage[0:length]
def _update_value(self, value, cell: object) -> None:
if cell._type == CellType.NUMBER:
self._d128 = value
elif cell._type == CellType.DATE:
self._datetime = value
self._value = value
@property
@cache(num_args=0)
def _image_data(self) -> tuple[bytes, str]:
"""Return the background image data for a cell or None if no image."""
if self._cell_style_id is None:
return None
style = self._model.table_style(self._table_id, self._cell_style_id)
if not style.cell_properties.cell_fill.HasField("image"):
return None
image_id = style.cell_properties.cell_fill.image.imagedata.identifier
datas = self._model.objects[PACKAGE_ID].datas
stored_filename = next(
x.file_name for x in datas if x.identifier == image_id
) # pragma: nocover (issue-1333)
preferred_filename = next(
x.preferred_file_name for x in datas if x.identifier == image_id
) # pragma: nocover (issue-1333)
all_paths = self._model.objects.file_store.keys()
image_pathnames = [x for x in all_paths if x == f"Data/{stored_filename}"]
if len(image_pathnames) == 0:
warn(
f"Cannot find file '{preferred_filename}' in Numbers archive",
RuntimeWarning,
stacklevel=3,
)
return None
image_data = self._model.objects.file_store[image_pathnames[0]]
digest = sha1(image_data).digest() # noqa: S324
if digest not in self._model._images:
self._model._images[digest] = image_id
return (image_data, preferred_filename)
def _custom_format(self) -> str: # noqa: PLR0911
if self._text_format_id is not None and self._type == CellType.TEXT:
custom_format = self._model.table_format(self._table_id, self._text_format_id)
elif self._currency_format_id is not None:
custom_format = self._model.table_format(self._table_id, self._currency_format_id)
elif self._bool_format_id is not None and self._type == CellType.BOOL:
custom_format = self._model.table_format(self._table_id, self._bool_format_id)
elif self._num_format_id is not None:
custom_format = self._model.table_format(self._table_id, self._num_format_id)
else:
return str(self.value)
debug(
"custom_format: @[%d,%d]: format_type=%s, ",
self.row,
self.col,
custom_format.format_type,
)
if custom_format.HasField("custom_uid"):
format_uuid = NumbersUUID(custom_format.custom_uid).hex
format_map = self._model.custom_format_map()
custom_format = format_map[format_uuid].default_format
if custom_format.requires_fraction_replacement:
formatted_value = _format_fraction(self._d128, custom_format)
elif custom_format.format_type == FormatType.CUSTOM_TEXT:
formatted_value = _decode_text_format(
custom_format,
self._model.table_string(self._table_id, self._string_id),
)
else:
formatted_value = _decode_number_format(
custom_format,
self._d128,
format_map[format_uuid].name,
)
elif custom_format.format_type == FormatType.DECIMAL:
return _format_decimal(self._d128, custom_format)
elif custom_format.format_type == FormatType.CURRENCY:
return _format_currency(self._d128, custom_format)
elif custom_format.format_type == FormatType.BOOLEAN:
return "TRUE" if self.value else "FALSE"
elif custom_format.format_type == FormatType.PERCENT:
return _format_decimal(self._d128 * 100, custom_format, percent=True)
elif custom_format.format_type == FormatType.BASE:
return _format_base(self._d128, custom_format)
elif custom_format.format_type == FormatType.FRACTION:
return _format_fraction(self._d128, custom_format)
elif custom_format.format_type == FormatType.SCIENTIFIC:
return _format_scientific(self._d128, custom_format)
elif custom_format.format_type == FormatType.CHECKBOX:
return CHECKBOX_TRUE_VALUE if self.value else CHECKBOX_FALSE_VALUE
elif custom_format.format_type == FormatType.RATING:
return STAR_RATING_VALUE * int(self._d128)
else:
formatted_value = str(self.value)
return formatted_value
def _date_format(self) -> str:
date_format = self._model.table_format(self._table_id, self._date_format_id)
if date_format.HasField("custom_uid"):
format_uuid = NumbersUUID(date_format.custom_uid).hex
format_map = self._model.custom_format_map()
custom_format = format_map[format_uuid].default_format
custom_format_string = custom_format.custom_format_string
if custom_format.format_type == FormatType.CUSTOM_DATE:
formatted_value = _decode_date_format(custom_format_string, self._datetime)
else:
warn(
f"Unexpected custom format type {custom_format.format_type}",
UnsupportedWarning,
stacklevel=3,
)
return ""
else:
formatted_value = _decode_date_format(date_format.date_time_format, self._datetime)
return formatted_value
def _duration_format(self) -> str:
duration_format = self._model.table_format(self._table_id, self._duration_format_id)
debug(
"duration_format: @[%d,%d]: table_id=%d, duration_format_id=%d, duration_style=%s",
self.row,
self.col,
self._table_id,
self._duration_format_id,
duration_format.duration_style,
)
duration_style = duration_format.duration_style
unit_largest = duration_format.duration_unit_largest
unit_smallest = duration_format.duration_unit_smallest
if duration_format.use_automatic_duration_units:
unit_smallest, unit_largest = _auto_units(self._double, duration_format)
d = self._double
dd = int(self._double)
dstr = []
def unit_in_range(largest, smallest, unit_type):
return largest <= unit_type and smallest >= unit_type
def pad_digits(d, largest, smallest, unit_type):
return (largest == unit_type and smallest == unit_type) or d >= 10
if unit_largest == DurationUnits.WEEK:
dd = int(d / SECONDS_IN_WEEK)
if unit_smallest != DurationUnits.WEEK:
d -= SECONDS_IN_WEEK * dd
dstr.append(str(dd) + _unit_format("week", dd, duration_style))
if unit_in_range(unit_largest, unit_smallest, DurationUnits.DAY):
dd = int(d / SECONDS_IN_DAY)
if unit_smallest > DurationUnits.DAY:
d -= SECONDS_IN_DAY * dd
dstr.append(str(dd) + _unit_format("day", dd, duration_style))
if unit_in_range(unit_largest, unit_smallest, DurationUnits.HOUR):
dd = int(d / SECONDS_IN_HOUR)
if unit_smallest > DurationUnits.HOUR:
d -= SECONDS_IN_HOUR * dd
dstr.append(str(dd) + _unit_format("hour", dd, duration_style))
if unit_in_range(unit_largest, unit_smallest, DurationUnits.MINUTE):
dd = int(d / 60)
if unit_smallest > DurationUnits.MINUTE:
d -= 60 * dd
if duration_style == DurationStyle.COMPACT:
pad = pad_digits(dd, unit_smallest, unit_largest, DurationUnits.MINUTE)
dstr.append(("" if pad else "0") + str(dd))
else:
dstr.append(str(dd) + _unit_format("minute", dd, duration_style))
if unit_in_range(unit_largest, unit_smallest, DurationUnits.SECOND):
dd = int(d)
if unit_smallest > DurationUnits.SECOND:
d -= dd
if duration_style == DurationStyle.COMPACT:
pad = pad_digits(dd, unit_smallest, unit_largest, DurationUnits.SECOND)
dstr.append(("" if pad else "0") + str(dd))
else:
dstr.append(str(dd) + _unit_format("second", dd, duration_style))
if unit_smallest >= DurationUnits.MILLISECOND:
dd = int(round(1000 * d))
if duration_style == DurationStyle.COMPACT:
padding = "0" if dd >= 10 else "00"
padding = "" if dd >= 100 else padding
dstr.append(f"{padding}{dd}")
else:
dstr.append(str(dd) + _unit_format("millisecond", dd, duration_style, "ms"))
duration_str = (":" if duration_style == 0 else " ").join(dstr)
if duration_style == DurationStyle.COMPACT:
duration_str = re.sub(r":(\d\d\d)$", r".\1", duration_str)
return duration_str
def _set_formatting(
self,
format_id: int,
format_type: FormattingType | CustomFormattingType,
control_id: int | None = None,
is_currency: bool = False,
) -> None:
self._is_currency = is_currency
if is_currency:
self._type = CellType.CURRENCY
if format_type == FormattingType.CURRENCY:
self._currency_format_id = format_id
elif format_type == FormattingType.TICKBOX:
self._bool_format_id = format_id
self._control_id = control_id
elif format_type == FormattingType.RATING:
self._num_format_id = format_id
self._control_id = control_id
elif format_type in [FormattingType.SLIDER, FormattingType.STEPPER]:
if is_currency:
self._currency_format_id = format_id
else:
self._num_format_id = format_id
self._control_id = control_id
elif format_type == FormattingType.POPUP:
self._text_format_id = format_id
self._control_id = control_id
elif format_type in [FormattingType.DATETIME, CustomFormattingType.DATETIME]:
self._date_format_id = format_id
elif format_type in [FormattingType.TEXT, CustomFormattingType.TEXT]:
self._text_format_id = format_id
else:
self._num_format_id = format_id
class NumberCell(Cell):
"""
.. NOTE::
Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
"""
def __init__(self, row: int, col: int, value: float, cell_type=CellType.NUMBER) -> None:
self._type = cell_type
super().__init__(row, col, value)
@property
def value(self) -> int:
return self._value
class TextCell(Cell):
def __init__(self, row: int, col: int, value: str) -> None:
self._type = CellType.TEXT
super().__init__(row, col, value)
@property
def value(self) -> str:
return self._value
[docs]
class RichTextCell(Cell):
"""
.. NOTE::
Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
"""
def __init__(self, row: int, col: int, value) -> None:
super().__init__(row, col, value["text"])
self._type = CellType.RICH_TEXT
self._bullets = value["bullets"]
self._hyperlinks = value["hyperlinks"]
if value["bulleted"]:
self._formatted_bullets = [
(
value["bullet_chars"][i] + " " + value["bullets"][i]
if value["bullet_chars"][i] is not None
else value["bullets"][i]
)
for i in range(len(self._bullets))
]
self._is_bulleted = True
@property
def value(self) -> str:
return self._value
@property
def bullets(self) -> list[str]:
"""List[str]: A list of the text bullets in the cell."""
return self._bullets
@property
def formatted_bullets(self) -> str:
"""str: The bullets as a formatted multi-line string."""
return self._formatted_bullets
@property
def hyperlinks(self) -> list[tuple] | None:
"""
List[Tuple] | None: the hyperlinks in a cell or ``None``.
Numbers does not support hyperlinks to cells within a spreadsheet, but does
allow embedding links in cells. When cells contain hyperlinks,
`numbers_parser` returns the text version of the cell. The `hyperlinks` property
of cells where :py:attr:`numbers_parser.Cell.is_bulleted` is ``True`` is a
list of text and URL tuples.
Example
-------
.. code-block:: python
cell = table.cell(0, 0)
(text, url) = cell.hyperlinks[0]
"""
return self._hyperlinks
# Backwards compatibility to earlier class names
class BulletedTextCell(RichTextCell):
pass
class EmptyCell(Cell):
"""
.. NOTE::
Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
"""
def __init__(self, row: int, col: int) -> None:
super().__init__(row, col, None)
self._type = CellType.EMPTY
@property
def value(self) -> None:
return None
@property
def formatted_value(self) -> str:
return ""
class BoolCell(Cell):
"""
.. NOTE::
Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
"""
def __init__(self, row: int, col: int, value: bool) -> None:
super().__init__(row, col, value)
self._type = CellType.BOOL
self._value = value
@property
def value(self) -> bool:
return self._value
class DateCell(Cell):
"""
.. NOTE::
Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
"""
def __init__(self, row: int, col: int, value: datetime) -> None:
super().__init__(row, col, value)
self._type = CellType.DATE
@property
def value(self) -> datetime:
return self._value
class DurationCell(Cell):
def __init__(self, row: int, col: int, value: timedelta) -> None:
super().__init__(row, col, value)
self._type = CellType.DURATION
@property
def value(self) -> timedelta:
return self._value
class ErrorCell(Cell):
"""
.. NOTE::
Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
"""
def __init__(self, row: int, col: int) -> None:
super().__init__(row, col, None)
self._type = CellType.ERROR
@property
def value(self) -> None:
return None
[docs]
class MergedCell(Cell):
"""
.. NOTE::
Do not instantiate directly. Cells are created by :py:class:`~numbers_parser.Document`.
"""
def __init__(self, row: int, col: int) -> None:
super().__init__(row, col, None)
self._type = CellType.MERGED
@property
def value(self) -> None:
return None
def _pack_decimal128(value: float) -> bytearray:
buffer = bytearray(16)
exp = math.floor(math.log10(math.e) * math.log(abs(value))) if value != 0.0 else 0
exp += DECIMAL128_BIAS - 16
mantissa = abs(int(value / math.pow(10, exp - DECIMAL128_BIAS)))
buffer[15] |= exp >> 7
buffer[14] |= (exp & 0x7F) << 1
i = 0
while mantissa >= 1:
buffer[i] = mantissa & 0xFF
i += 1
mantissa = int(mantissa / 256)
if value < 0:
buffer[15] |= 0x80
return buffer
def _unpack_decimal128(buffer: bytearray) -> float:
exp = (((buffer[15] & 0x7F) << 7) | (buffer[14] >> 1)) - DECIMAL128_BIAS
mantissa = buffer[14] & 1
for i in range(13, -1, -1):
mantissa = mantissa * 256 + buffer[i]
sign = 1 if buffer[15] & 0x80 else 0
if sign == 1:
mantissa = -mantissa
value = mantissa * 10**exp
return float(value)
def _decode_date_format_field(field: str, value: datetime) -> str:
if field in DATETIME_FIELD_MAP:
s = DATETIME_FIELD_MAP[field]
if callable(s):
return s(value)
return value.strftime(s)
warn(f"Unsupported field code '{field}'", UnsupportedWarning, stacklevel=4)
return ""
def _decode_date_format(date_format, value):
"""Parse a custom date format string and return a formatted datetime value."""
chars = [*date_format]
index = 0
in_string = False
in_field = False
result = ""
field = ""
while index < len(chars):
current_char = chars[index]
next_char = chars[index + 1] if index < len(chars) - 1 else None
if current_char == "'":
if next_char is None:
break
if chars[index + 1] == "'":
result += "'"
index += 2
elif in_string:
in_string = False
index += 1
else:
in_string = True
if in_field:
result += _decode_date_format_field(field, value)
in_field = False
index += 1
elif in_string:
result += current_char
index += 1
elif not current_char.isalpha():
if in_field:
result += _decode_date_format_field(field, value)
in_field = False
result += current_char
index += 1
elif in_field:
field += current_char
index += 1
else:
in_field = True
field = current_char
index += 1
if in_field:
result += _decode_date_format_field(field, value)
return result
def _decode_text_format(text_format, value: str):
"""Parse a custom date format string and return a formatted number value."""
custom_format_string = text_format.custom_format_string
return custom_format_string.replace(CUSTOM_TEXT_PLACEHOLDER, value)
def _expand_quotes(value: str) -> str:
chars = [*value]
index = 0
in_string = False
formatted_value = ""
while index < len(chars):
current_char = chars[index]
next_char = chars[index + 1] if index < len(chars) - 1 else None
if current_char == "'":
if next_char is None:
break
if chars[index + 1] == "'":
formatted_value += "'"
index += 2
elif in_string:
in_string = False
index += 1
else:
in_string = True
index += 1
else:
formatted_value += current_char
index += 1
return formatted_value
def _decode_number_format(number_format, value, name): # noqa: PLR0912
"""Parse a custom date format string and return a formatted number value."""
custom_format_string = number_format.custom_format_string
value *= number_format.scale_factor
if "%" in custom_format_string and number_format.scale_factor == 1.0:
# Per cent scale has 100x but % does not
value *= 100.0
if number_format.currency_code != "":
# Replace currency code with symbol and no-break space
custom_format_string = custom_format_string.replace(
"\u00a4",
number_format.currency_code + "\u00a0",
)
if (match := re.search(r"([#0.,]+(E[+]\d+)?)", custom_format_string)) is None:
warn(
f"Can't parse format string '{custom_format_string}'; skipping",
UnsupportedWarning,
stacklevel=1,
)
return custom_format_string
format_spec = match.group(1)
scientific_spec = match.group(2)
if format_spec[0] == ".":
(int_part, dec_part) = ("", format_spec[1:])
elif "." in custom_format_string:
(int_part, dec_part) = format_spec.split(".")
else:
(int_part, dec_part) = (format_spec, "")
if scientific_spec is not None:
# Scientific notation
formatted_value = f"{value:.{len(dec_part) - 4}E}"
formatted_value = custom_format_string.replace(format_spec, formatted_value)
return _expand_quotes(formatted_value)
num_decimals = len(dec_part)
if num_decimals > 0:
if dec_part[0] == "#":
dec_pad = None
elif number_format.num_nonspace_decimal_digits > 0:
dec_pad = CellPadding.ZERO
else:
dec_pad = CellPadding.SPACE
else:
dec_pad = None
dec_width = num_decimals
(integer, decimal) = str(float(value)).split(".")
if num_decimals > 0:
integer = int(integer)
decimal = round(float(f"0.{decimal}"), num_decimals)
else:
integer = round(value)
decimal = float(f"0.{decimal}")
num_integers = len(int_part.replace(",", ""))
if not number_format.show_thousands_separator:
int_part = int_part.replace(",", "")
if num_integers > 0:
if int_part[0] == "#":
int_pad = None
int_width = len(int_part)
elif number_format.num_nonspace_integer_digits > 0:
int_pad = CellPadding.ZERO
if number_format.show_thousands_separator:
num_commas = int(math.floor(math.log10(integer)) / 3) if integer != 0 else 0
num_commas = max([num_commas, int((num_integers - 1) / 3)])
int_width = num_integers + num_commas
else:
int_width = num_integers
else:
int_pad = CellPadding.SPACE
int_width = len(int_part)
else:
int_pad = None
int_width = num_integers
int_pad_space_as_zero = False
# Formatting integer zero:
# Blank (padded if needed) if int_pad is SPACE and no decimals
# No leading zero if:
# int_pad is NONE, dec_pad is SPACE
# int_pad is SPACE, dec_pad is SPACE
# int_pad is SPACE, dec_pad is ZERO
# int_pad is SPACE, dec_pad is NONE if num decimals < decimals length
if integer == 0 and int_pad == CellPadding.SPACE and num_decimals == 0:
formatted_value = "".rjust(int_width)
elif integer == 0 and int_pad is None and dec_pad == CellPadding.SPACE:
formatted_value = ""
elif (integer == 0 and int_pad == CellPadding.SPACE and dec_pad is not None) or (
integer == 0
and int_pad == CellPadding.SPACE
and dec_pad is None
and len(str(decimal)) > num_decimals
):
formatted_value = "".rjust(int_width)
elif int_pad_space_as_zero or int_pad == CellPadding.ZERO:
if number_format.show_thousands_separator:
formatted_value = f"{integer:0{int_width},}"
else:
formatted_value = f"{integer:0{int_width}}"
elif int_pad == CellPadding.SPACE:
if number_format.show_thousands_separator:
formatted_value = f"{integer:,}".rjust(int_width)
else:
formatted_value = str(integer).rjust(int_width)
elif number_format.show_thousands_separator:
formatted_value = f"{integer:,}"
else:
formatted_value = str(integer)
if num_decimals:
if dec_pad == CellPadding.ZERO or (dec_pad == CellPadding.SPACE and num_integers == 0):
formatted_value += "." + f"{decimal:,.{dec_width}f}"[2:]
elif dec_pad == CellPadding.SPACE and decimal == 0 and num_integers > 0:
formatted_value += ".".ljust(dec_width + 1)
elif dec_pad == CellPadding.SPACE:
decimal_str = str(decimal)[2:]
formatted_value += "." + decimal_str.ljust(dec_width)
elif decimal or num_integers == 0:
formatted_value += "." + str(decimal)[2:]
formatted_value = custom_format_string.replace(format_spec, formatted_value)
return _expand_quotes(formatted_value)
def _format_decimal(value: float, number_format, percent: bool = False) -> str:
if value is None:
return ""
if value < 0 and number_format.negative_style == 1:
accounting_style = False
value = -value
elif value < 0 and number_format.negative_style >= 2:
accounting_style = True
value = -value
else:
accounting_style = False
thousands = "," if number_format.show_thousands_separator else ""
if value.is_integer() and number_format.decimal_places >= DECIMAL_PLACES_AUTO:
formatted_value = f"{int(value):{thousands}}"
else:
if number_format.decimal_places >= DECIMAL_PLACES_AUTO:
formatted_value = str(sigfig(value, MAX_SIGNIFICANT_DIGITS, warn=False))
else:
formatted_value = sigfig(value, MAX_SIGNIFICANT_DIGITS, type=str, warn=False)
formatted_value = sigfig(
formatted_value,
decimals=number_format.decimal_places,
type=str,
)
if number_format.show_thousands_separator:
formatted_value = sigfig(formatted_value, spacer=",", spacing=3, type=str)
try:
(integer, decimal) = formatted_value.split(".")
formatted_value = integer + "." + decimal.replace(",", "")
except ValueError:
pass
if percent:
formatted_value += "%"
if accounting_style:
return f"({formatted_value})"
return formatted_value
def _format_currency(value: float, number_format) -> str:
formatted_value = _format_decimal(value, number_format)
if number_format.currency_code in CURRENCY_SYMBOLS:
symbol = CURRENCY_SYMBOLS[number_format.currency_code]
else:
symbol = number_format.currency_code + " "
if number_format.use_accounting_style and value < 0:
return f"{symbol}\t({formatted_value[1:]})"
if number_format.use_accounting_style:
return f"{symbol}\t{formatted_value}"
return symbol + formatted_value
INT_TO_BASE_CHAR = [str(x) for x in range(10)] + [chr(x) for x in range(ord("A"), ord("Z") + 1)]
def _invert_bit_str(value: str) -> str:
"""Invert a binary value."""
return "".join(["0" if b == "1" else "1" for b in value])
def _twos_complement(value: int, base: int) -> str:
"""Calculate the twos complement of a negative integer with minimum 32-bit precision."""
num_bits = max([32, math.ceil(math.log2(abs(value))) + 1])
bin_value = bin(abs(value))[2:]
inverted_bin_value = _invert_bit_str(bin_value).rjust(num_bits, "1")
twos_complement_dec = int(inverted_bin_value, 2) + 1
if base == 2:
return bin(twos_complement_dec)[2:].rjust(num_bits, "1")
if base == 8:
return oct(twos_complement_dec)[2:]
return hex(twos_complement_dec)[2:].upper()
def _format_base(value: float, number_format) -> str:
if value == 0:
return "0".zfill(number_format.base_places)
value = round(value)
is_negative = False
if not number_format.base_use_minus_sign and number_format.base in [2, 8, 16]:
if value < 0:
return _twos_complement(value, number_format.base)
value = abs(value)
elif value < 0:
is_negative = True
value = abs(value)
formatted_value = []
while value:
formatted_value.append(int(value % number_format.base))
value //= number_format.base
formatted_value = "".join([INT_TO_BASE_CHAR[x] for x in formatted_value[::-1]])
if is_negative:
return "-" + formatted_value.zfill(number_format.base_places)
return formatted_value.zfill(number_format.base_places)
def _format_fraction_parts_to(whole: int, numerator: int, denominator: int):
if whole > 0:
if numerator == 0:
return str(whole)
return f"{whole} {numerator}/{denominator}"
if numerator == 0:
return "0"
if numerator == denominator:
return "1"
return f"{numerator}/{denominator}"
def _float_to_fraction(value: float, denominator: int) -> str:
"""Convert a float to the nearest fraction and return as a string."""
whole = int(value)
numerator = round(denominator * (value - whole))
return _format_fraction_parts_to(whole, numerator, denominator)
def _float_to_n_digit_fraction(value: float, max_digits: int) -> str:
"""
Convert a float to a fraction of a maxinum number of digits
and return as a string.
"""
max_denominator = 10**max_digits - 1
(numerator, denominator) = (
Fraction.from_float(value).limit_denominator(max_denominator).as_integer_ratio()
)
whole = int(value)
numerator -= whole * denominator
return _format_fraction_parts_to(whole, numerator, denominator)
def _format_fraction(value: float, number_format) -> str:
accuracy = number_format.fraction_accuracy
if accuracy & 0xFF000000:
num_digits = 0x100000000 - accuracy
return _float_to_n_digit_fraction(value, num_digits)
return _float_to_fraction(value, accuracy)
def _format_scientific(value: float, number_format) -> str:
formatted_value = sigfig(value, sigfigs=MAX_SIGNIFICANT_DIGITS, warn=False)
return f"{formatted_value:.{number_format.decimal_places}E}"
def _unit_format(unit: str, value: int, style: int, abbrev: str | None = None):
plural = "" if value == 1 else "s"
if abbrev is None:
abbrev = unit[0]
if style == DurationStyle.COMPACT:
return ""
if style == DurationStyle.SHORT:
return f"{abbrev}"
return f" {unit}" + plural
def _auto_units(cell_value, number_format):
unit_largest = number_format.duration_unit_largest
unit_smallest = number_format.duration_unit_smallest
if cell_value == 0:
unit_largest = DurationUnits.DAY
unit_smallest = DurationUnits.DAY
else:
if cell_value >= SECONDS_IN_WEEK:
unit_largest = DurationUnits.WEEK
elif cell_value >= SECONDS_IN_DAY:
unit_largest = DurationUnits.DAY
elif cell_value >= SECONDS_IN_HOUR:
unit_largest = DurationUnits.HOUR
elif cell_value >= 60:
unit_largest = DurationUnits.MINUTE
elif cell_value >= 1:
unit_largest = DurationUnits.SECOND
else:
unit_largest = DurationUnits.MILLISECOND
if math.floor(cell_value) != cell_value:
unit_smallest = DurationUnits.MILLISECOND
elif cell_value % 60:
unit_smallest = DurationUnits.SECOND
elif cell_value % SECONDS_IN_HOUR:
unit_smallest = DurationUnits.MINUTE
elif cell_value % SECONDS_IN_DAY:
unit_smallest = DurationUnits.HOUR
elif cell_value % SECONDS_IN_WEEK:
unit_smallest = DurationUnits.DAY
unit_smallest = max(unit_smallest, unit_largest)
return unit_smallest, unit_largest
@dataclass()
class Formatting:
allow_none: bool = False
base_places: int = 0
base_use_minus_sign: bool = True
base: int = 10
control_format: ControlFormattingType = ControlFormattingType.NUMBER
currency_code: str = "GBP"
date_time_format: str = DEFAULT_DATETIME_FORMAT
decimal_places: int = None
fraction_accuracy: FractionAccuracy = FractionAccuracy.THREE
increment: float = 1.0
maximum: float = 100.0
minimum: float = 1.0
popup_values: list[str] = field(default_factory=lambda: ["Item 1"])
negative_style: NegativeNumberStyle = NegativeNumberStyle.MINUS
show_thousands_separator: bool = False
type: FormattingType = FormattingType.NUMBER
use_accounting_style: bool = False
_format_id = None
def __post_init__(self):
if not isinstance(self.type, FormattingType):
type_name = type(self.type).__name__
msg = f"Invalid format type '{type_name}'"
raise TypeError(msg)
if self.use_accounting_style and self.negative_style != NegativeNumberStyle.MINUS:
warn(
"use_accounting_style overriding negative_style",
RuntimeWarning,
stacklevel=4,
)
if self.type == FormattingType.DATETIME:
formats = re.sub(r"[^a-zA-Z\s]", " ", self.date_time_format).split()
for el in formats:
if el not in DATETIME_FIELD_MAP:
msg = f"Invalid format specifier '{el}' in date/time format"
raise TypeError(msg)
if self.type == FormattingType.CURRENCY and self.currency_code not in CURRENCIES:
msg = f"Unsupported currency code '{self.currency_code}'"
raise TypeError(msg)
if self.decimal_places is None:
if self.type == FormattingType.CURRENCY:
self.decimal_places = 2
else:
self.decimal_places = DECIMAL_PLACES_AUTO
if (
self.type == FormattingType.BASE
and not self.base_use_minus_sign
and self.base not in (2, 8, 16)
):
msg = f"base_use_minus_sign must be True for base {self.base}"
raise TypeError(msg)
if self.type == FormattingType.BASE and (self.base < 2 or self.base > MAX_BASE):
msg = "base must be in range 2-36"
raise TypeError(msg)
@dataclass
class CustomFormatting:
type: CustomFormattingType = CustomFormattingType.NUMBER
name: str = None
integer_format: PaddingType = PaddingType.NONE
decimal_format: PaddingType = PaddingType.NONE
num_integers: int = 0
num_decimals: int = 0
show_thousands_separator: bool = False
format: str = "%s"
def __post_init__(self):
if not isinstance(self.type, CustomFormattingType):
type_name = type(self.type).__name__
msg = f"Invalid format type '{type_name}'"
raise TypeError(msg)
if self.type == CustomFormattingType.TEXT and self.format.count("%s") > 1:
msg = "Custom formats only allow one text substitution"
raise TypeError(msg)
@classmethod
def from_archive(cls, archive: object):
if archive.format_type == FormatType.CUSTOM_DATE:
format_type = CustomFormattingType.DATETIME
elif archive.format_type == FormatType.CUSTOM_NUMBER:
format_type = CustomFormattingType.NUMBER
else:
format_type = CustomFormattingType.TEXT
return CustomFormatting(name=archive.name, type=format_type)