| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 |
- from __future__ import annotations
- from datetime import (
- date,
- datetime,
- time,
- timedelta,
- )
- from typing import (
- TYPE_CHECKING,
- Any,
- Union,
- )
- from pandas.compat._optional import import_optional_dependency
- from pandas.util._decorators import doc
- import pandas as pd
- from pandas.core.shared_docs import _shared_docs
- from pandas.io.excel._base import BaseExcelReader
- if TYPE_CHECKING:
- from python_calamine import (
- CalamineSheet,
- CalamineWorkbook,
- )
- from pandas._typing import (
- FilePath,
- NaTType,
- ReadBuffer,
- Scalar,
- StorageOptions,
- )
# Union of the raw cell value types that ``CalamineReader.get_sheet_data``
# annotates the rows returned by ``sheet.to_python`` with.
_CellValue = Union[int, float, str, bool, time, date, datetime, timedelta]
- class CalamineReader(BaseExcelReader["CalamineWorkbook"]):
- @doc(storage_options=_shared_docs["storage_options"])
- def __init__(
- self,
- filepath_or_buffer: FilePath | ReadBuffer[bytes],
- storage_options: StorageOptions | None = None,
- engine_kwargs: dict | None = None,
- ) -> None:
- """
- Reader using calamine engine (xlsx/xls/xlsb/ods).
- Parameters
- ----------
- filepath_or_buffer : str, path to be parsed or
- an open readable stream.
- {storage_options}
- engine_kwargs : dict, optional
- Arbitrary keyword arguments passed to excel engine.
- """
- import_optional_dependency("python_calamine")
- super().__init__(
- filepath_or_buffer,
- storage_options=storage_options,
- engine_kwargs=engine_kwargs,
- )
- @property
- def _workbook_class(self) -> type[CalamineWorkbook]:
- from python_calamine import CalamineWorkbook
- return CalamineWorkbook
- def load_workbook(
- self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs: Any
- ) -> CalamineWorkbook:
- from python_calamine import load_workbook
- return load_workbook(filepath_or_buffer, **engine_kwargs)
- @property
- def sheet_names(self) -> list[str]:
- from python_calamine import SheetTypeEnum
- return [
- sheet.name
- for sheet in self.book.sheets_metadata
- if sheet.typ == SheetTypeEnum.WorkSheet
- ]
- def get_sheet_by_name(self, name: str) -> CalamineSheet:
- self.raise_if_bad_sheet_by_name(name)
- return self.book.get_sheet_by_name(name)
- def get_sheet_by_index(self, index: int) -> CalamineSheet:
- self.raise_if_bad_sheet_by_index(index)
- return self.book.get_sheet_by_index(index)
- def get_sheet_data(
- self, sheet: CalamineSheet, file_rows_needed: int | None = None
- ) -> list[list[Scalar | NaTType | time]]:
- def _convert_cell(value: _CellValue) -> Scalar | NaTType | time:
- if isinstance(value, float):
- val = int(value)
- if val == value:
- return val
- else:
- return value
- elif isinstance(value, date):
- return pd.Timestamp(value)
- elif isinstance(value, timedelta):
- return pd.Timedelta(value)
- elif isinstance(value, time):
- return value
- return value
- rows: list[list[_CellValue]] = sheet.to_python(
- skip_empty_area=False, nrows=file_rows_needed
- )
- data = [[_convert_cell(cell) for cell in row] for row in rows]
- return data
|