imagesize.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. import io
  2. import os
  3. import re
  4. import struct
  5. from xml.etree import ElementTree
# Names exported by ``from imagesize import *``.
__all__ = ["get", "getDPI", "__version__"]

__version__ = "1.4.1"

# Unit codes understood by _convertToDPI().  Each code is the power of ten
# that _convertToDPI applies on top of the metre factor 0.0254
# (e.g. _UNIT_KM = -3 -> 0.0000254, _UNIT_CM = 2 -> 2.54).
_UNIT_KM = -3
_UNIT_100M = -2
_UNIT_10M = -1
_UNIT_1M = 0
_UNIT_10CM = 1
_UNIT_CM = 2
_UNIT_MM = 3
_UNIT_0_1MM = 4
_UNIT_0_01MM = 5
_UNIT_UM = 6
# NOTE(review): same value as _UNIT_UM, so _convertToDPI() cannot tell the
# two apart and would scale an "inch" density by 25400.  The constant is not
# referenced in the visible code -- confirm the intended value before use.
_UNIT_INCH = 6

# Presumably maps a TIFF field type code to the byte size of one value of
# that type (get() reports codes 3 and 4 as SHORT and LONG) -- TODO confirm.
# Not referenced in the visible code.
_TIFF_TYPE_SIZES = {
    1: 1,
    2: 1,
    3: 2,
    4: 4,
    5: 8,
    6: 1,
    7: 1,
    8: 2,
    9: 4,
    10: 8,
    11: 4,
    12: 8,
}
  33. def _convertToDPI(density, unit):
  34. if unit == _UNIT_KM:
  35. return int(density * 0.0000254 + 0.5)
  36. elif unit == _UNIT_100M:
  37. return int(density * 0.000254 + 0.5)
  38. elif unit == _UNIT_10M:
  39. return int(density * 0.00254 + 0.5)
  40. elif unit == _UNIT_1M:
  41. return int(density * 0.0254 + 0.5)
  42. elif unit == _UNIT_10CM:
  43. return int(density * 0.254 + 0.5)
  44. elif unit == _UNIT_CM:
  45. return int(density * 2.54 + 0.5)
  46. elif unit == _UNIT_MM:
  47. return int(density * 25.4 + 0.5)
  48. elif unit == _UNIT_0_1MM:
  49. return density * 254
  50. elif unit == _UNIT_0_01MM:
  51. return density * 2540
  52. elif unit == _UNIT_UM:
  53. return density * 25400
  54. return density
  55. def _convertToPx(value):
  56. matched = re.match(r"(\d+(?:\.\d+)?)?([a-z]*)$", value)
  57. if not matched:
  58. raise ValueError("unknown length value: %s" % value)
  59. length, unit = matched.groups()
  60. if unit == "":
  61. return float(length)
  62. elif unit == "cm":
  63. return float(length) * 96 / 2.54
  64. elif unit == "mm":
  65. return float(length) * 96 / 2.54 / 10
  66. elif unit == "in":
  67. return float(length) * 96
  68. elif unit == "pc":
  69. return float(length) * 96 / 6
  70. elif unit == "pt":
  71. return float(length) * 96 / 6
  72. elif unit == "px":
  73. return float(length)
  74. raise ValueError("unknown unit type: %s" % unit)
  75. def get(filepath):
  76. """
  77. Return (width, height) for a given img file content
  78. no requirements
  79. :type filepath: Union[bytes, str, pathlib.Path]
  80. :rtype Tuple[int, int]
  81. """
  82. height = -1
  83. width = -1
  84. if isinstance(filepath, io.BytesIO): # file-like object
  85. fhandle = filepath
  86. else:
  87. fhandle = open(filepath, 'rb')
  88. try:
  89. head = fhandle.read(31)
  90. size = len(head)
  91. # handle GIFs
  92. if size >= 10 and head[:6] in (b'GIF87a', b'GIF89a'):
  93. # Check to see if content_type is correct
  94. try:
  95. width, height = struct.unpack("<hh", head[6:10])
  96. except struct.error:
  97. raise ValueError("Invalid GIF file")
  98. # see png edition spec bytes are below chunk length then and finally the
  99. elif size >= 24 and head.startswith(b'\211PNG\r\n\032\n') and head[12:16] == b'IHDR':
  100. try:
  101. width, height = struct.unpack(">LL", head[16:24])
  102. except struct.error:
  103. raise ValueError("Invalid PNG file")
  104. # Maybe this is for an older PNG version.
  105. elif size >= 16 and head.startswith(b'\211PNG\r\n\032\n'):
  106. # Check to see if we have the right content type
  107. try:
  108. width, height = struct.unpack(">LL", head[8:16])
  109. except struct.error:
  110. raise ValueError("Invalid PNG file")
  111. # handle JPEGs
  112. elif size >= 2 and head.startswith(b'\377\330'):
  113. try:
  114. fhandle.seek(0) # Read 0xff next
  115. size = 2
  116. ftype = 0
  117. while not 0xc0 <= ftype <= 0xcf or ftype in [0xc4, 0xc8, 0xcc]:
  118. fhandle.seek(size, 1)
  119. byte = fhandle.read(1)
  120. while ord(byte) == 0xff:
  121. byte = fhandle.read(1)
  122. ftype = ord(byte)
  123. size = struct.unpack('>H', fhandle.read(2))[0] - 2
  124. # We are at a SOFn block
  125. fhandle.seek(1, 1) # Skip `precision' byte.
  126. height, width = struct.unpack('>HH', fhandle.read(4))
  127. except (struct.error, TypeError):
  128. raise ValueError("Invalid JPEG file")
  129. # handle JPEG2000s
  130. elif size >= 12 and head.startswith(b'\x00\x00\x00\x0cjP \r\n\x87\n'):
  131. fhandle.seek(48)
  132. try:
  133. height, width = struct.unpack('>LL', fhandle.read(8))
  134. except struct.error:
  135. raise ValueError("Invalid JPEG2000 file")
  136. # handle big endian TIFF
  137. elif size >= 8 and head.startswith(b"\x4d\x4d\x00\x2a"):
  138. offset = struct.unpack('>L', head[4:8])[0]
  139. fhandle.seek(offset)
  140. ifdsize = struct.unpack(">H", fhandle.read(2))[0]
  141. for i in range(ifdsize):
  142. tag, datatype, count, data = struct.unpack(">HHLL", fhandle.read(12))
  143. if tag == 256:
  144. if datatype == 3:
  145. width = int(data / 65536)
  146. elif datatype == 4:
  147. width = data
  148. else:
  149. raise ValueError("Invalid TIFF file: width column data type should be SHORT/LONG.")
  150. elif tag == 257:
  151. if datatype == 3:
  152. height = int(data / 65536)
  153. elif datatype == 4:
  154. height = data
  155. else:
  156. raise ValueError("Invalid TIFF file: height column data type should be SHORT/LONG.")
  157. if width != -1 and height != -1:
  158. break
  159. if width == -1 or height == -1:
  160. raise ValueError("Invalid TIFF file: width and/or height IDS entries are missing.")
  161. elif size >= 8 and head.startswith(b"\x49\x49\x2a\x00"):
  162. offset = struct.unpack('<L', head[4:8])[0]
  163. fhandle.seek(offset)
  164. ifdsize = struct.unpack("<H", fhandle.read(2))[0]
  165. for i in range(ifdsize):
  166. tag, datatype, count, data = struct.unpack("<HHLL", fhandle.read(12))
  167. if tag == 256:
  168. width = data
  169. elif tag == 257:
  170. height = data
  171. if width != -1 and height != -1:
  172. break
  173. if width == -1 or height == -1:
  174. raise ValueError("Invalid TIFF file: width and/or height IDS entries are missing.")
  175. # handle little endian BigTiff
  176. elif size >= 8 and head.startswith(b"\x49\x49\x2b\x00"):
  177. bytesize_offset = struct.unpack('<L', head[4:8])[0]
  178. if bytesize_offset != 8:
  179. raise ValueError('Invalid BigTIFF file: Expected offset to be 8, found {} instead.'.format(offset))
  180. offset = struct.unpack('<Q', head[8:16])[0]
  181. fhandle.seek(offset)
  182. ifdsize = struct.unpack("<Q", fhandle.read(8))[0]
  183. for i in range(ifdsize):
  184. tag, datatype, count, data = struct.unpack("<HHQQ", fhandle.read(20))
  185. if tag == 256:
  186. width = data
  187. elif tag == 257:
  188. height = data
  189. if width != -1 and height != -1:
  190. break
  191. if width == -1 or height == -1:
  192. raise ValueError("Invalid BigTIFF file: width and/or height IDS entries are missing.")
  193. # handle SVGs
  194. elif size >= 5 and (head.startswith(b'<?xml') or head.startswith(b'<svg')):
  195. fhandle.seek(0)
  196. data = fhandle.read(1024)
  197. try:
  198. data = data.decode('utf-8')
  199. width = re.search(r'[^-]width="(.*?)"', data).group(1)
  200. height = re.search(r'[^-]height="(.*?)"', data).group(1)
  201. except Exception:
  202. raise ValueError("Invalid SVG file")
  203. width = _convertToPx(width)
  204. height = _convertToPx(height)
  205. # handle Netpbm
  206. elif head[:1] == b"P" and head[1:2] in b"123456":
  207. fhandle.seek(2)
  208. sizes = []
  209. while True:
  210. next_chr = fhandle.read(1)
  211. if next_chr.isspace():
  212. continue
  213. if next_chr == b"":
  214. raise ValueError("Invalid Netpbm file")
  215. if next_chr == b"#":
  216. fhandle.readline()
  217. continue
  218. if not next_chr.isdigit():
  219. raise ValueError("Invalid character found on Netpbm file")
  220. size = next_chr
  221. next_chr = fhandle.read(1)
  222. while next_chr.isdigit():
  223. size += next_chr
  224. next_chr = fhandle.read(1)
  225. sizes.append(int(size))
  226. if len(sizes) == 2:
  227. break
  228. fhandle.seek(-1, os.SEEK_CUR)
  229. width, height = sizes
  230. elif head.startswith(b"RIFF") and head[8:12] == b"WEBP":
  231. if head[12:16] == b"VP8 ":
  232. width, height = struct.unpack("<HH", head[26:30])
  233. elif head[12:16] == b"VP8X":
  234. width = struct.unpack("<I", head[24:27] + b"\0")[0]
  235. height = struct.unpack("<I", head[27:30] + b"\0")[0]
  236. elif head[12:16] == b"VP8L":
  237. b = head[21:25]
  238. width = (((b[1] & 63) << 8) | b[0]) + 1
  239. height = (((b[3] & 15) << 10) | (b[2] << 2) | ((b[1] & 192) >> 6)) + 1
  240. else:
  241. raise ValueError("Unsupported WebP file")
  242. finally:
  243. fhandle.close()
  244. return width, height
  245. def getDPI(filepath):
  246. """
  247. Return (x DPI, y DPI) for a given img file content
  248. no requirements
  249. :type filepath: Union[bytes, str, pathlib.Path]
  250. :rtype Tuple[int, int]
  251. """
  252. xDPI = -1
  253. yDPI = -1
  254. if not isinstance(filepath, bytes):
  255. filepath = str(filepath)
  256. with open(filepath, 'rb') as fhandle:
  257. head = fhandle.read(24)
  258. size = len(head)
  259. # handle GIFs
  260. # GIFs doesn't have density
  261. if size >= 10 and head[:6] in (b'GIF87a', b'GIF89a'):
  262. pass
  263. # see png edition spec bytes are below chunk length then and finally the
  264. elif size >= 24 and head.startswith(b'\211PNG\r\n\032\n'):
  265. chunkOffset = 8
  266. chunk = head[8:]
  267. while True:
  268. chunkType = chunk[4:8]
  269. if chunkType == b'pHYs':
  270. try:
  271. xDensity, yDensity, unit = struct.unpack(">LLB", chunk[8:])
  272. except struct.error:
  273. raise ValueError("Invalid PNG file")
  274. if unit:
  275. xDPI = _convertToDPI(xDensity, _UNIT_1M)
  276. yDPI = _convertToDPI(yDensity, _UNIT_1M)
  277. else: # no unit
  278. xDPI = xDensity
  279. yDPI = yDensity
  280. break
  281. elif chunkType == b'IDAT':
  282. break
  283. else:
  284. try:
  285. dataSize, = struct.unpack(">L", chunk[0:4])
  286. except struct.error:
  287. raise ValueError("Invalid PNG file")
  288. chunkOffset += dataSize + 12
  289. fhandle.seek(chunkOffset)
  290. chunk = fhandle.read(17)
  291. # handle JPEGs
  292. elif size >= 2 and head.startswith(b'\377\330'):
  293. try:
  294. fhandle.seek(0) # Read 0xff next
  295. size = 2
  296. ftype = 0
  297. while not 0xc0 <= ftype <= 0xcf:
  298. if ftype == 0xe0: # APP0 marker
  299. fhandle.seek(7, 1)
  300. unit, xDensity, yDensity = struct.unpack(">BHH", fhandle.read(5))
  301. if unit == 1 or unit == 0:
  302. xDPI = xDensity
  303. yDPI = yDensity
  304. elif unit == 2:
  305. xDPI = _convertToDPI(xDensity, _UNIT_CM)
  306. yDPI = _convertToDPI(yDensity, _UNIT_CM)
  307. break
  308. fhandle.seek(size, 1)
  309. byte = fhandle.read(1)
  310. while ord(byte) == 0xff:
  311. byte = fhandle.read(1)
  312. ftype = ord(byte)
  313. size = struct.unpack('>H', fhandle.read(2))[0] - 2
  314. except struct.error:
  315. raise ValueError("Invalid JPEG file")
  316. # handle JPEG2000s
  317. elif size >= 12 and head.startswith(b'\x00\x00\x00\x0cjP \r\n\x87\n'):
  318. fhandle.seek(32)
  319. # skip JP2 image header box
  320. headerSize = struct.unpack('>L', fhandle.read(4))[0] - 8
  321. fhandle.seek(4, 1)
  322. foundResBox = False
  323. try:
  324. while headerSize > 0:
  325. boxHeader = fhandle.read(8)
  326. boxType = boxHeader[4:]
  327. if boxType == b'res ': # find resolution super box
  328. foundResBox = True
  329. headerSize -= 8
  330. break
  331. boxSize, = struct.unpack('>L', boxHeader[:4])
  332. fhandle.seek(boxSize - 8, 1)
  333. headerSize -= boxSize
  334. if foundResBox:
  335. while headerSize > 0:
  336. boxHeader = fhandle.read(8)
  337. boxType = boxHeader[4:]
  338. if boxType == b'resd': # Display resolution box
  339. yDensity, xDensity, yUnit, xUnit = struct.unpack(">HHBB", fhandle.read(10))
  340. xDPI = _convertToDPI(xDensity, xUnit)
  341. yDPI = _convertToDPI(yDensity, yUnit)
  342. break
  343. boxSize, = struct.unpack('>L', boxHeader[:4])
  344. fhandle.seek(boxSize - 8, 1)
  345. headerSize -= boxSize
  346. except struct.error as e:
  347. raise ValueError("Invalid JPEG2000 file")
  348. return xDPI, yDPI