# imagesize.py (14 KB)

import io
import os
import re
import struct
from xml.etree import ElementTree
  6. _UNIT_KM = -3
  7. _UNIT_100M = -2
  8. _UNIT_10M = -1
  9. _UNIT_1M = 0
  10. _UNIT_10CM = 1
  11. _UNIT_CM = 2
  12. _UNIT_MM = 3
  13. _UNIT_0_1MM = 4
  14. _UNIT_0_01MM = 5
  15. _UNIT_UM = 6
  16. _UNIT_INCH = 6
  17. _TIFF_TYPE_SIZES = {
  18. 1: 1,
  19. 2: 1,
  20. 3: 2,
  21. 4: 4,
  22. 5: 8,
  23. 6: 1,
  24. 7: 1,
  25. 8: 2,
  26. 9: 4,
  27. 10: 8,
  28. 11: 4,
  29. 12: 8,
  30. }
  31. def _convertToDPI(density, unit):
  32. if unit == _UNIT_KM:
  33. return int(density * 0.0000254 + 0.5)
  34. elif unit == _UNIT_100M:
  35. return int(density * 0.000254 + 0.5)
  36. elif unit == _UNIT_10M:
  37. return int(density * 0.00254 + 0.5)
  38. elif unit == _UNIT_1M:
  39. return int(density * 0.0254 + 0.5)
  40. elif unit == _UNIT_10CM:
  41. return int(density * 0.254 + 0.5)
  42. elif unit == _UNIT_CM:
  43. return int(density * 2.54 + 0.5)
  44. elif unit == _UNIT_MM:
  45. return int(density * 25.4 + 0.5)
  46. elif unit == _UNIT_0_1MM:
  47. return density * 254
  48. elif unit == _UNIT_0_01MM:
  49. return density * 2540
  50. elif unit == _UNIT_UM:
  51. return density * 25400
  52. return density
  53. def _convertToPx(value):
  54. matched = re.match(r"(\d+(?:\.\d+)?)?([a-z]*)$", value)
  55. if not matched:
  56. raise ValueError("unknown length value: %s" % value)
  57. length, unit = matched.groups()
  58. if unit == "":
  59. return float(length)
  60. elif unit == "cm":
  61. return float(length) * 96 / 2.54
  62. elif unit == "mm":
  63. return float(length) * 96 / 2.54 / 10
  64. elif unit == "in":
  65. return float(length) * 96
  66. elif unit == "pc":
  67. return float(length) * 96 / 6
  68. elif unit == "pt":
  69. return float(length) * 96 / 6
  70. elif unit == "px":
  71. return float(length)
  72. raise ValueError("unknown unit type: %s" % unit)
  73. def get(filepath):
  74. """
  75. Return (width, height) for a given img file content
  76. no requirements
  77. :type filepath: Union[bytes, str, pathlib.Path]
  78. :rtype Tuple[int, int]
  79. """
  80. height = -1
  81. width = -1
  82. if isinstance(filepath, io.BytesIO): # file-like object
  83. fhandle = filepath
  84. else:
  85. fhandle = open(filepath, 'rb')
  86. try:
  87. head = fhandle.read(31)
  88. size = len(head)
  89. # handle GIFs
  90. if size >= 10 and head[:6] in (b'GIF87a', b'GIF89a'):
  91. # Check to see if content_type is correct
  92. try:
  93. width, height = struct.unpack("<hh", head[6:10])
  94. except struct.error:
  95. raise ValueError("Invalid GIF file")
  96. # see png edition spec bytes are below chunk length then and finally the
  97. elif size >= 24 and head.startswith(b'\211PNG\r\n\032\n') and head[12:16] == b'IHDR':
  98. try:
  99. width, height = struct.unpack(">LL", head[16:24])
  100. except struct.error:
  101. raise ValueError("Invalid PNG file")
  102. # Maybe this is for an older PNG version.
  103. elif size >= 16 and head.startswith(b'\211PNG\r\n\032\n'):
  104. # Check to see if we have the right content type
  105. try:
  106. width, height = struct.unpack(">LL", head[8:16])
  107. except struct.error:
  108. raise ValueError("Invalid PNG file")
  109. # handle JPEGs
  110. elif size >= 2 and head.startswith(b'\377\330'):
  111. try:
  112. fhandle.seek(0) # Read 0xff next
  113. size = 2
  114. ftype = 0
  115. while not 0xc0 <= ftype <= 0xcf or ftype in [0xc4, 0xc8, 0xcc]:
  116. fhandle.seek(size, 1)
  117. byte = fhandle.read(1)
  118. while ord(byte) == 0xff:
  119. byte = fhandle.read(1)
  120. ftype = ord(byte)
  121. size = struct.unpack('>H', fhandle.read(2))[0] - 2
  122. # We are at a SOFn block
  123. fhandle.seek(1, 1) # Skip `precision' byte.
  124. height, width = struct.unpack('>HH', fhandle.read(4))
  125. except (struct.error, TypeError):
  126. raise ValueError("Invalid JPEG file")
  127. # handle JPEG2000s
  128. elif size >= 12 and head.startswith(b'\x00\x00\x00\x0cjP \r\n\x87\n'):
  129. fhandle.seek(48)
  130. try:
  131. height, width = struct.unpack('>LL', fhandle.read(8))
  132. except struct.error:
  133. raise ValueError("Invalid JPEG2000 file")
  134. # handle big endian TIFF
  135. elif size >= 8 and head.startswith(b"\x4d\x4d\x00\x2a"):
  136. offset = struct.unpack('>L', head[4:8])[0]
  137. fhandle.seek(offset)
  138. ifdsize = struct.unpack(">H", fhandle.read(2))[0]
  139. for i in range(ifdsize):
  140. tag, datatype, count, data = struct.unpack(">HHLL", fhandle.read(12))
  141. if tag == 256:
  142. if datatype == 3:
  143. width = int(data / 65536)
  144. elif datatype == 4:
  145. width = data
  146. else:
  147. raise ValueError("Invalid TIFF file: width column data type should be SHORT/LONG.")
  148. elif tag == 257:
  149. if datatype == 3:
  150. height = int(data / 65536)
  151. elif datatype == 4:
  152. height = data
  153. else:
  154. raise ValueError("Invalid TIFF file: height column data type should be SHORT/LONG.")
  155. if width != -1 and height != -1:
  156. break
  157. if width == -1 or height == -1:
  158. raise ValueError("Invalid TIFF file: width and/or height IDS entries are missing.")
  159. elif size >= 8 and head.startswith(b"\x49\x49\x2a\x00"):
  160. offset = struct.unpack('<L', head[4:8])[0]
  161. fhandle.seek(offset)
  162. ifdsize = struct.unpack("<H", fhandle.read(2))[0]
  163. for i in range(ifdsize):
  164. tag, datatype, count, data = struct.unpack("<HHLL", fhandle.read(12))
  165. if tag == 256:
  166. width = data
  167. elif tag == 257:
  168. height = data
  169. if width != -1 and height != -1:
  170. break
  171. if width == -1 or height == -1:
  172. raise ValueError("Invalid TIFF file: width and/or height IDS entries are missing.")
  173. # handle little endian BigTiff
  174. elif size >= 8 and head.startswith(b"\x49\x49\x2b\x00"):
  175. bytesize_offset = struct.unpack('<L', head[4:8])[0]
  176. if bytesize_offset != 8:
  177. raise ValueError('Invalid BigTIFF file: Expected offset to be 8, found {} instead.'.format(offset))
  178. offset = struct.unpack('<Q', head[8:16])[0]
  179. fhandle.seek(offset)
  180. ifdsize = struct.unpack("<Q", fhandle.read(8))[0]
  181. for i in range(ifdsize):
  182. tag, datatype, count, data = struct.unpack("<HHQQ", fhandle.read(20))
  183. if tag == 256:
  184. width = data
  185. elif tag == 257:
  186. height = data
  187. if width != -1 and height != -1:
  188. break
  189. if width == -1 or height == -1:
  190. raise ValueError("Invalid BigTIFF file: width and/or height IDS entries are missing.")
  191. # handle SVGs
  192. elif size >= 5 and (head.startswith(b'<?xml') or head.startswith(b'<svg')):
  193. fhandle.seek(0)
  194. data = fhandle.read(1024)
  195. try:
  196. data = data.decode('utf-8')
  197. width = re.search(r'[^-]width="(.*?)"', data).group(1)
  198. height = re.search(r'[^-]height="(.*?)"', data).group(1)
  199. except Exception:
  200. raise ValueError("Invalid SVG file")
  201. width = _convertToPx(width)
  202. height = _convertToPx(height)
  203. # handle Netpbm
  204. elif head[:1] == b"P" and head[1:2] in b"123456":
  205. fhandle.seek(2)
  206. sizes = []
  207. while True:
  208. next_chr = fhandle.read(1)
  209. if next_chr.isspace():
  210. continue
  211. if next_chr == b"":
  212. raise ValueError("Invalid Netpbm file")
  213. if next_chr == b"#":
  214. fhandle.readline()
  215. continue
  216. if not next_chr.isdigit():
  217. raise ValueError("Invalid character found on Netpbm file")
  218. size = next_chr
  219. next_chr = fhandle.read(1)
  220. while next_chr.isdigit():
  221. size += next_chr
  222. next_chr = fhandle.read(1)
  223. sizes.append(int(size))
  224. if len(sizes) == 2:
  225. break
  226. fhandle.seek(-1, os.SEEK_CUR)
  227. width, height = sizes
  228. elif head.startswith(b"RIFF") and head[8:12] == b"WEBP":
  229. if head[12:16] == b"VP8 ":
  230. width, height = struct.unpack("<HH", head[26:30])
  231. elif head[12:16] == b"VP8X":
  232. width = struct.unpack("<I", head[24:27] + b"\0")[0]
  233. height = struct.unpack("<I", head[27:30] + b"\0")[0]
  234. elif head[12:16] == b"VP8L":
  235. b = head[21:25]
  236. width = (((b[1] & 63) << 8) | b[0]) + 1
  237. height = (((b[3] & 15) << 10) | (b[2] << 2) | ((b[1] & 192) >> 6)) + 1
  238. else:
  239. raise ValueError("Unsupported WebP file")
  240. finally:
  241. fhandle.close()
  242. return width, height
  243. def getDPI(filepath):
  244. """
  245. Return (x DPI, y DPI) for a given img file content
  246. no requirements
  247. :type filepath: Union[bytes, str, pathlib.Path]
  248. :rtype Tuple[int, int]
  249. """
  250. xDPI = -1
  251. yDPI = -1
  252. if not isinstance(filepath, bytes):
  253. filepath = str(filepath)
  254. with open(filepath, 'rb') as fhandle:
  255. head = fhandle.read(24)
  256. size = len(head)
  257. # handle GIFs
  258. # GIFs doesn't have density
  259. if size >= 10 and head[:6] in (b'GIF87a', b'GIF89a'):
  260. pass
  261. # see png edition spec bytes are below chunk length then and finally the
  262. elif size >= 24 and head.startswith(b'\211PNG\r\n\032\n'):
  263. chunkOffset = 8
  264. chunk = head[8:]
  265. while True:
  266. chunkType = chunk[4:8]
  267. if chunkType == b'pHYs':
  268. try:
  269. xDensity, yDensity, unit = struct.unpack(">LLB", chunk[8:])
  270. except struct.error:
  271. raise ValueError("Invalid PNG file")
  272. if unit:
  273. xDPI = _convertToDPI(xDensity, _UNIT_1M)
  274. yDPI = _convertToDPI(yDensity, _UNIT_1M)
  275. else: # no unit
  276. xDPI = xDensity
  277. yDPI = yDensity
  278. break
  279. elif chunkType == b'IDAT':
  280. break
  281. else:
  282. try:
  283. dataSize, = struct.unpack(">L", chunk[0:4])
  284. except struct.error:
  285. raise ValueError("Invalid PNG file")
  286. chunkOffset += dataSize + 12
  287. fhandle.seek(chunkOffset)
  288. chunk = fhandle.read(17)
  289. # handle JPEGs
  290. elif size >= 2 and head.startswith(b'\377\330'):
  291. try:
  292. fhandle.seek(0) # Read 0xff next
  293. size = 2
  294. ftype = 0
  295. while not 0xc0 <= ftype <= 0xcf:
  296. if ftype == 0xe0: # APP0 marker
  297. fhandle.seek(7, 1)
  298. unit, xDensity, yDensity = struct.unpack(">BHH", fhandle.read(5))
  299. if unit == 1 or unit == 0:
  300. xDPI = xDensity
  301. yDPI = yDensity
  302. elif unit == 2:
  303. xDPI = _convertToDPI(xDensity, _UNIT_CM)
  304. yDPI = _convertToDPI(yDensity, _UNIT_CM)
  305. break
  306. fhandle.seek(size, 1)
  307. byte = fhandle.read(1)
  308. while ord(byte) == 0xff:
  309. byte = fhandle.read(1)
  310. ftype = ord(byte)
  311. size = struct.unpack('>H', fhandle.read(2))[0] - 2
  312. except struct.error:
  313. raise ValueError("Invalid JPEG file")
  314. # handle JPEG2000s
  315. elif size >= 12 and head.startswith(b'\x00\x00\x00\x0cjP \r\n\x87\n'):
  316. fhandle.seek(32)
  317. # skip JP2 image header box
  318. headerSize = struct.unpack('>L', fhandle.read(4))[0] - 8
  319. fhandle.seek(4, 1)
  320. foundResBox = False
  321. try:
  322. while headerSize > 0:
  323. boxHeader = fhandle.read(8)
  324. boxType = boxHeader[4:]
  325. if boxType == b'res ': # find resolution super box
  326. foundResBox = True
  327. headerSize -= 8
  328. break
  329. boxSize, = struct.unpack('>L', boxHeader[:4])
  330. fhandle.seek(boxSize - 8, 1)
  331. headerSize -= boxSize
  332. if foundResBox:
  333. while headerSize > 0:
  334. boxHeader = fhandle.read(8)
  335. boxType = boxHeader[4:]
  336. if boxType == b'resd': # Display resolution box
  337. yDensity, xDensity, yUnit, xUnit = struct.unpack(">HHBB", fhandle.read(10))
  338. xDPI = _convertToDPI(xDensity, xUnit)
  339. yDPI = _convertToDPI(yDensity, yUnit)
  340. break
  341. boxSize, = struct.unpack('>L', boxHeader[:4])
  342. fhandle.seek(boxSize - 8, 1)
  343. headerSize -= boxSize
  344. except struct.error as e:
  345. raise ValueError("Invalid JPEG2000 file")
  346. return xDPI, yDPI