utils.py 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712
  1. # Copyright 2014 Baidu, Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
  4. # except in compliance with the License. You may obtain a copy of the License at
  5. #
  6. # http://www.apache.org/licenses/LICENSE-2.0
  7. #
  8. # Unless required by applicable law or agreed to in writing, software distributed under the
  9. # License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
  10. # either express or implied. See the License for the specific language governing permissions
  11. # and limitations under the License.
  12. """
  13. This module provides utility helpers for the BCE client.
  14. """
  15. # With the imports below, str() produces unicode text and bytes() holds ASCII/binary data
  16. from __future__ import print_function
  17. from __future__ import absolute_import
  18. from builtins import str, bytes
  19. from future.utils import iteritems, iterkeys, itervalues
  20. from baidubce import compat
  21. import os
  22. import re
  23. import datetime
  24. import hashlib
  25. import base64
  26. import string
  27. import sys
  28. try:
  29. from urllib.parse import urlparse
  30. except ImportError:
  31. from urlparse import urlparse
  32. from Crypto.Cipher import AES
  33. import baidubce
  34. from baidubce.http import http_headers
  35. import codecs
# Host suffixes that is_cname_like_host() treats as CDN (CNAME-like) domains.
DEFAULT_CNAME_LIKE_LIST = [b".cdn.bcebos.com"]
# Domain suffix that is_bos_suffixed_host() looks for at the end of a host.
DEFAULT_BOS_DOMAIN_SUFFIX = b'bcebos.com'
# Not referenced by the functions visible in this file; presumably used by
# callers to recognise endpoints that start with a scheme -- TODO confirm.
HTTP_PROTOCOL_HEAD = b'http'
  39. def get_md5_from_fp(fp, offset=0, length=-1, buf_size=8192):
  40. """
  41. Get MD5 from file by fp.
  42. :type fp: FileIO
  43. :param fp: None
  44. :type offset: long
  45. :param offset: None
  46. :type length: long
  47. :param length: None
  48. =======================
  49. :return:
  50. **file_size, MD(encode by base64)**
  51. """
  52. origin_offset = fp.tell()
  53. if offset:
  54. fp.seek(offset)
  55. md5 = hashlib.md5()
  56. while True:
  57. bytes_to_read = buf_size
  58. if bytes_to_read > length > 0:
  59. bytes_to_read = length
  60. buf = fp.read(bytes_to_read)
  61. if not buf:
  62. break
  63. md5.update(buf)
  64. if length > 0:
  65. length -= len(buf)
  66. if length == 0:
  67. break
  68. fp.seek(origin_offset)
  69. return base64.standard_b64encode(md5.digest())
  70. def get_canonical_time(timestamp=0):
  71. """
  72. Get cannonical time.
  73. :type timestamp: int
  74. :param timestamp: None
  75. =======================
  76. :return:
  77. **string of canonical_time**
  78. """
  79. if timestamp == 0:
  80. utctime = datetime.datetime.utcnow()
  81. else:
  82. utctime = datetime.datetime.utcfromtimestamp(timestamp)
  83. return b"%04d-%02d-%02dT%02d:%02d:%02dZ" % (
  84. utctime.year, utctime.month, utctime.day,
  85. utctime.hour, utctime.minute, utctime.second)
  86. def is_ip(s):
  87. """
  88. Check a string whether is a legal ip address.
  89. :type s: string
  90. :param s: None
  91. =======================
  92. :return:
  93. **Boolean**
  94. """
  95. try:
  96. tmp_list = s.split(b':')
  97. s = tmp_list[0]
  98. if s == b'localhost':
  99. return True
  100. tmp_list = s.split(b'.')
  101. if len(tmp_list) != 4:
  102. return False
  103. else:
  104. for i in tmp_list:
  105. if int(i) < 0 or int(i) > 255:
  106. return False
  107. except:
  108. return False
  109. return True
  110. def convert_to_standard_string(input_string):
  111. """
  112. Encode a string to utf-8.
  113. :type input_string: string
  114. :param input_string: None
  115. =======================
  116. :return:
  117. **string**
  118. """
  119. #if isinstance(input_string, str):
  120. # return input_string.encode(baidubce.DEFAULT_ENCODING)
  121. #elif isinstance(input_string, bytes):
  122. # return input_string
  123. #else:
  124. # return str(input_string).encode("utf-8")
  125. return compat.convert_to_bytes(input_string)
  126. def convert_header2map(header_list):
  127. """
  128. Transfer a header list to dict
  129. :type s: list
  130. :param s: None
  131. =======================
  132. :return:
  133. **dict**
  134. """
  135. header_map = {}
  136. for a, b in header_list:
  137. if isinstance(a, bytes):
  138. a = a.strip(b'\"')
  139. if isinstance(b, bytes):
  140. b = b.strip(b'\"')
  141. header_map[a] = b
  142. return header_map
  143. def safe_get_element(name, container):
  144. """
  145. Get element from dict which the lower of key and name are equal.
  146. :type name: string
  147. :param name: None
  148. :type container: dict
  149. :param container: None
  150. =======================
  151. :return:
  152. **Value**
  153. """
  154. for k, v in iteritems(container):
  155. if k.strip().lower() == name.strip().lower():
  156. return v
  157. return ""
  158. def check_redirect(res):
  159. """
  160. Check whether the response is redirect.
  161. :type res: HttpResponse
  162. :param res: None
  163. :return:
  164. **Boolean**
  165. """
  166. is_redirect = False
  167. try:
  168. if res.status == 301 or res.status == 302:
  169. is_redirect = True
  170. except:
  171. pass
  172. return is_redirect
  173. def _get_normalized_char_list():
  174. """"
  175. :return:
  176. **ASCII string**
  177. """
  178. ret = ['%%%02X' % i for i in range(256)]
  179. for ch in string.ascii_letters + string.digits + '.~-_':
  180. ret[ord(ch)] = ch
  181. if isinstance(ret[0], str):
  182. ret = [s.encode("utf-8") for s in ret]
  183. return ret
  184. _NORMALIZED_CHAR_LIST = _get_normalized_char_list()
  185. def normalize_string(in_str, encoding_slash=True):
  186. """
  187. Encode in_str.
  188. When encoding_slash is True, don't encode skip_chars, vice versa.
  189. :type in_str: string
  190. :param in_str: None
  191. :type encoding_slash: Bool
  192. :param encoding_slash: None
  193. ===============================
  194. :return:
  195. **ASCII string**
  196. """
  197. tmp = []
  198. for ch in convert_to_standard_string(in_str):
  199. # on python3, ch is int type
  200. sep = ''
  201. index = -1
  202. if isinstance(ch, int):
  203. # on py3
  204. sep = chr(ch).encode("utf-8")
  205. index = ch
  206. else:
  207. sep = ch
  208. index = ord(ch)
  209. if sep == b'/' and not encoding_slash:
  210. tmp.append(b'/')
  211. else:
  212. tmp.append(_NORMALIZED_CHAR_LIST[index])
  213. return (b'').join(tmp)
  214. def append_uri(base_uri, *path_components):
  215. """
  216. Append path_components to the end of base_uri in order, and ignore all empty strings and None
  217. :param base_uri: None
  218. :type base_uri: string
  219. :param path_components: None
  220. :return: the final url
  221. :rtype: str
  222. """
  223. tmp = [base_uri]
  224. for path in path_components:
  225. if path:
  226. tmp.append(normalize_string(path, False))
  227. if len(tmp) > 1:
  228. tmp[0] = tmp[0].rstrip(b'/')
  229. tmp[-1] = tmp[-1].lstrip(b'/')
  230. for i in range(1, len(tmp) - 1):
  231. tmp[i] = tmp[i].strip(b'/')
  232. return (b'/').join(tmp)
  233. def check_bucket_valid(bucket):
  234. """
  235. Check bucket name whether is legal.
  236. :type bucket: string
  237. :param bucket: None
  238. =======================
  239. :return:
  240. **Boolean**
  241. """
  242. alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-"
  243. if len(bucket) < 3 or len(bucket) > 63:
  244. return False
  245. if bucket[-1] == "-" or bucket[-1] == "_":
  246. return False
  247. if not (('a' <= bucket[0] <= 'z') or ('0' <= bucket[0] <= '9')):
  248. return False
  249. for i in bucket:
  250. if not i in alphabet:
  251. return False
  252. return True
  253. def guess_content_type_by_file_name(file_name):
  254. """
  255. Get file type by filename.
  256. :type file_name: string
  257. :param file_name: None
  258. =======================
  259. :return:
  260. **Type Value**
  261. """
  262. mime_map = dict()
  263. mime_map["js"] = "application/javascript"
  264. mime_map["xlsx"] = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
  265. mime_map["xltx"] = "application/vnd.openxmlformats-officedocument.spreadsheetml.template"
  266. mime_map["potx"] = "application/vnd.openxmlformats-officedocument.presentationml.template"
  267. mime_map["ppsx"] = "application/vnd.openxmlformats-officedocument.presentationml.slideshow"
  268. mime_map["pptx"] = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
  269. mime_map["sldx"] = "application/vnd.openxmlformats-officedocument.presentationml.slide"
  270. mime_map["docx"] = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
  271. mime_map["dotx"] = "application/vnd.openxmlformats-officedocument.wordprocessingml.template"
  272. mime_map["xlam"] = "application/vnd.ms-excel.addin.macroEnabled.12"
  273. mime_map["xlsb"] = "application/vnd.ms-excel.sheet.binary.macroEnabled.12"
  274. try:
  275. file_name = compat.convert_to_string(file_name)
  276. name = os.path.basename(file_name.lower())
  277. suffix = name.split('.')[-1]
  278. if suffix in iterkeys(mime_map):
  279. mime_type = mime_map[suffix]
  280. else:
  281. import mimetypes
  282. mimetypes.init()
  283. mime_type = mimetypes.types_map.get("." + suffix, 'application/octet-stream')
  284. except:
  285. mime_type = 'application/octet-stream'
  286. if not mime_type:
  287. mime_type = 'application/octet-stream'
  288. return compat.convert_to_bytes(mime_type)
  289. _first_cap_regex = re.compile('(.)([A-Z][a-z]+)')
  290. _number_cap_regex = re.compile('([a-z])([0-9]{2,})')
  291. _end_cap_regex = re.compile('([a-z0-9])([A-Z])')
  292. def pythonize_name(name):
  293. """Convert camel case to a "pythonic" name.
  294. Examples::
  295. pythonize_name('CamelCase') -> 'camel_case'
  296. pythonize_name('already_pythonized') -> 'already_pythonized'
  297. pythonize_name('HTTPRequest') -> 'http_request'
  298. pythonize_name('HTTPStatus200Ok') -> 'http_status_200_ok'
  299. pythonize_name('UPPER') -> 'upper'
  300. pythonize_name('ContentMd5')->'content_md5'
  301. pythonize_name('') -> ''
  302. """
  303. if name == "eTag":
  304. return "etag"
  305. s1 = _first_cap_regex.sub(r'\1_\2', name)
  306. s2 = _number_cap_regex.sub(r'\1_\2', s1)
  307. return _end_cap_regex.sub(r'\1_\2', s2).lower()
  308. def get_canonical_querystring(params, for_signature):
  309. """
  310. :param params:
  311. :param for_signature:
  312. :return:
  313. """
  314. if params is None:
  315. return ''
  316. result = []
  317. for k, v in iteritems(params):
  318. if not for_signature or k.lower != http_headers.AUTHORIZATION.lower():
  319. if v is None:
  320. v = ''
  321. result.append(b'%s=%s' % (normalize_string(k), normalize_string(v)))
  322. result.sort()
  323. return (b'&').join(result)
  324. def print_object(obj):
  325. """
  326. :param obj:
  327. :return:
  328. """
  329. tmp = []
  330. for k, v in iteritems(obj.__dict__):
  331. if not k.startswith('__') and k != "raw_data":
  332. if isinstance(v, bytes):
  333. tmp.append("%s:'%s'" % (k, v))
  334. # str is unicode
  335. elif isinstance(v, str):
  336. tmp.append("%s:u'%s'" % (k, v))
  337. else:
  338. tmp.append('%s:%s' % (k, v))
  339. return '{%s}' % ','.join(tmp)
  340. class Expando(object):
  341. """
  342. Expandable class
  343. """
  344. def __init__(self, attr_dict=None):
  345. if attr_dict:
  346. self.__dict__.update(attr_dict)
  347. def __getattr__(self, item):
  348. if item.startswith('__'):
  349. raise AttributeError
  350. return None
  351. def __repr__(self):
  352. return print_object(self)
  353. def dict_to_python_object(d):
  354. """
  355. :param d:
  356. :return:
  357. """
  358. attr = {}
  359. for k, v in iteritems(d):
  360. if not isinstance(k, compat.string_types):
  361. k = compat.convert_to_string(k)
  362. k = pythonize_name(k)
  363. attr[k] = v
  364. return Expando(attr)
  365. def required(**types):
  366. """
  367. decorator of input param check
  368. :param types:
  369. :return:
  370. """
  371. def _required(f):
  372. def _decorated(*args, **kwds):
  373. for i, v in enumerate(args):
  374. if f.__code__.co_varnames[i] in types:
  375. if v is None:
  376. raise ValueError('arg "%s" should not be None' %
  377. (f.__code__.co_varnames[i]))
  378. if not isinstance(v, types[f.__code__.co_varnames[i]]):
  379. raise TypeError('arg "%s"= %r does not match %s' %
  380. (f.__code__.co_varnames[i],
  381. v,
  382. types[f.__code__.co_varnames[i]]))
  383. for k, v in iteritems(kwds):
  384. if k in types:
  385. if v is None:
  386. raise ValueError('arg "%s" should not be None' % k)
  387. if not isinstance(v, types[k]):
  388. raise TypeError('arg "%s"= %r does not match %s' % (k, v, types[k]))
  389. return f(*args, **kwds)
  390. _decorated.__name__ = f.__name__
  391. return _decorated
  392. return _required
  393. def parse_host_port(endpoint, default_protocol):
  394. """
  395. parse protocol, host, port from endpoint in config
  396. :type: string
  397. :param endpoint: endpoint in config
  398. :type: baidubce.protocol.HTTP or baidubce.protocol.HTTPS
  399. :param default_protocol: if there is no scheme in endpoint,
  400. we will use this protocol as default
  401. :return: tuple of protocol, host, port
  402. """
  403. # netloc should begin with // according to RFC1808
  404. if b"//" not in endpoint:
  405. endpoint = b"//" + endpoint
  406. try:
  407. # scheme in endpoint dominates input default_protocol
  408. parse_result = urlparse(
  409. endpoint,
  410. compat.convert_to_bytes(default_protocol.name))
  411. except Exception as e:
  412. raise ValueError('Invalid endpoint:%s, error:%s' % (endpoint,
  413. compat.convert_to_string(e)))
  414. if parse_result.scheme == compat.convert_to_bytes(baidubce.protocol.HTTP.name):
  415. protocol = baidubce.protocol.HTTP
  416. port = baidubce.protocol.HTTP.default_port
  417. elif parse_result.scheme == compat.convert_to_bytes(baidubce.protocol.HTTPS.name):
  418. protocol = baidubce.protocol.HTTPS
  419. port = baidubce.protocol.HTTPS.default_port
  420. else:
  421. raise ValueError('Unsupported protocol %s' % parse_result.scheme)
  422. host = parse_result.hostname
  423. if parse_result.port is not None:
  424. port = parse_result.port
  425. return protocol, host, port
  426. """
  427. def aes128_encrypt_16char_key(adminpass, secretkey):
  428. #Python2:encrypt admin password by AES128
  429. pad_it = lambda s: s + (16 - len(s) % 16) * chr(16 - len(s) % 16)
  430. key = secretkey[0:16]
  431. mode = AES.MODE_ECB
  432. cryptor = AES.new(key, mode, key)
  433. cipheradminpass = cryptor.encrypt(pad_it(adminpass)).encode('hex')
  434. return cipheradminpass
  435. """
  436. def aes128_encrypt_16char_key(adminpass, secretkey):
  437. """
  438. :param adminpass: adminpass
  439. :param secretkey: secretkey
  440. :return: cipheradminpass
  441. """
  442. # Python3: encrypt admin password by AES128
  443. pad_it = lambda s: s + (16 - len(s) % 16) * chr(16 - len(s) % 16)
  444. key = secretkey[0:16]
  445. mode = AES.MODE_ECB
  446. cryptor = AES.new(key, mode)
  447. pad_admin = pad_it(adminpass)
  448. byte_pad_admin = pad_admin.encode(encoding='utf-8')
  449. cryptoradminpass = cryptor.encrypt(byte_pad_admin)
  450. #print(cryptoradminpass)
  451. #cipheradminpass = cryptor.encrypt(byte_pad_admin).encode('hex')
  452. byte_cipheradminpass = codecs.encode(cryptoradminpass, 'hex_codec')
  453. #print(byte_cipheradminpass)
  454. cipheradminpass = byte_cipheradminpass.decode(encoding='utf-8')
  455. #print(cipheradminpass)
  456. return cipheradminpass
  457. def is_cname_like_host(host):
  458. """
  459. :param host: custom domain
  460. :return: domain end with cdn endpoint or not
  461. """
  462. if host is None:
  463. return False
  464. for suffix in DEFAULT_CNAME_LIKE_LIST:
  465. if host.lower().endswith(suffix):
  466. return True
  467. return False
  468. def is_custom_host(host, bucket_name):
  469. """
  470. custom host : xxx.region.bcebos.com
  471. : return: custom, domain or not
  472. """
  473. if host is None or bucket_name is None:
  474. return False
  475. host_split = host.split(b'.')
  476. # split http head
  477. return host.lower().startswith(compat.convert_to_bytes(bucket_name.lower())) \
  478. and len(host_split) == 4 and is_bos_suffixed_host(host)
  479. def is_bos_suffixed_host(host):
  480. """
  481. :param host: bos endpoint
  482. :return: bos endpoint or not
  483. """
  484. if host is None:
  485. return False
  486. if host.endswith(b'/'):
  487. check_host = host[:-1]
  488. else:
  489. check_host = host
  490. return check_host.lower().endswith(DEFAULT_BOS_DOMAIN_SUFFIX)
  491. def check_ipv4(ipAddr):
  492. """
  493. :param ipAddr: ip address
  494. :return: true or false
  495. """
  496. compile_ip=re.compile(b'((2(5[0-5]|[0-4]\d))|[0-1]?\d{1,2})(.((2(5[0-5]|[0-4]\d))|[0-1]?\d{1,2})){3}')
  497. return compile_ip.match(ipAddr)
  498. def _get_data_size(data):
  499. if hasattr(data, '__len__'):
  500. return len(data)
  501. if hasattr(data, 'len'):
  502. return data.len
  503. if hasattr(data, 'seek') and hasattr(data, 'tell'):
  504. return file_object_remaining_bytes(data)
  505. return None
  506. def file_object_remaining_bytes(fileobj):
  507. current = fileobj.tell()
  508. fileobj.seek(0, os.SEEK_END)
  509. end = fileobj.tell()
  510. fileobj.seek(current, os.SEEK_SET)
  511. return end - current
  512. def _invoke_progress_callback(progress_callback, consumed_bytes, total_bytes):
  513. if progress_callback:
  514. progress_callback(consumed_bytes, total_bytes)
  515. def make_progress_adapter(data, progress_callback, size=None):
  516. """return a adapter,when reading 'data', that is, calling read or iterating
  517. over it Call the progress callback function
  518. :param data: bytes,file object or iterable
  519. :param progress_callback: callback function, ref:`_default_progress_callback`
  520. :param size: size of `data`
  521. :return: callback function adapter
  522. """
  523. if size is None:
  524. size = _get_data_size(data)
  525. if size is None:
  526. raise ValueError('{0} is not a file object'.format(data.__class__.__name__))
  527. return _BytesAndFileAdapter(data, progress_callback, size)
  528. _CHUNK_SIZE = 8 * 1024
  529. class _BytesAndFileAdapter(object):
  530. """With this adapter, you can add progress monitoring to 'data'.
  531. :param data: bytes or file object
  532. :param progress_callback: user-provided callback function. like callback(bytes_read, total_bytes)
  533. bytes_read is readed bytes;total_bytes is total bytes
  534. :param int size : data size
  535. """
  536. def __init__(self, data, progress_callback=None, size=None):
  537. self.data = data
  538. self.progress_callback = progress_callback
  539. self.size = size
  540. self.offset = 0
  541. @property
  542. def len(self):
  543. return self.size
  544. # for python 2.x
  545. def __bool__(self):
  546. return True
  547. # for python 3.x
  548. __nonzero__=__bool__
  549. # support iterable type
  550. # def __iter__(self):
  551. # return self
  552. # def __next__(self):
  553. # return self.next()
  554. # def next(self):
  555. # content = self.read(_CHUNK_SIZE)
  556. # if content:
  557. # return content
  558. # else:
  559. # raise StopIteration
  560. def read(self, amt=None):
  561. if self.offset >= self.size:
  562. return compat.convert_to_bytes('')
  563. if amt is None or amt < 0:
  564. bytes_to_read = self.size - self.offset
  565. else:
  566. bytes_to_read = min(amt, self.size - self.offset)
  567. if isinstance(self.data, bytes):
  568. content = self.data[self.offset:self.offset+bytes_to_read]
  569. else:
  570. content = self.data.read(bytes_to_read)
  571. self.offset += bytes_to_read
  572. _invoke_progress_callback(self.progress_callback, min(self.offset, self.size), self.size)
  573. return content
  574. def default_progress_callback(consumed_bytes, total_bytes):
  575. """Progress bar callback function that calculates the percentage of current completion
  576. :param consumed_bytes: Amount of data that has been uploaded/downloaded
  577. :param total_bytes: According to the total amount
  578. """
  579. if total_bytes:
  580. rate = int(100 * (float(consumed_bytes) / float(total_bytes)))
  581. start_progress = '*' * rate
  582. end_progress = '.' * (100 - rate)
  583. if rate == 100:
  584. print("\r{}%[{}->{}]\n".format(rate, start_progress, end_progress), end="")
  585. else:
  586. print("\r{}%[{}->{}]".format(rate, start_progress, end_progress), end="")
  587. sys.stdout.flush()