tool.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631
  1. #
  2. # Copyright 2013 The py-lmdb authors, all rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted only as authorized by the OpenLDAP
  6. # Public License.
  7. #
  8. # A copy of this license is available in the file LICENSE in the
  9. # top-level directory of the distribution or, alternatively, at
  10. # <http://www.OpenLDAP.org/license.html>.
  11. #
  12. # OpenLDAP is a registered trademark of the OpenLDAP Foundation.
  13. #
  14. # Individual files and/or contributed packages may be copyright by
  15. # other parties and/or subject to additional restrictions.
  16. #
  17. # This work also contains materials derived from public sources.
  18. #
  19. # Additional information about OpenLDAP can be obtained at
  20. # <http://www.openldap.org/>.
  21. #
  22. """
  23. Basic tools for working with LMDB.
  24. copy: Consistent high speed backup an environment.
  25. %prog copy -e source.lmdb target.lmdb
  26. copyfd: Consistent high speed backup an environment to stdout.
  27. %prog copyfd -e source.lmdb > target.lmdb/data.mdb
  28. drop: Delete one or more sub-databases.
  29. %prog drop db1
  30. dump: Dump one or more databases to disk in 'cdbmake' format.
  31. Usage: dump [db1=file1.cdbmake db2=file2.cdbmake]
  32. If no databases are given, dumps the main database to 'main.cdbmake'.
  33. edit: Add/delete/replace values from a database.
  34. %prog edit --set key=value --set-file key=/path \\
  35. --add key=value --add-file key=/path/to/file \\
  36. --delete key
  37. get: Read one or more values from a database.
  38. %prog get [<key1> [<keyN> [..]]]
  39. readers: Display readers in the lock table
  40. %prog readers -e /path/to/db [-c]
  41. If -c is specified, clear stale readers.
  42. restore: Read one or more database from disk in 'cdbmake' format.
  43. %prog restore db1=file1.cdbmake db2=file2.cdbmake
  44. The special db name ":main:" may be used to indicate the main DB.
  45. rewrite: Re-create an environment using MDB_APPEND
  46. %prog rewrite -e src.lmdb -E dst.lmdb [<db1> [<dbN> ..]]
  47. If no databases are given, rewrites only the main database.
  48. shell: Open interactive console with ENV set to the open environment.
  49. stat: Print environment statistics.
  50. warm: Read environment into page cache sequentially.
  51. watch: Show live environment statistics
  52. """
  53. from __future__ import absolute_import
  54. from __future__ import with_statement
  55. import array
  56. import collections
  57. import csv
  58. import functools
  59. import optparse
  60. import os
  61. import pprint
  62. import signal
  63. import string
  64. import struct
  65. import sys
  66. import time
  67. # Python3.x bikeshedded treachery.
  68. try:
  69. from io import BytesIO as StringIO
  70. except ImportError:
  71. try:
  72. from cStringIO import StringIO # type: ignore
  73. except ImportError:
  74. from StringIO import StringIO # type: ignore
  75. import lmdb
# Buffering argument (in bytes) passed to open() by the dump and restore
# commands.
BUF_SIZE = 10485760
# Environment handle; assigned by main() and read by every cmd_* function.
ENV = None
# Database handle; assigned by main() only when --db is given.
DB = None
# How strings get encoded to and decoded from DB
ENCODING = 'utf-8'
  81. def _to_bytes(s):
  82. """Given either a Python 2.x or 3.x str, return either a str (Python 2.x)
  83. or a bytes instance (Python 3.x)."""
  84. return globals().get('unicode', str)(s).encode(ENCODING)
  85. def isprint(c):
  86. """Return ``True`` if the character `c` can be printed visibly and without
  87. adversely affecting printing position (e.g. newline)."""
  88. return c in string.printable and ord(c) > 16
  89. def xxd(s):
  90. """Return a vaguely /usr/bin/xxd formatted representation of the bytestring
  91. `s`."""
  92. sio = StringIO()
  93. pr = _to_bytes('')
  94. for idx, ch in enumerate(s):
  95. if sys.version_info[0] >= 3:
  96. ch = chr(ch)
  97. if not (idx % 16):
  98. if idx:
  99. sio.write(_to_bytes(' '))
  100. sio.write(pr)
  101. sio.write(_to_bytes('\n'))
  102. sio.write(_to_bytes('%07x:' % idx))
  103. pr = _to_bytes('')
  104. if not (idx % 2):
  105. sio.write(_to_bytes(' '))
  106. sio.write(_to_bytes('%02x' % (ord(ch),)))
  107. pr += _to_bytes(ch) if isprint(ch) else _to_bytes('.')
  108. if idx % 16:
  109. need = 15 - (idx % 16)
  110. # fill remainder of last line.
  111. sio.write(_to_bytes(' ') * need)
  112. sio.write(_to_bytes(' ') * (need // 2))
  113. sio.write(_to_bytes(' '))
  114. sio.write(pr)
  115. sio.write(_to_bytes('\n'))
  116. return sio.getvalue().decode(ENCODING)
  117. def make_parser():
  118. parser = optparse.OptionParser()
  119. parser.prog = 'python -mlmdb'
  120. parser.usage = '%prog [options] <command>\n' + __doc__.rstrip()
  121. parser.add_option('-e', '--env', help='Environment file to open')
  122. parser.add_option('-d', '--db', help='Database to open (default: main)')
  123. parser.add_option('-r', '--read', help='Open environment read-only')
  124. parser.add_option('-S', '--map_size', type='int', default='10',
  125. help='Map size in megabytes (default: 10)')
  126. parser.add_option('-s', '--use-single-file', action='store_true',
  127. help='The database was created as a single file and not a subdirectory')
  128. # FIXME: implement --all
  129. # parser.add_option('-a', '--all', action='store_true',
  130. # help='Make "dump" dump all databases')
  131. parser.add_option('-E', '--target_env',
  132. help='Target environment file for "dumpfd"')
  133. parser.add_option('-x', '--xxd', action='store_true',
  134. help='Print values in xxd format')
  135. parser.add_option('-M', '--max-dbs', type='int', default=128,
  136. help='Maximum open DBs (default: 128)')
  137. parser.add_option('--out-fd', type='int', default=1,
  138. help='"copyfd" command target fd')
  139. group = parser.add_option_group('Options for "copy" command')
  140. group.add_option('--compact', action='store_true', default=False,
  141. help='Perform compaction while copying.')
  142. group = parser.add_option_group('Options for "edit" command')
  143. group.add_option('--set', action='append',
  144. help='List of key=value pairs to set.')
  145. group.add_option('--set-file', action='append',
  146. help='List of key pairs to read from files.')
  147. group.add_option('--add', action='append',
  148. help='List of key=value pairs to add.')
  149. group.add_option('--add-file', action='append',
  150. help='List of key pairs to read from files.')
  151. group.add_option('--delete', action='append',
  152. help='List of key=value pairs to delete.')
  153. group = parser.add_option_group('Options for "readers" command')
  154. group.add_option('-c', '--clean', action='store_true',
  155. help='Clean stale readers? (default: no)')
  156. group = parser.add_option_group('Options for "watch" command')
  157. group.add_option('--csv', action='store_true',
  158. help='Generate CSV instead of terminal output.')
  159. group.add_option('--interval', type='int', default=1,
  160. help='Interval size (default: 1sec)')
  161. group.add_option('--window', type='int', default=10,
  162. help='Average window size (default: 10)')
  163. return parser
  164. def die(fmt, *args):
  165. if args:
  166. fmt %= args
  167. sys.stderr.write('lmdb.tool: %s\n' % (fmt,))
  168. raise SystemExit(1)
  169. def dump_cursor_to_fp(cursor, fp):
  170. for key, value in cursor:
  171. fp.write(_to_bytes('+%d,%d:' % (len(key), len(value))))
  172. fp.write(key)
  173. fp.write(_to_bytes('->'))
  174. fp.write(value)
  175. fp.write(_to_bytes('\n'))
  176. fp.write(_to_bytes('\n'))
  177. def db_map_from_args(args):
  178. db_map = {}
  179. for arg in args:
  180. dbname, sep, path = arg.partition('=')
  181. if not sep:
  182. die('DB specification missing "=": %r', arg)
  183. if dbname == ':main:':
  184. dbname = None
  185. if dbname in db_map:
  186. die('DB specified twice: %r', arg)
  187. db_map[dbname] = (ENV.open_db(_to_bytes(dbname) if dbname else None), path)
  188. if not db_map:
  189. db_map[':main:'] = (ENV.open_db(None), 'main.cdbmake')
  190. return db_map
  191. def cmd_copy(opts, args):
  192. if len(args) != 1:
  193. die('Please specify output directory (see --help)')
  194. output_dir = args[0]
  195. if os.path.exists(output_dir):
  196. die('Output directory %r already exists.', output_dir)
  197. os.makedirs(output_dir, int('0755', 8))
  198. print('Running copy to %r....' % (output_dir,))
  199. ENV.copy(output_dir, compact=opts.compact)
  200. def cmd_copyfd(opts, args):
  201. if args:
  202. die('"copyfd" command takes no arguments (see --help)')
  203. try:
  204. os.fdopen(opts.out_fd, 'w', 0)
  205. except OSError:
  206. e = sys.exc_info()[1]
  207. die('Bad --out-fd %d: %s', opts.out_fd, e)
  208. ENV.copyfd(opts.out_fd)
  209. def cmd_dump(opts, args):
  210. db_map = db_map_from_args(args)
  211. with ENV.begin(buffers=True) as txn:
  212. for dbname, (db, path) in db_map.items():
  213. with open(path, 'wb', BUF_SIZE) as fp:
  214. print('Dumping to %r...' % (path,))
  215. cursor = txn.cursor(db=db)
  216. dump_cursor_to_fp(cursor, fp)
  217. def restore_cursor_from_fp(txn, fp, db):
  218. read = fp.read
  219. read1 = functools.partial(read, 1)
  220. read_until = lambda sep: b''.join(iter(read1, sep)) # NOQA: E731
  221. rec_nr = 0
  222. while True:
  223. rec_nr += 1
  224. plus = read(1)
  225. if plus == b'\n':
  226. break
  227. elif plus != b'+':
  228. die('bad or missing plus, line/record #%d', rec_nr)
  229. try:
  230. klen = int(read_until(b','), 10)
  231. dlen = int(read_until(b':'), 10)
  232. except ValueError:
  233. die('bad or missing length, line/record #%d', rec_nr)
  234. key = read(klen)
  235. if read(2) != b'->':
  236. die('bad or missing separator, line/record #%d', rec_nr)
  237. data = read(dlen)
  238. if (len(key) + len(data)) != (klen + dlen):
  239. die('short key or data, line/record #%d', rec_nr)
  240. if read(1) != b'\n':
  241. die('bad line ending, line/record #%d', rec_nr)
  242. txn.put(key, data, db=db)
  243. return rec_nr
  244. def cmd_drop(opts, args):
  245. if not args:
  246. die('Must specify at least one sub-database (see --help)')
  247. dbs = map(ENV.open_db, (map(_to_bytes, args)))
  248. for idx, db in enumerate(dbs):
  249. name = args[idx]
  250. if name == ':main:':
  251. die('Cannot drop main DB')
  252. print('Dropping DB %r...' % (name,))
  253. with ENV.begin(write=True) as txn:
  254. txn.drop(db)
  255. def cmd_readers(opts, args):
  256. if opts.clean:
  257. print('Cleaned %d stale entries.' % (ENV.reader_check(),))
  258. print(ENV.readers())
  259. def cmd_restore(opts, args):
  260. db_map = db_map_from_args(args)
  261. with ENV.begin(buffers=True, write=True) as txn:
  262. for dbname, (db, path) in db_map.items():
  263. with open(path, 'rb', BUF_SIZE) as fp:
  264. print('Restoring from %r...' % (path,))
  265. count = restore_cursor_from_fp(txn, fp, db)
  266. print('Loaded %d keys from %r' % (count, path))
  267. def delta(hst):
  268. return [(hst[i] - hst[i - 1]) for i in range(1, len(hst))]
  269. SYS_BLOCK = '/sys/block'
  270. def _find_diskstat(path):
  271. if not os.path.exists(SYS_BLOCK):
  272. return
  273. st = os.stat(path)
  274. devs = '%s:%s' % (st.st_dev >> 8, st.st_dev & 0xff)
  275. def maybe(rootpath):
  276. dpath = os.path.join(rootpath, 'dev')
  277. if os.path.exists(dpath):
  278. with open(dpath) as fp:
  279. if fp.read().strip() == devs:
  280. return os.path.join(rootpath, 'stat')
  281. for name in os.listdir(SYS_BLOCK):
  282. basepath = os.path.join(SYS_BLOCK, name)
  283. statpath = maybe(basepath)
  284. if statpath:
  285. return statpath
  286. for name in os.listdir(basepath):
  287. base2path = os.path.join(basepath, name)
  288. statpath = maybe(base2path)
  289. if statpath:
  290. return statpath
  291. class DiskStatter(object):
  292. FIELDS = (
  293. 'reads',
  294. 'reads_merged',
  295. 'sectors_read',
  296. 'read_ms',
  297. 'writes',
  298. 'writes_merged',
  299. 'sectors_written',
  300. 'write_ms',
  301. 'io_count',
  302. 'io_ms',
  303. 'total_ms'
  304. )
  305. def __init__(self, path):
  306. self.fp = open(path)
  307. self.refresh()
  308. def refresh(self):
  309. self.fp.seek(0)
  310. vars(self).update((self.FIELDS[i], int(s))
  311. for i, s in enumerate(self.fp.read().split()))
def cmd_watch(opts, args):
    """Handle "watch": print environment statistics until interrupted.

    Every `opts.interval` seconds ENV.stat() and ENV.info() are sampled and
    one row is written to stdout, either right-justified under a header or,
    with `opts.csv`, as a CSV row. When _find_diskstat() locates a stat file
    for the environment's device, two sector-rate columns are appended.
    A KeyboardInterrupt ends the loop quietly.
    """
    # Latest samples. They are rebound on every loop iteration below; the
    # column callables read them through the enclosing scope at call time.
    info = None
    stat = None

    def window(func):
        # Wrap `func` so each call records a sample and returns the average
        # change per second over at most `opts.window` retained samples.
        history = collections.deque()

        def windowfunc():
            history.append(func())
            if len(history) > opts.window:
                history.popleft()
            if len(history) <= 1:
                # A rate needs at least two samples.
                return 0
            n = sum(delta(history)) / float(len(history) - 1)
            return n / opts.interval
        return windowfunc

    # last_pgno * psize expressed in units of 1048576 bytes.
    envmb = lambda: (info['last_pgno'] * stat['psize']) / 1048576.  # NOQA

    # Each column is (format string, header text, value callable).
    cols = [
        ('%d', 'Depth', lambda: stat['depth']),
        ('%d', 'Branch', lambda: stat['branch_pages']),
        ('%d', 'Leaf', lambda: stat['leaf_pages']),
        ('%+d', 'Leaf/s', window(lambda: stat['leaf_pages'])),
        ('%d', 'Oflow', lambda: stat['overflow_pages']),
        ('%+d', 'Oflow/s', window(lambda: stat['overflow_pages'])),
        ('%d', 'Recs', lambda: stat['entries']),
        ('%+d', 'Recs/s', window(lambda: stat['entries'])),
        ('%d', 'Rdrs', lambda: info['num_readers']),
        ('%.2f', 'EnvMb', envmb),
        ('%+.2f', 'EnvMb/s', window(envmb)),
        ('%d', 'Txs', lambda: info['last_txnid']),
        ('%+.2f', 'Txs/s', window(lambda: info['last_txnid']))
    ]

    statter = None
    statpath = _find_diskstat(ENV.path())
    if statpath:
        statter = DiskStatter(statpath)
        cols += [
            ('%+d', 'SctRd/s', window(lambda: statter.sectors_read)),
            ('%+d', 'SctWr/s', window(lambda: statter.sectors_written)),
        ]

    # Width the header was last printed for; 0 forces a header on the first
    # pass through the loop.
    term_width = 0
    # Column widths start at the header widths and only ever grow.
    widths = [len(head) for _, head, _ in cols]

    if opts.csv:
        writer = csv.writer(sys.stdout, quoting=csv.QUOTE_ALL)
        writer.writerow([head for _, head, _ in cols])

    cnt = 0
    try:
        while True:
            stat = ENV.stat()
            info = ENV.info()
            if statter:
                statter.refresh()

            vals = []
            for i, (fmt, head, func) in enumerate(cols):
                val = fmt % func()
                vals.append(val)
                widths[i] = max(widths[i], len(val))

            if opts.csv:
                writer.writerow(vals)
            else:
                # Reprint the header when the recorded terminal width changed
                # or every (_TERM_HEIGHT - 2) rows.
                # NOTE(review): a _TERM_HEIGHT of exactly 2 makes the modulus
                # zero here -- confirm whether that can occur.
                if term_width != _TERM_WIDTH or not (cnt % (_TERM_HEIGHT - 2)):
                    for i, (fmt, head, func) in enumerate(cols):
                        sys.stdout.write(head.rjust(widths[i] + 1))
                    sys.stdout.write('\n')
                    term_width = _TERM_WIDTH
                for i, val in enumerate(vals):
                    sys.stdout.write(val.rjust(widths[i] + 1))
                sys.stdout.write('\n')

            time.sleep(opts.interval)
            cnt += 1
    except KeyboardInterrupt:
        # Ctrl-C is the way out of the loop.
        pass
  382. def cmd_warm(opts, args):
  383. stat = ENV.stat()
  384. info = ENV.info()
  385. bufsize = 32768
  386. last_offset = stat['psize'] * info['last_pgno']
  387. buf = array.array('B', _to_bytes('\x00' * bufsize))
  388. t0 = time.time()
  389. if opts.use_single_file:
  390. fp = open(opts.env, 'rb', bufsize)
  391. else:
  392. fp = open(opts.env + '/data.mdb', 'rb', bufsize)
  393. while fp.tell() < last_offset:
  394. fp.readinto(buf)
  395. print('Warmed %.2fmb in %dms' %
  396. (last_offset / 1048576., 1000 * (time.time() - t0)))
  397. def cmd_rewrite(opts, args):
  398. if not opts.target_env:
  399. die('Must specify target environment path with -E')
  400. src_info = ENV.info()
  401. target_env = lmdb.open(opts.target_env,
  402. map_size=src_info['map_size'] * 2,
  403. max_dbs=opts.max_dbs, sync=False,
  404. writemap=True, map_async=True,
  405. metasync=False)
  406. dbs = []
  407. for arg in args:
  408. name = None if arg == ':main:' else arg
  409. src_db = ENV.open_db(_to_bytes(name))
  410. dst_db = target_env.open_db(_to_bytes(name))
  411. dbs.append((arg, src_db, dst_db))
  412. if not dbs:
  413. dbs.append((':main:', ENV.open_db(None), target_env.open_db(None)))
  414. for name, src_db, dst_db in dbs:
  415. print('Writing %r...' % (name,))
  416. with target_env.begin(db=dst_db, write=True) as wtxn:
  417. with ENV.begin(db=src_db, buffers=True) as rtxn:
  418. for key, value in rtxn.cursor():
  419. wtxn.put(key, value, append=True)
  420. print('Syncing..')
  421. target_env.sync(True)
  422. def cmd_get(opts, args):
  423. print_header = len(args) > 1
  424. with ENV.begin(buffers=True, db=DB) as txn:
  425. for arg in args:
  426. value = txn.get(_to_bytes(arg))
  427. if value is None:
  428. print('%r: missing' % (arg,))
  429. continue
  430. if print_header:
  431. print('%r:' % (arg,))
  432. if opts.xxd:
  433. print(xxd(value))
  434. else:
  435. print(bytes(value))
  436. def cmd_edit(opts, args):
  437. if args:
  438. die('Edit command only takes options, not arguments (see --help)')
  439. with ENV.begin(write=True) as txn:
  440. cursor = txn.cursor(db=DB)
  441. for elem in opts.add or []:
  442. key, _, value = _to_bytes(elem).partition(_to_bytes('='))
  443. cursor.put(key, value, overwrite=False)
  444. for elem in opts.set or []:
  445. key, _, value = _to_bytes(elem).partition(_to_bytes('='))
  446. cursor.put(key, value)
  447. for key in opts.delete or []:
  448. txn.delete(_to_bytes(key), db=DB)
  449. for elem in opts.add_file or []:
  450. key, _, path = _to_bytes(elem).partition(_to_bytes('='))
  451. with open(path, 'rb') as fp:
  452. cursor.put(key, fp.read(), overwrite=False)
  453. for elem in opts.set_file or []:
  454. key, _, path = _to_bytes(elem).partition(_to_bytes('='))
  455. with open(path, 'rb') as fp:
  456. cursor.put(key, fp.read())
  457. def cmd_shell(opts, args):
  458. import code
  459. import readline # NOQA
  460. code.InteractiveConsole(globals()).interact()
  461. def cmd_stat(opts, args):
  462. pprint.pprint(ENV.stat())
  463. pprint.pprint(ENV.info())
  464. def _get_term_width(default=(80, 25)):
  465. try:
  466. import fcntl # No fcntl on win32
  467. import termios # No termios on win32
  468. s = fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ, '1234')
  469. height, width = struct.unpack('hh', s)
  470. return width, height
  471. except Exception:
  472. return default
  473. def _on_sigwinch(*args):
  474. global _TERM_WIDTH, _TERM_HEIGHT
  475. _TERM_WIDTH, _TERM_HEIGHT = _get_term_width()
  476. def main(argv=None):
  477. parser = make_parser()
  478. opts, args = parser.parse_args(argv)
  479. if not args:
  480. die('Please specify a command (see --help)')
  481. if not opts.env:
  482. die('Please specify environment (--env)')
  483. global ENV
  484. ENV = lmdb.open(opts.env, map_size=opts.map_size * 1048576, subdir=not opts.use_single_file,
  485. max_dbs=opts.max_dbs, create=False, readonly=opts.read == 'READ')
  486. if opts.db:
  487. global DB
  488. DB = ENV.open_db(_to_bytes(opts.db))
  489. if hasattr(signal, 'SIGWINCH'): # Disable on win32.
  490. signal.signal(signal.SIGWINCH, _on_sigwinch)
  491. _on_sigwinch()
  492. func = globals().get('cmd_' + args[0])
  493. if not func:
  494. die('No such command: %r' % (args[0],))
  495. func(opts, args[1:])
  496. if __name__ == '__main__':
  497. main(sys.argv[1:])