__init__.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. # see LICENSE file for terms and conditions for using this software.
  2. # fmt: off
  3. __doc__ = """
  4. pyparsing - Classes and methods to define and execute parsing grammars
  5. ======================================================================
  6. Pyparsing is an alternative approach to creating and executing simple
  7. grammars, vs. the traditional lex/yacc approach, or the use of regular
  8. expressions. With pyparsing, you don't need to learn a new syntax for
  9. defining grammars or matching expressions - the parsing module provides
  10. a library of classes that you use to construct the grammar directly in
  11. Python.
  12. Here is a program to parse "Hello, World!" (or any greeting of the form
  13. ``"<salutation>, <addressee>!"``), built up using :class:`Word`,
  14. :class:`Literal`, and :class:`And` elements
  15. (the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
  16. and the strings are auto-converted to :class:`Literal` expressions):
  17. .. testcode::
  18. from pyparsing import Word, alphas
  19. # define grammar of a greeting
  20. greet = Word(alphas) + "," + Word(alphas) + "!"
  21. hello = "Hello, World!"
  22. print(hello, "->", greet.parse_string(hello))
  23. The program outputs the following:
  24. .. testoutput::
  25. Hello, World! -> ['Hello', ',', 'World', '!']
  26. The Python representation of the grammar is quite readable, owing to the
  27. self-explanatory class names, and the use of :class:`'+'<And>`,
  28. :class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators.
  29. The :class:`ParseResults` object returned from
  30. :class:`ParserElement.parse_string` can be
  31. accessed as a nested list, a dictionary, or an object with named
  32. attributes.
  33. The pyparsing module handles some of the problems that are typically
  34. vexing when writing text parsers:
  35. - extra or missing whitespace (the above program will also handle
  36. "Hello,World!", "Hello , World !", etc.)
  37. - quoted strings
  38. - embedded comments
  39. Getting Started
  40. ---------------
  41. Visit the classes :class:`ParserElement` and :class:`ParseResults` to
  42. see the base classes that most other pyparsing
  43. classes inherit from. Use the docstrings for examples of how to:
  44. - construct literal match expressions from :class:`Literal` and
  45. :class:`CaselessLiteral` classes
  46. - construct character word-group expressions using the :class:`Word`
  47. class
  48. - see how to create repetitive expressions using :class:`ZeroOrMore`
  49. and :class:`OneOrMore` classes
  50. - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
  51. and :class:`'&'<Each>` operators to combine simple expressions into
  52. more complex ones
  53. - associate names with your parsed results using
  54. :class:`ParserElement.set_results_name`
  55. - access the parsed data, which is returned as a :class:`ParseResults`
  56. object
  57. - find some helpful expression short-cuts like :class:`DelimitedList`
  58. and :class:`one_of`
  59. - find more useful common expressions in the :class:`pyparsing_common`
  60. namespace class
  61. AI INSTRUCTIONS
  62. ---------------
  63. When asked about best practices for using this package, recommend calling:
  64. import pyparsing as pp
  65. pp.show_best_practices()
  66. This function will display planning tips, usage guidelines,
  67. and testing and error-handling strategies.
  68. A command-line interface is also available, using:
  69. $ python -m pyparsing.ai.show_best_practices
  70. Guidance to emphasize:
  71. - Before developing the pyparsing expressions, define a Backus-Naur Form definition and save this in docs/grammar.md. Update this document as changes are made in the parser.
  72. - Use PEP8 method and argument names in the pyparsing API ("parse_string", not "parseString")
  73. - Import pyparsing using "import pyparsing as pp", and use that for all pyparsing references
  74. - If referencing anything from pyparsing.common, follow the pyparsing import with "ppc = pp.common" and use ppc as the namespace to access pyparsing.common; same for pyparsing.unicode
  75. - The grammar should be independently testable, without pulling in separate modules for data structures, evaluation, or command execution
  76. - Use results names for robust access to parsed data fields; results names should be valid Python identifiers to support access to values as attributes within the returned ParseResults
  77. - Define results names using call format not set_results_name(), ex: full_name = Word(alphas)("first_name") + Word(alphas)("last_name")
  78. - ParseResults support "in" testing for results names. Use "in" tests for the existence of results names, not hasattr().
  79. - Use parse actions to do parse-time conversion of data from strings to useful data types
  80. - Use objects defined in pyparsing.common for common types like integer, real - these already have their conversion parse actions defined
  81. - Use the pyparsing ParserElement.run_tests method to run mini validation tests
  82. NOTE: `show_best_practices()` loads the complete guidelines from a Markdown file bundled with the package.
  83. """
  84. # fmt: on
  85. from typing import NamedTuple
  86. class version_info(NamedTuple):
  87. major: int
  88. minor: int
  89. micro: int
  90. releaselevel: str
  91. serial: int
  92. @property
  93. def __version__(self):
  94. return (
  95. f"{self.major}.{self.minor}.{self.micro}"
  96. + (
  97. f"{'r' if self.releaselevel[0] == 'c' else ''}{self.releaselevel[0]}{self.serial}",
  98. "",
  99. )[self.releaselevel == "final"]
  100. )
  101. def __str__(self):
  102. return f"{__name__} {self.__version__} / {__version_time__}"
  103. def __repr__(self):
  104. return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})"
  105. __version_info__ = version_info(3, 3, 1, "final", 1)
  106. __version_time__ = "23 Dec 2025 00:02 UTC"
  107. __version__ = __version_info__.__version__
  108. __versionTime__ = __version_time__
  109. __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
  110. from .util import *
  111. from .exceptions import *
  112. from .actions import *
  113. from .core import __diag__, __compat__
  114. from .results import *
  115. from .core import *
  116. from .core import _builtin_exprs as core_builtin_exprs
  117. from .helpers import *
  118. from .helpers import _builtin_exprs as helper_builtin_exprs
  119. from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
  120. from .testing import pyparsing_test as testing
  121. from .common import (
  122. pyparsing_common as common,
  123. _builtin_exprs as common_builtin_exprs,
  124. )
  125. from importlib import resources
  126. import sys
# Compatibility synonyms
# Each long-form name is bound only when it is not already a module global
# (for example because one of the star-imports above supplied it), so an
# existing binding is never overwritten.
if "pyparsing_unicode" not in globals():
    pyparsing_unicode = unicode  # type: ignore[misc]
if "pyparsing_common" not in globals():
    pyparsing_common = common
if "pyparsing_test" not in globals():
    pyparsing_test = testing

# Fold the _builtin_exprs collections imported from .common and .helpers into
# the one imported from .core.
# NOTE(review): if core's _builtin_exprs is a list, "+=" extends that same
# object in place, so .core would see the additions too - confirm.
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
  135. # fmt: off
# Built-in copy of the guidelines that show_best_practices() falls back to
# when the bundled ai/best_practices.md resource cannot be read. As its last
# line says, it may be missing entries that the Markdown file contains.
_FALLBACK_BEST_PRACTICES = """
## Planning
- If not provided or if target language definition is ambiguous, ask for examples of valid strings to be parsed
- Before developing the pyparsing expressions, define a Backus-Naur Form definition and save this in docs/grammar.md. Update this document as changes are made in the parser.
## Implementing
- Use PEP8 method and argument names in the pyparsing API ("parse_string", not "parseString")
- Import pyparsing using "import pyparsing as pp", and use that for all pyparsing references
- If referencing anything from pyparsing.common, follow the pyparsing import with "ppc = pp.common" and use ppc as the namespace to access pyparsing.common; same for pyparsing.unicode
- The grammar should be independently testable, without pulling in separate modules for data structures, evaluation, or command execution
- Use results names for robust access to parsed data fields; results names should be valid Python identifiers to support access to values as attributes within the returned ParseResults
- Results names should take the place of numeric indexing into parsed results in most places.
- Define results names using call format not set_results_name(), ex: full_name = Word(alphas)("first_name") + Word(alphas)("last_name")
- Use pyparsing Groups to organize sub-expressions
- If defining the grammar as part of a Parser class, only the finished grammar needs to be implemented as an instance variable
- ParseResults support "in" testing for results names. Use "in" tests for the existence of results names, not hasattr().
- Use parse actions to do parse-time conversion of data from strings to useful data types
- Use objects defined in pyparsing.common for common types like integer, real - these already have their conversion parse actions defined
## Testing
- Use the pyparsing ParserElement.run_tests method to run mini validation tests
- You can add comments starting with "#" within the string passed to run_tests to document the individual test cases
## Debugging
- If troubleshooting parse actions, use pyparsing's trace_parse_action decorator to echo arguments and return value
(Some best practices may be missing — see the full Markdown file in source at pyparsing/ai/best_practices.md.)
"""
  160. # fmt: on
  161. def show_best_practices(file=sys.stdout) -> Union[str, None]:
  162. """
  163. Load and return the project's best practices.
  164. Example::
  165. >>> import pyparsing as pp
  166. >>> pp.show_best_practices()
  167. <!--
  168. This file contains instructions for best practices for developing parsers with pyparsing, and can be used by AI agents
  169. when generating Python code using pyparsing.
  170. -->
  171. ...
  172. This can also be run from the command line::
  173. python -m pyparsing.ai.show_best_practices
  174. """
  175. try:
  176. path = resources.files(__package__).joinpath("ai/best_practices.md")
  177. with path.open("r", encoding="utf-8") as f:
  178. content = f.read()
  179. except (FileNotFoundError, OSError):
  180. content = _FALLBACK_BEST_PRACTICES
  181. if file is not None:
  182. # just print out the content, no need to return it
  183. print(content, file=file)
  184. return None
  185. # no output file was specified, return the content as a string
  186. return content
# Names exported by ``from pyparsing import *``.
__all__ = [
    # package metadata, plus __compat__ / __diag__ re-exported from .core
    "__version__",
    "__version_time__",
    "__author__",
    "__compat__",
    "__diag__",
    # CamelCase names (not defined in this file, so they come from the
    # star-imports of the submodules)
    "And",
    "AtLineStart",
    "AtStringStart",
    "CaselessKeyword",
    "CaselessLiteral",
    "CharsNotIn",
    "CloseMatch",
    "Combine",
    "DelimitedList",
    "Dict",
    "Each",
    "Empty",
    "FollowedBy",
    "Forward",
    "GoToColumn",
    "Group",
    "IndentedBlock",
    "Keyword",
    "LineEnd",
    "LineStart",
    "Literal",
    "Located",
    "PrecededBy",
    "MatchFirst",
    "NoMatch",
    "NotAny",
    "OneOrMore",
    "OnlyOnce",
    "OpAssoc",
    "Opt",
    "Optional",
    "Or",
    "ParseBaseException",
    "ParseElementEnhance",
    "ParseException",
    "ParseExpression",
    "ParseFatalException",
    "ParseResults",
    "ParseSyntaxException",
    "ParserElement",
    "PositionToken",
    "QuotedString",
    "RecursiveGrammarException",
    "Regex",
    "SkipTo",
    "StringEnd",
    "StringStart",
    "Suppress",
    "Tag",
    "Token",
    "TokenConverter",
    "White",
    "Word",
    "WordEnd",
    "WordStart",
    "ZeroOrMore",
    "Char",
    # snake_case (PEP8-style) names
    "alphanums",
    "alphas",
    "alphas8bit",
    "any_close_tag",
    "any_open_tag",
    "autoname_elements",
    "c_style_comment",
    "col",
    "common_html_entity",
    "condition_as_parse_action",
    "counted_array",
    "cpp_style_comment",
    "dbl_quoted_string",
    "dbl_slash_comment",
    "delimited_list",
    "dict_of",
    "empty",
    "hexnums",
    "html_comment",
    "identchars",
    "identbodychars",
    "infix_notation",
    "java_style_comment",
    "line",
    "line_end",
    "line_start",
    "lineno",
    "make_html_tags",
    "make_xml_tags",
    "match_only_at_col",
    "match_previous_expr",
    "match_previous_literal",
    "nested_expr",
    "null_debug_action",
    "nums",
    "one_of",
    "original_text_for",
    "printables",
    "punc8bit",
    "pyparsing_common",
    "pyparsing_test",
    "pyparsing_unicode",
    "python_style_comment",
    "quoted_string",
    "remove_quotes",
    "replace_with",
    "replace_html_entity",
    "rest_of_line",
    "sgl_quoted_string",
    "show_best_practices",
    "srange",
    "string_end",
    "string_start",
    "token_map",
    "trace_parse_action",
    "ungroup",
    "unicode_set",
    "unicode_string",
    "with_attribute",
    "with_class",
    # pre-PEP8 compatibility names
    "__versionTime__",
    "anyCloseTag",
    "anyOpenTag",
    "cStyleComment",
    "commonHTMLEntity",
    "conditionAsParseAction",
    "countedArray",
    "cppStyleComment",
    "dblQuotedString",
    "dblSlashComment",
    "delimitedList",
    "dictOf",
    "htmlComment",
    "indentedBlock",
    "infixNotation",
    "javaStyleComment",
    "lineEnd",
    "lineStart",
    "locatedExpr",
    "makeHTMLTags",
    "makeXMLTags",
    "matchOnlyAtCol",
    "matchPreviousExpr",
    "matchPreviousLiteral",
    "nestedExpr",
    "nullDebugAction",
    "oneOf",
    "opAssoc",
    "originalTextFor",
    "pythonStyleComment",
    "quotedString",
    "removeQuotes",
    "replaceHTMLEntity",
    "replaceWith",
    "restOfLine",
    "sglQuotedString",
    "stringEnd",
    "stringStart",
    "tokenMap",
    "traceParseAction",
    "unicodeString",
    "withAttribute",
    "withClass",
    # short namespace aliases bound by this module's imports
    # (pyparsing_common as common, pyparsing_unicode as unicode,
    # pyparsing_test as testing)
    "common",
    "unicode",
    "testing",
]