# _json.py
  1. # Extracted from https://github.com/pfmoore/pkg_metadata
  2. from email.header import Header, decode_header, make_header
  3. from email.message import Message
  4. from typing import Any, Dict, List, Union, cast
  5. METADATA_FIELDS = [
  6. # Name, Multiple-Use
  7. ("Metadata-Version", False),
  8. ("Name", False),
  9. ("Version", False),
  10. ("Dynamic", True),
  11. ("Platform", True),
  12. ("Supported-Platform", True),
  13. ("Summary", False),
  14. ("Description", False),
  15. ("Description-Content-Type", False),
  16. ("Keywords", False),
  17. ("Home-page", False),
  18. ("Download-URL", False),
  19. ("Author", False),
  20. ("Author-email", False),
  21. ("Maintainer", False),
  22. ("Maintainer-email", False),
  23. ("License", False),
  24. ("License-Expression", False),
  25. ("License-File", True),
  26. ("Classifier", True),
  27. ("Requires-Dist", True),
  28. ("Requires-Python", False),
  29. ("Requires-External", True),
  30. ("Project-URL", True),
  31. ("Provides-Extra", True),
  32. ("Provides-Dist", True),
  33. ("Obsoletes-Dist", True),
  34. ]
  35. def json_name(field: str) -> str:
  36. return field.lower().replace("-", "_")
  37. def msg_to_json(msg: Message) -> Dict[str, Any]:
  38. """Convert a Message object into a JSON-compatible dictionary."""
  39. def sanitise_header(h: Union[Header, str]) -> str:
  40. if isinstance(h, Header):
  41. chunks = []
  42. for bytes, encoding in decode_header(h):
  43. if encoding == "unknown-8bit":
  44. try:
  45. # See if UTF-8 works
  46. bytes.decode("utf-8")
  47. encoding = "utf-8"
  48. except UnicodeDecodeError:
  49. # If not, latin1 at least won't fail
  50. encoding = "latin1"
  51. chunks.append((bytes, encoding))
  52. return str(make_header(chunks))
  53. return str(h)
  54. result = {}
  55. for field, multi in METADATA_FIELDS:
  56. if field not in msg:
  57. continue
  58. key = json_name(field)
  59. if multi:
  60. value: Union[str, List[str]] = [
  61. sanitise_header(v) for v in msg.get_all(field) # type: ignore
  62. ]
  63. else:
  64. value = sanitise_header(msg.get(field)) # type: ignore
  65. if key == "keywords":
  66. # Accept both comma-separated and space-separated
  67. # forms, for better compatibility with old data.
  68. if "," in value:
  69. value = [v.strip() for v in value.split(",")]
  70. else:
  71. value = value.split()
  72. result[key] = value
  73. payload = cast(str, msg.get_payload())
  74. if payload:
  75. result["description"] = payload
  76. return result