| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
- # Extracted from https://github.com/pfmoore/pkg_metadata
- from email.header import Header, decode_header, make_header
- from email.message import Message
- from typing import Any, Dict, List, Union, cast
# Table of recognised metadata header fields.
#
# Each entry is ``(field name, multiple-use)``. The flag is True when the
# header may occur more than once in a message, in which case msg_to_json
# collects every occurrence into a list instead of a single string.
METADATA_FIELDS = [
    # Identification
    ("Metadata-Version", False),
    ("Name", False),
    ("Version", False),
    ("Dynamic", True),
    # Platforms
    ("Platform", True),
    ("Supported-Platform", True),
    # Descriptive text
    ("Summary", False),
    ("Description", False),
    ("Description-Content-Type", False),
    ("Keywords", False),
    # Links and people
    ("Home-page", False),
    ("Download-URL", False),
    ("Author", False),
    ("Author-email", False),
    ("Maintainer", False),
    ("Maintainer-email", False),
    # Licensing
    ("License", False),
    ("License-Expression", False),
    ("License-File", True),
    # Classification and requirements
    ("Classifier", True),
    ("Requires-Dist", True),
    ("Requires-Python", False),
    ("Requires-External", True),
    ("Project-URL", True),
    ("Provides-Extra", True),
    ("Provides-Dist", True),
    ("Obsoletes-Dist", True),
]
def json_name(field: str) -> str:
    """Return the dictionary key used for a metadata field name.

    The name is lower-cased and every hyphen is turned into an underscore,
    e.g. ``"Home-page"`` becomes ``"home_page"``.
    """
    lowered = field.lower()
    return "_".join(lowered.split("-"))
def msg_to_json(msg: Message) -> Dict[str, Any]:
    """Convert a Message object into a JSON-compatible dictionary.

    Every field in ``METADATA_FIELDS`` that is present in *msg* is stored
    under its ``json_name`` key:

    * multiple-use fields become a list with one string per occurrence;
    * single-use fields become a single string;
    * ``Keywords`` is additionally split into a list, on commas if the value
      contains any, otherwise on whitespace.

    A non-empty message payload is stored under ``"description"`` and
    replaces any value taken from a ``Description`` header.
    """

    def sanitise_header(h: Union[Header, str]) -> str:
        """Return header value *h* as text.

        Chunks of a ``Header`` tagged ``unknown-8bit`` are re-labelled as
        UTF-8 when they decode cleanly, and as latin1 otherwise, before the
        header is reassembled into a string.
        """
        if isinstance(h, Header):
            chunks = []
            # ``raw`` rather than ``bytes``: avoid shadowing the builtin.
            for raw, encoding in decode_header(h):
                if encoding == "unknown-8bit":
                    try:
                        # See if UTF-8 works
                        raw.decode("utf-8")
                    except UnicodeDecodeError:
                        # If not, latin1 at least won't fail
                        encoding = "latin1"
                    else:
                        encoding = "utf-8"
                chunks.append((raw, encoding))
            return str(make_header(chunks))
        return str(h)

    result: Dict[str, Any] = {}
    for field, multi in METADATA_FIELDS:
        if field not in msg:
            continue
        key = json_name(field)
        if multi:
            value: Union[str, List[str]] = [
                sanitise_header(v) for v in msg.get_all(field)  # type: ignore
            ]
        else:
            value = sanitise_header(msg.get(field))  # type: ignore
            if key == "keywords":
                # Accept both comma-separated and space-separated
                # forms, for better compatibility with old data.
                if "," in value:
                    value = [v.strip() for v in value.split(",")]
                else:
                    value = value.split()
        result[key] = value

    # NOTE(review): cast() only informs the type checker; a multipart message
    # would presumably yield a list here rather than a str - confirm callers
    # never pass one.
    payload = cast(str, msg.get_payload())
    if payload:
        result["description"] = payload

    return result
|