string-util.h 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #ifndef MUPDF_FITZ_STRING_H
  23. #define MUPDF_FITZ_STRING_H
  24. #include "mupdf/fitz/system.h"
  25. #include "mupdf/fitz/context.h"
  26. /* The Unicode character used to replace an incoming character whose
  27. * value is unknown or unrepresentable. */
  28. #define FZ_REPLACEMENT_CHARACTER 0xFFFD
  29. /**
  30. Safe string functions
  31. */
  32. /**
  33. Return strlen(s), if that is less than maxlen, or maxlen if
  34. there is no null byte ('\0') among the first maxlen bytes.
  35. */
  36. size_t fz_strnlen(const char *s, size_t maxlen);
  37. /**
  38. Given a pointer to a C string (or a pointer to NULL) break
  39. it at the first occurrence of a delimiter char (from a given
  40. set).
  41. stringp: Pointer to a C string pointer (or NULL). Updated on
  42. exit to point to the first char of the string after the
  43. delimiter that was found. The string pointed to by stringp will
  44. be corrupted by this call (as the found delimiter will be
  45. overwritten by 0).
  46. delim: A C string of acceptable delimiter characters.
  47. Returns a pointer to a C string containing the chars of stringp
  48. up to the first delimiter char (or the end of the string), or
  49. NULL.
  50. */
  51. char *fz_strsep(char **stringp, const char *delim);
  52. /**
  53. Copy at most n-1 chars of a string into a destination
  54. buffer with null termination, returning the real length of the
  55. initial string (excluding terminator).
  56. dst: Destination buffer, at least n bytes long.
  57. src: C string (non-NULL).
  58. n: Size of dst buffer in bytes.
  59. Returns the length (excluding terminator) of src.
  60. */
  61. size_t fz_strlcpy(char *dst, const char *src, size_t n);
  62. /**
  63. Concatenate 2 strings, with a maximum length.
  64. dst: pointer to first string in a buffer of n bytes.
  65. src: pointer to string to concatenate.
  66. n: Size (in bytes) of buffer that dst is in.
  67. Returns the real length that a concatenated dst + src would have
  68. been (not including terminator).
  69. */
  70. size_t fz_strlcat(char *dst, const char *src, size_t n);
  71. /**
  72. Safe strstr function.
  73. haystack: Where to look (may be NULL).
  74. needle: What to look for.
  75. Returns NULL if unmatched, or pointer to start of match.
  76. */
  77. const char *fz_strstr(const char *haystack, const char *needle);
  78. /**
  79. Safe case-insensitive strstr function. (Accepts UTF-8).
  80. haystack: Where to look (may be NULL).
  81. needle: What to look for.
  82. Returns NULL if unmatched, or pointer to start of match.
  83. */
  84. const char *fz_strstrcase(const char *haystack, const char *needle);
  85. /**
  86. Find the start of the first occurrence of the substring needle in haystack.
  87. */
  88. void *fz_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen);
  89. /**
  90. extract the directory component from a path.
  91. */
  92. void fz_dirname(char *dir, const char *path, size_t dirsize);
  93. /**
  94. Find the filename component in a path.
  95. */
  96. const char *fz_basename(const char *path);
  97. /**
  98. portable strverscmp(3) function
  99. */
  100. int fz_strverscmp(const char *s1, const char *s2);
  101. /**
  102. Like fz_decode_uri_component but in-place.
  103. */
  104. char *fz_urldecode(char *url);
  105. /**
  106. * Return a new string representing the unencoded version of the given URI.
  107. * This decodes all escape sequences except those that would result in one of
  108. * the reserved characters that are part of the URI syntax (; / ? : @ & = + $ , #).
  109. */
  110. char *fz_decode_uri(fz_context *ctx, const char *s);
  111. /**
  112. * Return a new string representing the unencoded version of the given URI component.
  113. * This decodes all escape sequences!
  114. */
  115. char *fz_decode_uri_component(fz_context *ctx, const char *s);
  116. /**
  117. * Return a new string representing the provided string encoded as a URI.
  118. */
  119. char *fz_encode_uri(fz_context *ctx, const char *s);
  120. /**
  121. * Return a new string representing the provided string encoded as a URI component.
  122. * This also encodes the special reserved characters (; / ? : @ & = + $ , #).
  123. */
  124. char *fz_encode_uri_component(fz_context *ctx, const char *s);
  125. /**
  126. * Return a new string representing the provided string encoded as a URI path name.
  127. * This also encodes the special reserved characters except /.
  128. */
  129. char *fz_encode_uri_pathname(fz_context *ctx, const char *s);
  130. /**
  131. create output file name using a template.
  132. If the path contains %[0-9]*d, the first such pattern will be
  133. replaced with the page number. If the template does not contain
  134. such a pattern, the page number will be inserted before the
  135. filename extension. If the template does not have a filename
  136. extension, the page number will be added to the end.
  137. */
  138. void fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page);
  139. /**
  140. rewrite path to the shortest string that names the same path.
  141. Eliminates multiple and trailing slashes, interprets "." and
  142. "..". Overwrites the string in place.
  143. */
  144. char *fz_cleanname(char *name);
  145. /**
  146. rewrite path to the shortest string that names the same path.
  147. Eliminates multiple and trailing slashes, interprets "." and
  148. "..". Allocates a new string that the caller must free.
  149. */
  150. char *fz_cleanname_strdup(fz_context *ctx, const char *name);
  151. /**
  152. Resolve a path to an absolute file name.
  153. The resolved path buffer must be of at least PATH_MAX size.
  154. */
  155. char *fz_realpath(const char *path, char *resolved_path);
  156. /**
  157. Case insensitive (UTF8) string comparison.
  158. */
  159. int fz_strcasecmp(const char *a, const char *b);
  160. /**
  161. Case insensitive (UTF8) string comparison.
  162. n = maximum number of bytes to read from either a or b.
  163. */
  164. int fz_strncasecmp(const char *a, const char *b, size_t n);
  165. /**
  166. FZ_UTFMAX: Maximum number of bytes in the UTF-8 encoding of a single
  167. rune (maximum length returned by fz_chartorune).
  168. */
  169. enum { FZ_UTFMAX = 4 };
  170. /**
  171. UTF8 decode a single rune from a sequence of chars.
  172. rune: Pointer to an int to assign the decoded 'rune' to.
  173. (0xFFFD on error).
  174. str: Pointer to a UTF8 encoded string.
  175. Returns the number of bytes consumed.
  176. */
  177. int fz_chartorune(int *rune, const char *str);
  178. /**
  179. UTF8 decode a single rune from a sequence of chars
  180. of given length.
  181. rune: Pointer to an int to assign the decoded 'rune' to.
  182. (0xFFFD on error).
  183. str: Pointer to a UTF8 encoded string.
  184. n: The number of bytes available at str.
  185. Returns the number of bytes consumed.
  186. */
  187. int fz_chartorunen(int *rune, const char *str, size_t n);
  188. /**
  189. UTF8 encode a rune to a sequence of chars.
  190. str: Pointer to a place to put the UTF8 encoded character.
  191. rune: The rune to encode.
  192. Returns the number of bytes the rune took to output.
  193. */
  194. int fz_runetochar(char *str, int rune);
  195. /**
  196. Count how many chars are required to represent a rune.
  197. rune: The rune to encode.
  198. Returns the number of bytes required to represent this rune in
  199. UTF8.
  200. */
  201. int fz_runelen(int rune);
  202. /**
  203. Compute the index of a rune in a string.
  204. str: Pointer to beginning of a string.
  205. p: Pointer to a char in str.
  206. Returns the index of the rune pointed to by p in str.
  207. */
  208. int fz_runeidx(const char *str, const char *p);
  209. /**
  210. Obtain a pointer to the char representing the rune
  211. at a given index.
  212. str: Pointer to beginning of a string.
  213. idx: Index of a rune to return a char pointer to.
  214. Returns a pointer to the char where the desired rune starts,
  215. or NULL if the string ends before the index is reached.
  216. */
  217. const char *fz_runeptr(const char *str, int idx);
  218. /**
  219. Count how many runes the UTF-8 encoded string
  220. consists of.
  221. s: The UTF-8 encoded, NUL-terminated text string.
  222. Returns the number of runes in the string.
  223. */
  224. int fz_utflen(const char *s);
  225. /**
  226. Convert a wchar string into a new heap allocated utf8 one.
  227. */
  228. char *fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s);
  229. /**
  230. Convert a utf8 string into a new heap allocated wchar one.
  231. */
  232. wchar_t *fz_wchar_from_utf8(fz_context *ctx, const char *path);
  233. /**
  234. Locale-independent decimal to binary conversion. On overflow
  235. return (-)INFINITY and set errno to ERANGE. On underflow return
  236. 0 and set errno to ERANGE. Special inputs (case insensitive):
  237. "NAN", "INF" or "INFINITY".
  238. */
  239. float fz_strtof(const char *s, char **es);
  240. int fz_grisu(float f, char *s, int *exp);
  241. /**
  242. Check and parse string into page ranges:
  243. /,?(-?\d+|N)(-(-?\d+|N))?/
  244. */
  245. int fz_is_page_range(fz_context *ctx, const char *s);
  246. const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n);
  247. /**
  248. Unicode aware tolower and toupper functions.
  249. */
  250. int fz_tolower(int c);
  251. int fz_toupper(int c);
  252. #endif