gen_label.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import argparse
  16. import json
  17. def gen_rec_label(input_path, out_label):
  18. with open(out_label, "w") as out_file:
  19. with open(input_path, "r") as f:
  20. for line in f.readlines():
  21. tmp = line.strip("\n").replace(" ", "").split(",")
  22. img_path, label = tmp[0], tmp[1]
  23. label = label.replace('"', "")
  24. out_file.write(img_path + "\t" + label + "\n")
  25. def gen_det_label(root_path, input_dir, out_label):
  26. with open(out_label, "w") as out_file:
  27. for label_file in os.listdir(input_dir):
  28. img_path = os.path.join(root_path, label_file[3:-4] + ".jpg")
  29. label = []
  30. with open(
  31. os.path.join(input_dir, label_file), "r", encoding="utf-8-sig"
  32. ) as f:
  33. for line in f.readlines():
  34. tmp = line.strip("\n\r").replace("\xef\xbb\xbf", "").split(",")
  35. points = tmp[:8]
  36. s = []
  37. for i in range(0, len(points), 2):
  38. b = points[i : i + 2]
  39. b = [int(t) for t in b]
  40. s.append(b)
  41. result = {"transcription": tmp[8], "points": s}
  42. label.append(result)
  43. out_file.write(
  44. img_path + "\t" + json.dumps(label, ensure_ascii=False) + "\n"
  45. )
  46. if __name__ == "__main__":
  47. parser = argparse.ArgumentParser()
  48. parser.add_argument(
  49. "--mode",
  50. type=str,
  51. default="rec",
  52. help="Generate rec_label or det_label, can be set rec or det",
  53. )
  54. parser.add_argument(
  55. "--root_path",
  56. type=str,
  57. default=".",
  58. help="The root directory of images.Only takes effect when mode=det ",
  59. )
  60. parser.add_argument(
  61. "--input_path",
  62. type=str,
  63. default=".",
  64. help="Input_label or input path to be converted",
  65. )
  66. parser.add_argument(
  67. "--output_label", type=str, default="out_label.txt", help="Output file name"
  68. )
  69. args = parser.parse_args()
  70. if args.mode == "rec":
  71. print("Generate rec label")
  72. gen_rec_label(args.input_path, args.output_label)
  73. elif args.mode == "det":
  74. gen_det_label(args.root_path, args.input_path, args.output_label)