| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 |
- # -*- coding: utf-8 -*-
- # @Time : 2019/8/23 21:54
- # @Author : zhoujun
- import pathlib
- import os
- import cv2
- import numpy as np
- import scipy.io as sio
- from tqdm.auto import tqdm
- from base import BaseDataSet
- from utils import order_points_clockwise, get_datalist, load, expand_polygon
class ICDAR2015Dataset(BaseDataSet):
    """Text-detection dataset read from comma-separated label files.

    Every label line is expected to hold eight numbers (four ``x, y`` corner
    points) followed by the transcription of the region.
    """

    def __init__(
        self,
        data_path: str,
        img_mode,
        pre_processes,
        filter_keys,
        ignore_tags,
        transform=None,
        **kwargs,
    ):
        """Forward the configuration to ``BaseDataSet``.

        Extra keyword arguments are accepted so that a shared config can be
        splatted into every dataset class; they are not used here.
        """
        super().__init__(
            data_path, img_mode, pre_processes, filter_keys, ignore_tags, transform
        )

    def load_data(self, data_path: str) -> list:
        """Build the sample list from the (image, label) pairs of ``data_path``.

        :param data_path: passed unchanged to ``get_datalist``, which yields
            ``(img_path, label_path)`` pairs.
        :return: one dict per image with the keys ``img_path``, ``img_name``,
            ``text_polys``, ``texts`` and ``ignore_tags``. Images whose label
            file yields no usable box are reported and skipped.
        """
        t_data_list = []
        for img_path, label_path in get_datalist(data_path):
            data = self._get_annotation(label_path)
            if len(data["text_polys"]) > 0:
                item = {"img_path": img_path, "img_name": pathlib.Path(img_path).stem}
                item.update(data)
                t_data_list.append(item)
            else:
                print("there is no suit bbox in {}".format(label_path))
        return t_data_list

    def _get_annotation(self, label_path: str) -> dict:
        """Parse one label file into polygons, transcriptions and ignore flags.

        :param label_path: path of a UTF-8 text file with one region per line.
        :return: dict with ``text_polys`` (array of the kept boxes), ``texts``
            and ``ignore_tags``; the three sequences always have equal length.
        """
        boxes = []
        texts = []
        ignores = []
        with open(label_path, encoding="utf-8", mode="r") as f:
            for line in f:
                # Split at most eight times: everything after the eighth comma
                # is the transcription, which may itself contain commas.
                params = (
                    line.strip().strip("\ufeff").strip("\xef\xbb\xbf").split(",", 8)
                )
                try:
                    box = order_points_clockwise(
                        np.array(list(map(float, params[:8]))).reshape(-1, 2)
                    )
                    if cv2.contourArea(box) > 0:
                        # Read the label before touching any list so that a
                        # line without a transcription cannot leave the boxes
                        # out of step with the texts and ignore flags.
                        label = params[8]
                        boxes.append(box)
                        texts.append(label)
                        ignores.append(label in self.ignore_tags)
                except Exception:
                    # Best effort: report the malformed line's file and go on.
                    print("load label failed on {}".format(label_path))
        data = {
            "text_polys": np.array(boxes),
            "texts": texts,
            "ignore_tags": ignores,
        }
        return data
class DetDataset(BaseDataSet):
    """Text-detection dataset read from annotation files opened with ``load``.

    Each file provides a ``data_root`` and a ``data_list``; every entry of the
    list names an image and carries its text-line (and optionally character)
    annotations.
    """

    def __init__(
        self,
        data_path: str,
        img_mode,
        pre_processes,
        filter_keys,
        ignore_tags,
        transform=None,
        **kwargs,
    ):
        """Store the dataset-specific switches, then initialise ``BaseDataSet``.

        Required keyword arguments (a ``KeyError`` is raised when missing):

        * ``load_char_annotation`` -- also collect the per-character entries
          found under each annotation's ``chars`` key.
        * ``expand_one_char`` -- enables the ``expand_polygon`` step in
          ``load_data``.
        """
        # Set before super().__init__ because load_data reads both attributes.
        # NOTE(review): this presumes BaseDataSet.__init__ calls load_data --
        # confirm against the base class.
        self.load_char_annotation = kwargs["load_char_annotation"]
        self.expand_one_char = kwargs["expand_one_char"]
        super().__init__(
            data_path, img_mode, pre_processes, filter_keys, ignore_tags, transform
        )

    def load_data(self, data_path) -> list:
        """Read text-line (and optionally character) polygons and transcripts.

        :param data_path: an iterable of annotation-file paths; a single path
            (``str`` or ``os.PathLike``) is also accepted and treated as a
            one-element list.
        :return: one dict per image with the keys ``img_path``, ``img_name``,
            ``text_polys``, ``texts`` and ``ignore_tags`` (the ``illegibility``
            flags of the annotations).
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(data_path, (str, os.PathLike)):
            data_path = [data_path]

        data_list = []
        for path in data_path:
            content = load(path)
            for gt in tqdm(content["data_list"], desc="read file {}".format(path)):
                img_path = os.path.join(content["data_root"], gt["img_name"])
                polygons = []
                texts = []
                illegibility_list = []
                for annotation in gt["annotations"]:
                    polygon = annotation["polygon"]
                    text = annotation["text"]
                    # Skip entries without geometry or without a transcript.
                    if len(polygon) == 0 or len(text) == 0:
                        continue
                    # NOTE(review): despite the flag name, the polygon is
                    # expanded only for texts longer than one character --
                    # confirm this is the intended condition.
                    if len(text) > 1 and self.expand_one_char:
                        polygon = expand_polygon(polygon)
                    polygons.append(polygon)
                    texts.append(text)
                    illegibility_list.append(annotation["illegibility"])
                    if self.load_char_annotation:
                        for char_annotation in annotation["chars"]:
                            if (
                                len(char_annotation["polygon"]) == 0
                                or len(char_annotation["char"]) == 0
                            ):
                                continue
                            polygons.append(char_annotation["polygon"])
                            texts.append(char_annotation["char"])
                            illegibility_list.append(char_annotation["illegibility"])
                data_list.append(
                    {
                        "img_path": img_path,
                        "img_name": gt["img_name"],
                        "text_polys": np.array(polygons),
                        "texts": texts,
                        "ignore_tags": illegibility_list,
                    }
                )
        return data_list
class SynthTextDataset(BaseDataSet):
    """Text-detection dataset read from a folder containing a ``gt.mat`` file.

    The ``imnames``, ``wordBB`` and ``txt`` variables of ``gt.mat`` provide the
    image names, word boxes and transcriptions.
    """

    def __init__(
        self,
        data_path: str,
        img_mode,
        pre_processes,
        filter_keys,
        transform=None,
        ignore_tags=None,
        **kwargs,
    ):
        """Load ``gt.mat`` from ``data_path`` and initialise ``BaseDataSet``.

        :param data_path: dataset folder; must contain ``gt.mat``.
        :param transform: forwarded to ``BaseDataSet``.
        :param ignore_tags: transcriptions to flag as ignored; defaults to an
            empty list.
        :raises FileNotFoundError: if ``data_path`` does not exist.
        :raises FileExistsError: if ``gt.mat`` is missing from the folder.
        """
        self.transform = transform
        # load_data tests membership in ignore_tags, so it must be a container
        # even when the caller supplies none.
        self.ignore_tags = [] if ignore_tags is None else ignore_tags
        self.dataRoot = pathlib.Path(data_path)
        if not self.dataRoot.exists():
            raise FileNotFoundError("Dataset folder is not exist.")
        self.targetFilePath = self.dataRoot / "gt.mat"
        if not self.targetFilePath.exists():
            # NOTE(review): FileNotFoundError would describe this better; the
            # type is kept so existing ``except`` clauses continue to match.
            raise FileExistsError("Target file is not exist.")
        targets = {}
        sio.loadmat(
            self.targetFilePath,
            targets,
            squeeze_me=True,
            struct_as_record=False,
            variable_names=["imnames", "wordBB", "txt"],
        )
        self.imageNames = targets["imnames"]
        self.wordBBoxes = targets["wordBB"]
        self.transcripts = targets["txt"]
        # Use the same positional order as the other dataset classes in this
        # file: ignore_tags comes before transform.
        super().__init__(
            data_path,
            img_mode,
            pre_processes,
            filter_keys,
            self.ignore_tags,
            transform,
        )

    def load_data(self, data_path: str) -> list:
        """Turn the arrays loaded from ``gt.mat`` into per-image sample dicts.

        :param data_path: unused; the data was already read in ``__init__``.
        :return: one dict per image with the keys ``img_path``, ``img_name``,
            ``text_polys`` (reshaped to ``num_words x 4 x 2``), ``texts`` and
            ``ignore_tags``. Images whose word count differs from their box
            count are skipped.
        """
        t_data_list = []
        for imageName, wordBBoxes, texts in zip(
            self.imageNames, self.wordBBoxes, self.transcripts
        ):
            # With a single word the trailing axis has been squeezed away;
            # restore it so the box array is always three-dimensional.
            if wordBBoxes.ndim == 2:
                wordBBoxes = np.expand_dims(wordBBoxes, axis=2)
            _, _, numOfWords = wordBBoxes.shape
            text_polys = wordBBoxes.reshape(
                [8, numOfWords], order="F"
            ).T  # num_words * 8
            text_polys = text_polys.reshape(numOfWords, 4, 2)  # num_words * 4 * 2
            # squeeze_me=True can collapse a one-entry transcript to a scalar;
            # atleast_1d keeps the loop iterating over text lines rather than
            # over the characters of a single string.
            transcripts = [
                word for line in np.atleast_1d(texts) for word in line.split()
            ]
            if numOfWords != len(transcripts):
                continue
            image_path = self.dataRoot / imageName
            t_data_list.append(
                {
                    "img_path": str(image_path),
                    "img_name": image_path.stem,
                    "text_polys": text_polys,
                    "texts": transcripts,
                    "ignore_tags": [x in self.ignore_tags for x in transcripts],
                }
            )
        return t_data_list
|