| 123456789101112131415161718192021222324252627282930 |
# -*- coding: utf-8 -*-
"""Report which Japanese characters appear in the ppocrv5 dictionary file.

Reads ``ppocrv5_dict.txt`` from the OnnxOCR checkout located next to this
script and prints how many hiragana, katakana and kanji characters it
contains, together with up to 20 sample characters of each kind.
"""
import sys
from pathlib import Path

# Dictionary file, resolved relative to this script's directory.
dict_path = (
    Path(__file__).parent
    / 'OnnxOCR-main' / 'OnnxOCR-main' / 'onnxocr' / 'models' / 'ppocrv5'
    / 'ppocrv5_dict.txt'
)

# Inclusive (first, last) code points of the Unicode blocks being checked.
HIRAGANA_RANGE = ('\u3040', '\u309F')
KATAKANA_RANGE = ('\u30A0', '\u30FF')
# The CJK Unified Ideographs block ends at U+9FFF; stopping at U+9FAF would
# miss the ideographs assigned in U+9FB0..U+9FFF. Extension blocks
# (e.g. U+3400..U+4DBF) are intentionally not included here.
KANJI_RANGE = ('\u4E00', '\u9FFF')

# Number of sample characters shown per category.
SAMPLE_SIZE = 20


def chars_in_range(text, first, last):
    """Return the characters of *text* whose code point lies in [first, last].

    Order is preserved and duplicates are kept, so ``len()`` of the result is
    the number of occurrences, not the number of distinct characters.
    """
    return [ch for ch in text if first <= ch <= last]


def count_dict_chars(text):
    """Return the number of characters in *text*, ignoring line breaks.

    Line breaks only separate the entries of the file, so they are not
    counted as dictionary characters.
    """
    return sum(1 for ch in text if ch not in '\r\n')


def main():
    """Read the dictionary file and print the Japanese-character report."""
    with open(dict_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Check for Japanese characters.
    hiragana = chars_in_range(content, *HIRAGANA_RANGE)
    katakana = chars_in_range(content, *KATAKANA_RANGE)
    kanji = chars_in_range(content, *KANJI_RANGE)

    print(f'字典总字符数: {count_dict_chars(content)}')
    print(f'平假名数量: {len(hiragana)}')
    print(f'片假名数量: {len(katakana)}')
    print(f'汉字数量: {len(kanji)}')

    if hiragana:
        print(f'平假名示例: {hiragana[:SAMPLE_SIZE]}')
    else:
        print('平假名: 无')

    if katakana:
        print(f'片假名示例: {katakana[:SAMPLE_SIZE]}')
    else:
        print('片假名: 无')

    # The kanji sample is printed unconditionally (an empty list if none).
    print(f'汉字示例: {kanji[:SAMPLE_SIZE]}')


if __name__ == '__main__':
    main()
|