# -*- coding: utf-8 -*- import sys from pathlib import Path dict_path = Path(__file__).parent / 'OnnxOCR-main' / 'OnnxOCR-main' / 'onnxocr' / 'models' / 'ppocrv5' / 'ppocrv5_dict.txt' with open(dict_path, 'r', encoding='utf-8') as f: content = f.read() # 检查日文字符 hiragana = [c for c in content if '\u3040' <= c <= '\u309F'] # 平假名 katakana = [c for c in content if '\u30A0' <= c <= '\u30FF'] # 片假名 kanji = [c for c in content if '\u4E00' <= c <= '\u9FAF'] # 汉字 print(f'字典总字符数: {len(content)}') print(f'平假名数量: {len(hiragana)}') print(f'片假名数量: {len(katakana)}') print(f'汉字数量: {len(kanji)}') if hiragana: print(f'平假名示例: {hiragana[:20]}') else: print('平假名: 无') if katakana: print(f'片假名示例: {katakana[:20]}') else: print('片假名: 无') print(f'汉字示例: {kanji[:20]}')