Commit d9c98689 authored by morion Cham's avatar morion Cham
Browse files
parents b87de387 5f518764
Loading
Loading
Loading
Loading

.gitmodules

0 → 100644
+6 −0
Original line number Diff line number Diff line
[submodule "public/data/0.pdfs/a-complete-collection-of-bronzes-unearthed-in-china"]
	path = public/data/0.pdfs/a-complete-collection-of-bronzes-unearthed-in-china
	url = https://mirrors.sustech.edu.cn/git/12011404/a-complete-collection-of-bronzes-unearthed-in-china.git
[submodule "users/ycm/图中图提取/labelGo-Yolov5AutoLabelImg"]
	path = users/ycm/图中图提取/labelGo-Yolov5AutoLabelImg
	url = https://github.com/cnyvfang/labelGo-Yolov5AutoLabelImg.git
Original line number Diff line number Diff line
Subproject commit 97a66ae2e1a650c1493c0b97c98e99ca3191a381
+236 −0
Original line number Diff line number Diff line
import json
import math
from typing import Tuple
import cv2
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image

# Reset matplotlib to its default style for any later plotting.
plt.style.use("default")


def cv_imread(filePath):
    """Read the image at *filePath* and return it as an OpenCV BGR ndarray.

    Goes through PIL instead of cv2.imread so paths with non-ASCII
    characters are handled correctly.
    """
    # cv_img=cv2.imdecode(np.fromfile(filePath,dtype=np.uint8),-1)
    with Image.open(filePath) as pil_img:
        # Force 3-channel RGB so grayscale/palette/RGBA files do not break
        # the RGB->BGR conversion below (cvtColor requires 3 channels).
        cv_img = np.array(pil_img.convert("RGB"))
    # PIL yields RGB; OpenCV expects BGR, so swap the channel order.
    cv_img = cv2.cvtColor(cv_img, cv2.COLOR_RGB2BGR)
    return cv_img

def cv_imwrite(filename, src, format=".png"):
    """Encode *src* in the given image *format* and write it to *filename*.

    Uses imencode + tofile so non-ASCII file paths work (cv2.imwrite does
    not handle them).
    """
    encoded = cv2.imencode(format, src)[1]
    encoded.tofile(filename)


def cv_imshow(img):
    """Return a PIL Image built from the BGR ndarray *img* (for display)."""
    # Reverse the channel axis: BGR -> RGB, which PIL expects.
    rgb = img[:, :, ::-1]
    return Image.fromarray(rgb)


def cv_imshow_single_channel(img):
    """Return the single-channel ndarray *img* as an 8-bit grayscale PIL Image."""
    pil_img = Image.fromarray(img)
    return pil_img.convert('L')

def cv_rect2slice(rect:Tuple[int, int, int, int]):
    """Convert an OpenCV (x, y, w, h) rect to (y1, y2, x1, x2) slice bounds."""
    left, top, width, height = rect
    bottom = top + height
    right = left + width
    return (top, bottom, left, right)
def slice2xyxy(slice:Tuple[int, int, int, int]):
    """Convert (y1, y2, x1, x2) slice bounds to (x1, y1, x2, y2) corner form.

    numpy arrays index row-first, while CV box formats put x first — see
    https://roboflow.com/formats/yolov5-pytorch-txt
    """
    top, bottom, left, right = slice
    return (left, top, right, bottom)

def xyxy2yolo_xywh(image_size:Tuple[int, int], box:Tuple[int, int, int, int]):
    """Convert an absolute (xmin, ymin, xmax, ymax) box to YOLO form.

    image_size is (width, height). Returns (center_x, center_y, width,
    height), each normalized to [0, 1] by the corresponding image dimension.
    Example box: xmin 1580.0, ymin 758.667, xmax 1638.6667, ymax 818.6667.
    """
    img_w, img_h = image_size
    x_min, y_min, x_max, y_max = box
    # Box center and extent in absolute pixels.
    cx = (x_min + x_max)/2.0
    cy = (y_min + y_max)/2.0
    w = x_max - x_min
    h = y_max - y_min
    # Normalize by the image dimensions.
    return (cx / img_w, cy / img_h, w / img_w, h / img_h)

def yolo_xywh2cv_rect(image_size:Tuple[int, int], yolo_box:Tuple[float, float, float, float]):
    """Convert a normalized YOLO (cx, cy, w, h) box back to an OpenCV
    (x, y, w, h) rect in integer pixels, clamped into the image bounds.
    """
    img_w, img_h = image_size
    cx, cy, w, h = yolo_box
    # Scale back to absolute pixel units.
    cx, w = cx*img_w, w*img_w
    cy, h = cy*img_h, h*img_h

    def clamp(value, limit):
        # Round to the nearest pixel and keep within [0, limit].
        return min(max(int(round(value)), 0), limit)

    return (clamp(cx - w/2, img_w), clamp(cy - h/2, img_h),
            clamp(w, img_w), clamp(h, img_h))
    


from deep_translator import (GoogleTranslator,
                             ChatGptTranslator,
                             MicrosoftTranslator,
                             PonsTranslator,
                             LingueeTranslator,
                             MyMemoryTranslator,
                             YandexTranslator,
                             PapagoTranslator,
                             DeeplTranslator,
                             QcriTranslator,
                             single_detection,
                             batch_detection)

# Use any translator you like, in this example GoogleTranslator
# NOTE(review): despite its name, utf82gbk does NOT transcode utf-8 to gbk —
# it machine-translates Chinese text to English over the network.
translator = MyMemoryTranslator(source='zh-CN', target='en-US')
def utf82gbk(s):
    """Translate the Chinese string *s* to English (the name is historical)."""
    # return s.encode("gbk").decode(encoding='gbk', errors="ignore")
    # return s.encode("gbk").decode(encoding='utf-8', errors="ignore")
    return translator.translate(s)  
    # return unicode(s)
    
    
def dump_json_file(obj, file_path):
    """Serialize *obj* as JSON into *file_path*.

    Uses a context manager so the file handle is closed deterministically
    (the original relied on the garbage collector to close it).
    """
    with open(file_path, 'w') as f:
        json.dump(obj, f)
    
def load_json_file(file_path):
    """Read *file_path* and return its parsed JSON content.

    Uses a context manager so the file handle is closed deterministically
    (the original relied on the garbage collector to close it).
    """
    with open(file_path, 'r') as f:
        return json.load(f)


# https://blog.csdn.net/yb389973788/article/details/108646721


# Android KeyEvent keycode table: maps single characters and Android
# KEYCODE_* / META_* constant names to their integer codes. Upper- and
# lower-case letters map to the same keycode.
keycode_dict = {'0': 7, '1': 8, '2': 9, '3': 10, '4': 11, '5': 12, '6': 13, '7': 14, '8': 15, '9': 16,
           'A': 29, 'B': 30, 'C': 31, 'D': 32, 'E': 33, 'F': 34, 'G': 35, 'H': 36, 'I': 37, 'J': 38,
           'K': 39, 'L': 40, 'M': 41, 'N': 42, 'O': 43, 'P': 44, 'Q': 45, 'R': 46, 'S': 47, 'T': 48,
           'U': 49, 'V': 50, 'W': 51, 'X': 52, 'Y': 53, 'Z': 54,
           'a': 29, 'b': 30, 'c': 31, 'd': 32, 'e': 33, 'f': 34, 'g': 35, 'h': 36, 'i': 37, 'j': 38,
           'k': 39, 'l': 40, 'm': 41, 'n': 42, 'o': 43, 'p': 44, 'q': 45, 'r': 46, 's': 47, 't': 48,
           'u': 49, 'v': 50, 'w': 51, 'x': 52, 'y': 53, 'z': 54,
           'META_ALT_LEFT_ON': 16,
           'META_ALT_MASK': 50,
           'META_ALT_ON': 2,
           'META_ALT_RIGHT_ON': 32,
           'META_CAPS_LOCK_ON': 1048576,
           'META_CTRL_LEFT_ON': 8192,
           'META_CTRL_MASK': 28672,
           'META_CTRL_ON': 4096,
           'META_CTRL_RIGHT_ON': 16384,
           'META_FUNCTION_ON': 8,
           'META_META_LEFT_ON': 131072,
           'META_META_MASK': 458752,
           'META_META_ON': 65536,
           'META_META_RIGHT_ON': 262144,
           'META_NUM_LOCK_ON': 2097152,
           'META_SCROLL_LOCK_ON': 4194304,
           'META_SHIFT_LEFT_ON': 64,
           'META_SHIFT_MASK': 193,
           'META_SHIFT_ON': 1,
           'META_SHIFT_RIGHT_ON': 128,
           'META_SYM_ON': 4,
           'KEYCODE_APOSTROPHE': 75,
           'KEYCODE_AT': 77,
           'KEYCODE_BACKSLASH': 73,
           'KEYCODE_COMMA': 55,
           'KEYCODE_EQUALS': 70,
           'KEYCODE_GRAVE': 68,
           'KEYCODE_LEFT_BRACKET': 71,
           'KEYCODE_MINUS': 69,
           'KEYCODE_PERIOD': 56,
           'KEYCODE_PLUS': 81,
           'KEYCODE_POUND': 18,
           'KEYCODE_RIGHT_BRACKET': 72,
           'KEYCODE_SEMICOLON': 74,
           'KEYCODE_SLASH': 76,
           'KEYCODE_STAR': 17,
           'KEYCODE_SPACE': 62,
           'KEYCODE_TAB': 61,
           'KEYCODE_ENTER': 66,
           'KEYCODE_ESCAPE': 111,
           'KEYCODE_CAPS_LOCK': 115,
           'KEYCODE_CLEAR': 28,
           'KEYCODE_PAGE_DOWN': 93,
           'KEYCODE_PAGE_UP': 92,
           'KEYCODE_SCROLL_LOCK': 116,
           'KEYCODE_MOVE_END': 123,
           'KEYCODE_MOVE_HOME': 122,
           'KEYCODE_INSERT': 124,
           'KEYCODE_SHIFT_LEFT': 59,
           'KEYCODE_SHIFT_RIGHT': 60,
           'KEYCODE_F1': 131,
           'KEYCODE_F2': 132,
           'KEYCODE_F3': 133,
           'KEYCODE_F4': 134,
           'KEYCODE_F5': 135,
           'KEYCODE_F6': 136,
           'KEYCODE_F7': 137,
           'KEYCODE_F8': 138,
           'KEYCODE_F9': 139,
           'KEYCODE_F10': 140,
           'KEYCODE_F11': 141,
           'KEYCODE_F12': 142,
           'KEYCODE_BACK': 4,
           'KEYCODE_CALL': 5,
           'KEYCODE_ENDCALL': 6,
           'KEYCODE_CAMERA': 27,
           'KEYCODE_FOCUS': 80,
           'KEYCODE_VOLUME_UP': 24,
           'KEYCODE_VOLUME_DOWN': 25,
           'KEYCODE_VOLUME_MUTE': 164,
           'KEYCODE_MENU': 82,
           'KEYCODE_HOME': 3,
           'KEYCODE_POWER': 26,
           'KEYCODE_SEARCH': 84,
           'KEYCODE_NOTIFICATION': 83,
           'KEYCODE_NUM': 78,
           'KEYCODE_SYM': 63,
           'KEYCODE_SETTINGS': 176,
           'KEYCODE_DEL': 67,
           'KEYCODE_FORWARD_DEL': 112,
           'KEYCODE_NUMPAD_0': 144,
           'KEYCODE_NUMPAD_1': 145,
           'KEYCODE_NUMPAD_2': 146,
           'KEYCODE_NUMPAD_3': 147,
           'KEYCODE_NUMPAD_4': 148,
           'KEYCODE_NUMPAD_5': 149,
           'KEYCODE_NUMPAD_6': 150,
           'KEYCODE_NUMPAD_7': 151,
           'KEYCODE_NUMPAD_8': 152,
           'KEYCODE_NUMPAD_9': 153,
           'KEYCODE_NUMPAD_ADD': 157,
           'KEYCODE_NUMPAD_COMMA': 159,
           'KEYCODE_NUMPAD_DIVIDE': 154,
           'KEYCODE_NUMPAD_DOT': 158,
           'KEYCODE_NUMPAD_EQUALS': 161,
           'KEYCODE_NUMPAD_LEFT_PAREN': 162,
           'KEYCODE_NUMPAD_MULTIPLY': 155,
           'KEYCODE_NUMPAD_RIGHT_PAREN': 163,
           'KEYCODE_NUMPAD_SUBTRACT': 156,
           'KEYCODE_NUMPAD_ENTER': 160,
           'KEYCODE_NUM_LOCK': 143,
           'KEYCODE_MEDIA_FAST_FORWARD': 90,
           'KEYCODE_MEDIA_NEXT': 87,
           'KEYCODE_MEDIA_PAUSE': 127,
           'KEYCODE_MEDIA_PLAY': 126,
           'KEYCODE_MEDIA_PLAY_PAUSE': 85,
           'KEYCODE_MEDIA_PREVIOUS': 88,
           'KEYCODE_MEDIA_RECORD': 130,
           'KEYCODE_MEDIA_REWIND': 89,
           'KEYCODE_MEDIA_STOP': 86,
           }
def get_keycode(key_name):
    """Return the Android keycode for *key_name*; raises KeyError if unknown."""
    return keycode_dict[key_name]
 No newline at end of file

labelGo-Yolov5AutoLabelImg @ b873a7d5

Original line number Diff line number Diff line
Subproject commit b873a7d5482d152ded714120fcc7e34e7a038fb7
+259 −0
Original line number Diff line number Diff line
# 自动标注程序文档
# 操作方法
# 1. 按下ad按键切换当前工作图片。或者滑动滑条也可以。
# 2. 按j或者k:自动跳过已经标注好的图片,寻找下一个没有标注过的图片。
# 3. 按enter保存标注结果。
# 4. 按s保存参数。下次启动程序会直接加载这次保存的参数。每次保存会覆盖上一次的参数。
# 5. 按q退出程序。
# 可以调节的参数介绍:见下面 var_dict 的定义

from typing import List, Tuple
from commons import *
from pathlib import Path
import json

debug = False

# Resolve project layout relative to this file.
this_file = Path(__file__).resolve()
this_directory = this_file.parent
project_directory = this_directory.parent.parent.parent

# 1. Define the annotation targets

classes = 342 # EDIT ME: the artifact label range of this dataset, from 1 up to `classes`.

data_folder = project_directory / "public/data"
# data_folder = "/data/dataset"
dataset_name = "ChineseUnearthedBronzes-01-Beijing_Tianjin-InnerMongolia"
input_folder = data_folder / '1.pages' / dataset_name
# NOTE(review): glob is case-sensitive — only uppercase .PNG pages are counted.
pages = len(list(input_folder.glob("*.PNG")))
output_folder = data_folder / '3.pngs' / dataset_name
if not output_folder.exists():
    output_folder.mkdir(parents=True, exist_ok=True)
# Page-image path for a given 1-based page number.
img_path = lambda number: input_folder / f'{number}页-{number}.PNG'
# img = cv_imread(img_path)   
# cv_imshow(img)

# Rewrite classes.txt on every run: class 0 is "any", then 1..classes.
classes_txt_path = output_folder/'classes.txt'
# if not classes_txt_path.exists():
with open(classes_txt_path, 'w') as f:
    f.write("any\n")
    for i in range(1, classes+1):
        f.write(f"{i}\n")

# 2. Tunable processing parameters (persisted to JSON between runs).
parameter_path = this_directory/'标注器保存的参数.json'
max_area_possible = 2900*4000  # full-page pixel area used to normalize the area sliders
percents = 1000  # slider resolution: area/ratio values are in thousandths
if parameter_path.exists():
    var_dict = load_json_file(parameter_path)
else:
    var_dict = dict(
        # visualization only
        number=dict(range=(1, pages), value=1, comment="这是当前处理的图片编号。") # current page number
        ,processing_step=dict(range=(0, 3), value=3, comment="0表示显示原图,1表示显示黑白处理图,2表示轮廓图,3表示目标框选图。") # which pipeline stage to display
        # parameters that need tuning
        ,blur_ksize=dict(range=(0, 10), value=0, comment="0表示不进行平滑,1-10表示滤波的范围大小。")
        ,black_threshold=dict(range=(0, 255), value=240, comment="这是用于二值化图片为黑白图片的阈值,大于这个亮度值的像素会变成黑色,其他地方是白色。") # binarization threshold
        ,min_valid_area=dict(range=(0, percents), value=int(12679/max_area_possible*percents), comment=f"这是想要检测的内图最小可以接受的面积。(单位是{percents}分数)") # minimum acceptable inner-image area
        ,max_valid_area=dict(range=(0, percents), value=int(11579852/max_area_possible*percents), comment=f"这是想要检测的内图最大可以接受的面积。(单位是{percents}分数)") # maximum acceptable inner-image area
        ,max_length_width_ratio=dict(range=(0, percents), value=percents, comment=f"这是想要检测的内图最大的长宽比。(单位是{percents}分比,取到{percents}/{percents}时最大长宽比是10。)") # maximum aspect ratio

    )
    
def process(img, visual_steps):
    """Run the inner-image detection pipeline on the BGR page image *img*.

    visual_steps selects the stage image returned for display:
    0 = original, 1 = binarized, 2 = contour overlay, 3 = rectangle overlay.
    Returns (visual_image, list of valid OpenCV (x, y, w, h) rects).
    """
    # Stage 1: binarize (pixels brighter than the threshold become 255),
    # then optionally median-blur to remove speckle.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    not_white = cv2.threshold(gray, var_dict['black_threshold']['value'], 255, cv2.THRESH_BINARY)[1]
    kernel = 2*var_dict['blur_ksize']['value']+1
    if kernel > 1:
        not_white = cv2.medianBlur(not_white, ksize=kernel)
    # Stage 2: detect contours and keep those whose area falls inside the
    # configured window (slider values are `percents`-ths of a full page).
    contours, hierarchy = cv2.findContours(
        not_white,
        mode=cv2.RETR_CCOMP,  # two-level hierarchy
        # mode=cv2.RETR_TREE,      # full contour tree
        # mode=cv2.RETR_EXTERNAL,  # outermost contours only
        method=cv2.CHAIN_APPROX_SIMPLE)  # keep contour corner points only
    area_lo = var_dict['min_valid_area']['value']/percents*max_area_possible
    area_hi = var_dict['max_valid_area']['value']/percents*max_area_possible
    valid_contours = [c for c in contours
                      if area_lo < cv2.contourArea(c) < area_hi and c.size >= 6]
    contour_img = cv2.drawContours(img.copy(), contours=valid_contours,
                                   contourIdx=-1, color=(255, 0, 0), thickness=10)
    # Stage 3: bounding rects, filtered by maximum aspect ratio.
    rects = [cv2.boundingRect(c) for c in valid_contours]
    ratio_cap = var_dict['max_length_width_ratio']['value']/percents*10
    valid_rects = [r for r in rects
                   if 1 <= (max(r[2:])/min(r[2:])) <= ratio_cap]
    if debug:
        for r in valid_rects:
            print((max(r[2:])/min(r[2:])))
    rect_img = img.copy()
    for r in valid_rects:
        rect_img = cv2.rectangle(rect_img, r, color=(0, 255, 0), thickness=10)

    # Select the visualization stage requested by the caller.
    stages = [img, not_white, contour_img, rect_img]
    visual_res = stages[min(max(visual_steps, 0), len(stages) - 1)]
    return visual_res, valid_rects


        
# 3. Build the GUI.
# The window title is routed through the translator; a raw Chinese title
# was tried and did not display correctly (see the commented line below).
window_name = utf82gbk("内图提取器")  # Extractor
# window_name = u"内图提取器"  # did not work
window_size = (2900 // 5, (4000+800) // 5)




def make_yolo(txt_path:str, array_shape:Tuple[int, int, int], cv_rects:List[Tuple[int, int, int, int]], label_type=0):
    """Write *cv_rects* to *txt_path* in YOLO txt format.

    Each line is "class_id center_x center_y width height" with coordinates
    normalized by the image size taken from *array_shape* (numpy shape,
    height first). Format reference:
    https://roboflow.com/formats https://roboflow.com/formats/yolov8-pytorch-txt
    """
    image_size = (array_shape[1], array_shape[0])  # (width, height)
    lines = []
    for rect in cv_rects:
        yolo_box = xyxy2yolo_xywh(image_size, slice2xyxy(cv_rect2slice(rect)))
        lines.append(" ".join([str(label_type)] + [str(f) for f in yolo_box]))
    with open(txt_path, 'w') as f:
        f.write("\n".join(lines)+"\n")

def yolo_txt_path(number):
    """YOLO annotation txt file for page *number*."""
    return output_folder / f'{number}页-{number}.txt'

def inner_png_path(number, subimage_num):
    """Cropped inner-image png *subimage_num* of page *number*."""
    return output_folder / f"{number}页-{number}-内图{subimage_num}.png"

def is_annotated(number:int):
    """True when page *number* already has a saved YOLO annotation file."""
    txt_file = yolo_txt_path(number)
    return txt_file.exists()
def remove_annotated(number:int):
    """Delete the saved annotation for page *number*: every cropped inner
    png plus the YOLO txt file."""
    # path = f"{number}-*.png"
    pattern = inner_png_path(number, "*").relative_to(output_folder).as_posix()
    for png in output_folder.glob(pattern):
        png.unlink()
    yolo_txt_path(number).unlink()
    
def make_sub_image_from_yolo(txt_path:str, number:int, image:np.ndarray):
    # TODO: unimplemented stub — presumably intended to re-crop sub-images
    # of *image* from an existing YOLO txt file; currently does nothing.
    pass
    
    
current_img, valid_rects = None, None
def save_rois(image:np.ndarray, rects:List[Tuple[int, int, int, int]]):
    """Persist the current page's annotation: one YOLO txt plus one cropped
    png per rect. Overwrites any existing annotation for this page.
    """
    current_num = int(var_dict['number']['value'])
    yolo_path = yolo_txt_path(current_num)
    if is_annotated(current_num):
        print(f'警告:{current_num}号图片有已知标注成果,正在使用新的标注结果覆盖。')
        remove_annotated(current_num)

    # Read the previous page's labels: the new class id continues from the
    # previous page's largest id (+1), unless that largest id was 0.
    new_label = 0
    prev_txt = yolo_txt_path(current_num-1)
    # Fix: previously this opened the previous txt unconditionally and
    # crashed with FileNotFoundError when that page had not been annotated;
    # now it falls back to label 0 instead.
    if current_num != 1 and prev_txt.exists():
        with open(prev_txt) as f:
            lines = f.readlines()
            for line in lines:
                ss = line.strip().split()
                if len(ss)==5:
                    new_label = max(new_label, int(ss[0]))
    new_label = new_label+1 if new_label!=0 else new_label

    make_yolo(yolo_path.as_posix(), image.shape, rects, new_label)

    # Save the cropped sub-images.
    for i, rect in enumerate(rects):
        y1, y2, x1, x2 = cv_rect2slice(rect)
        roi = image[y1:y2, x1:x2]
        # cv2.imwrite((output_folder/f"{current_num}-{i}.png").as_posix(), roi)
        cv_imwrite(inner_png_path(current_num, i).as_posix(), roi)

    print(f"enter: 确认标注,已保存标注结果到 {yolo_path}")

import time
# Throttle for the "editing parameter X" console hint (at most every 3 s).
last_prompt_time = time.time()
# Countdown of `update` calls to skip at startup — presumably to swallow the
# spurious callbacks fired while the trackbars are created. TODO confirm
# the `-1` offset against the trackbar setup in init().
first_time_update = len(var_dict.keys())-1
def update(x):
    """Trackbar callback: sync var_dict from the GUI, reprocess, redraw."""
    global current_img, valid_rects, first_time_update, last_prompt_time
    # Swallow the spurious callbacks fired during trackbar creation.
    if first_time_update > 0:
        first_time_update -= 1
        print(f"加载程序中, 剩余{first_time_update}s")
        return
    for name, entry in var_dict.items():
        pos = cv2.getTrackbarPos(name, window_name)
        if entry['value'] != pos:
            entry['value'] = pos
            # Rate-limit the hint to one message every 3 seconds.
            if time.time() - last_prompt_time > 3:
                last_prompt_time = time.time()
                print(f"调参:您正在编辑参数‘{name}" + (f"{entry.get('comment', '')}" if 'comment' in entry else ""))
    # Re-run the pipeline on the (possibly new) page and refresh the window.
    current_img = cv_imread(img_path(var_dict['number']['value']))
    visual_res, valid_rects = process(current_img, var_dict['processing_step']['value'])
    cv2.imshow(window_name, visual_res)
    


def init():
    """Create the window and one trackbar per parameter, then render page 1."""
    global current_img, visual_res, valid_rects
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(window_name, *window_size)

    # `update` is the shared callback for every trackbar.
    for name, entry in var_dict.items():
        lo, hi = entry['range']
        cv2.createTrackbar(name, window_name, entry['value'], hi, update)
        cv2.setTrackbarMin(name, window_name, lo)
        cv2.setTrackbarPos(name, window_name, entry['value'])

    # Initial render of the currently selected page.
    current_img = cv_imread(img_path(var_dict['number']['value']))
    visual_res, valid_rects = process(current_img, var_dict['processing_step']['value'])
    cv2.imshow(window_name, visual_res)

init()

# 3. Start the annotation event loop.



# NOTE(review): `% 255` below was probably meant to be `& 0xFF` (i.e. % 256);
# it happens to work for the ASCII keys handled here — confirm before changing.
while (True):
    if cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:  # the property drops below 1 once the window is closed
        break
    key = cv2.waitKey(10) % 255
    if key == ord('s'):  # save current parameters
        print(f"当前参数已保存到 {parameter_path}")
        dump_json_file(var_dict, parameter_path)
    #https://blog.csdn.net/listener51/article/details/89353247
    # elif key==get_keycode('KEYCODE_ENTER'):
    # https://blog.csdn.net/mystonelxj/article/details/88184829
    elif key==13: # enter: confirm and save this page's annotation
        save_rois(current_img, valid_rects)
    elif key==38 or key==37 or key==ord("a"): # up / left / 'a': previous page
        print("up: 上一张图片。")
        cv2.setTrackbarPos('number', window_name, max(1, var_dict['number']['value']-1))
    elif key==40 or key==39 or key==ord("d"): # down / right / 'd': next page
        print("down: 下一张图片。")
        cv2.setTrackbarPos('number', window_name, min(pages, var_dict['number']['value']+1))
    elif key==ord('j') or key==ord('k'): # jump to the next not-yet-annotated page
        new_page = -1
        for new_page in range(var_dict['number']['value']+1, pages+1):
            if not is_annotated(new_page):
                break
        cv2.setTrackbarPos('number', window_name, new_page)
    elif key == ord('q'):  # quit
        break
print("q: 退出。")
cv2.destroyAllWindows()
exit()
# cv2.destroyAllWindows()
Loading