
#! /usr/bin/python
# -*- coding: utf-8 -*-
"""
Module for downloading and managing sample medical datasets.
"""

import logging

logger = logging.getLogger(__name__)

import argparse
import glob
import os
import os.path as op
import sys
import zipfile

import numpy as np

import io3d
from . import cachefile as cachef
# if sys.version_info < (3, 0):
#     import urllib as urllibr
# else:
#     import urllib.request as urllibr


# You can get the hash from the command line with:
#     python imtools/sample_data.py -v sliver_training_001
local_dir = "~/data/medical/orig/"
# vessels.pkl first creates an empty directory named vessels.pkl, then deletes
# it again during unpacking
__url_home = "http://home.zcu.cz/~mjirik/lisa/testdata/sample-extra-data/"
__url_server = "http://147.228.240.61/queetech/"
data_urls = {
    "head": [__url_server + "sample-data/head.zip", "89e9b60fd23257f01c4a1632ff7bb800", "matlab"],
    "jatra_06mm_jenjatra": [__url_server + "sample-data/jatra_06mm_jenjatra.zip", None, "jatra_06mm_jenjatra/*.dcm"],
    "jatra_5mm": [__url_server + "sample-data/jatra_5mm.zip", '1b9039ffe1ff9af9caa344341c8cec03', "jatra_5mm/*.dcm"],
    "exp": [__url_server + "sample-data/exp.zip", '74f2c10b17b6bd31bd03662df6cf884d'],
    "sliver_training_001": [__url_server + "sample-data/sliver_training_001.zip","d64235727c0adafe13d24bfb311d1ed0","liver*001.*"],
    "volumetrie": [__url_server + "sample-data/volumetrie.zip","6b2a2da67874ba526e2fe00a78dd19c9"],
    "vessels.pkl": [__url_server + "sample-data/vessels.pkl.zip","698ef2bc345bb616f8d4195048538ded"],
    "biodur_sample": [__url_server + "sample-data/biodur_sample.zip","d459dd5b308ca07d10414b3a3a9000ea"],
    "gensei_slices": [__url_server + "sample-data/gensei_slices.zip", "ef93b121add8e4a133bb086e9e6491c9"],
    "exp_small": [__url_server + "sample-data/exp_small.zip", "0526ba8ea363fe8b5227f5807b7aaca7"],
    "vincentka": [__url_server + "vincentka.zip", "a30fdabaa39c5ce032a3223ed30b88e3"],
    "vincentka_sample": [__url_server + "sample-data/vincentka_sample.zip"],
    "donut": __url_server + "sample-data/donut.zip",
    "io3d_sample_data": [__url_server + "sample-extra-data/io3d_sample_data.zip"],
    "lisa": {"package": ["donut", "vincentka_sample", "exp_small", "gensei_slices",
                         "biodur_sample", "vessels.pkl", "sliver_training_001", "jatra_5mm",
                         "head", "volumetrie"]},
    "3Dircadb1": ["http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.zip", None, None, "ircad/*[!p]/*[!pfg]"],
    "3Dircadb1.1": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.1.zip",
    "3Dircadb1.2": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.2.zip",
    "3Dircadb1.3": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.3.zip",
    "3Dircadb1.4": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.4.zip",
    "3Dircadb1.5": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.5.zip",
    "3Dircadb1.6": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.6.zip",
    "3Dircadb1.7": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.7.zip",
    "3Dircadb1.8": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.8.zip",
    "3Dircadb1.9": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.9.zip",
    "3Dircadb1.10": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.10.zip",
    "3Dircadb1.11": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.11.zip",
    "3Dircadb1.12": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.12.zip",
    "3Dircadb1.13": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.13.zip",
    "3Dircadb1.14": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.14.zip",
    "3Dircadb1.15": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.15.zip",
    "3Dircadb1.16": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.16.zip",
    "3Dircadb1.17": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.17.zip",
    "3Dircadb1.18": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.18.zip",
    "3Dircadb1.19": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.19.zip",
    "3Dircadb1.20": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.20.zip",
    
    # an array is not necessary, a plain string is enough:
    # "exp_small": "http://147.228.240.61/queetech/sample-data/exp_small.zip",
}
# cachefile = "~/io3d_cache.yaml"

def join_path(*path_to_join):
    """
    Join input path to the sample data path (usually in ~/lisa_data).

    :param path_to_join: one or more path components
    :return: joined path
    """
    sdp = dataset_path()
    pth = os.path.join(sdp, *path_to_join)
    logger.debug("sample_data_path " + str(sdp))
    logger.debug("path " + str(pth))
    return pth

def set_dataset_path(path, cache=None, cachefile="~/io3d_cache.yaml"):
    if cachefile is not None:
        cache = cachef.CacheFile(cachefile)
    cache.update("local_dataset_dir", path)

def dataset_path(cache=None, cachefile="~/io3d_cache.yaml"):
    """
    Get dataset path.

    :param cache: CacheFile object
    :param cachefile: cachefile path
    :return: path to the local dataset directory
    """
    local_data_dir = local_dir
    if cachefile is not None:
        cache = cachef.CacheFile(cachefile)
        # cache.update('local_dataset_dir', head)
    if cache is not None:
        local_data_dir = cache.get_or_save_default("local_dataset_dir", local_dir)
    return op.expanduser(local_data_dir)

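# A minimal usage sketch (the "~/lisa_data" path is only an example; the
# cache file is created on first use):
#
#     set_dataset_path("~/lisa_data")   # persisted in ~/io3d_cache.yaml
#     pth = dataset_path()              # -> expanded "~/lisa_data"
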
# def get_sample_data():
#     keys = imtools.sample_data.data_urls.keys()
#     imtools.sample_data.get_sample_data(keys, sample_data_path())

def get_dataset_meta(label):
    data_url = data_urls[label]
    if type(data_url) == str:
        # back compatibility
        data_url = [data_url]
    if type(data_url) == list:
        data_url.extend([None, None, None])
        data_url = data_url[:4]
        url, expected_hash, hash_path, fnpattern = data_url
        if hash_path is None:
            hash_path = label
        if fnpattern is None and hash_path is not None:
            fnpattern = hash_path
    # elif type(data_url) == dict:
    return data_url, url, expected_hash, hash_path, fnpattern

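# Example of the returned metadata, using the "exp" entry from data_urls
# above (comments show the expected values):
#
#     data_url, url, expected_hash, hash_path, fnpattern = get_dataset_meta("exp")
#     # url           -> __url_server + "sample-data/exp.zip"
#     # expected_hash -> "74f2c10b17b6bd31bd03662df6cf884d"
#     # hash_path     -> "exp"  (defaults to the label)
#     # fnpattern     -> "exp"  (defaults to hash_path)
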
def _expand_dataset_packages(dataset_label_dict):
    """
    Expand dataset packages. A package is a named group of several datasets.

    :param dataset_label_dict: list of dataset labels
    :return: list of labels with every package replaced by its datasets
    """
    new_dataset_label_dict = []
    for label in dataset_label_dict:
        dataset_metadata = data_urls[label]
        if type(dataset_metadata) == dict and "package" in dataset_metadata:
            new_dataset_label_dict.extend(dataset_metadata["package"])
        else:
            new_dataset_label_dict.append(label)
    return new_dataset_label_dict

def download(dataset_label=None, destination_dir=None, dry_run=False):
    """
    Download sample data by its label. Available labels can be listed with
    data_urls.keys().

    :param dataset_label: label of the data. If None, all data are downloaded.
    :param destination_dir: output directory for the data
    :param dry_run: if True, report what would be downloaded but do not download
    :return:
    """
    if destination_dir is None:
        destination_dir = dataset_path()
    destination_dir = op.expanduser(destination_dir)

    if not op.exists(destination_dir):
        os.makedirs(destination_dir)

    if dataset_label is None:
        dataset_label = data_urls.keys()

    if type(dataset_label) == str:
        dataset_label = [dataset_label]

    dataset_label = _expand_dataset_packages(dataset_label)

    for label in dataset_label:
        # get_dataset_meta() pads every data_urls entry to four fields
        data_url, url, expected_hash, hash_path, fnpattern = get_dataset_meta(label)
        if hash_path is None:
            hash_path = label

        try:
            computed_hash = checksum(os.path.join(destination_dir, hash_path))
        except Exception:
            # the checksumdir module is probably missing
            logger.warning("problem with sample_data.checksum()")
            computed_hash = None

        logger.info("dataset '" + label + "'")
        logger.info("expected hash: '" + str(expected_hash) + "'")
        logger.info("computed hash: '" + str(computed_hash) + "'")

        if (computed_hash is not None) and (expected_hash == computed_hash):
            logger.info("match ok - no download needed")
            continue

        logger.info("downloading")
        if dry_run:
            logger.debug("dry run")
            continue

        downzip(url, destination=destination_dir)
        logger.info("finished")
        downloaded_hash = checksum(os.path.join(destination_dir, hash_path))
        logger.info("downloaded hash: '" + str(downloaded_hash) + "'")
        if downloaded_hash != expected_hash:
            logger.warning(
                "downloaded hash is different from expected hash\n"
                "expected hash: '" + str(expected_hash) + "'\n"
                "downloaded hash: '" + str(downloaded_hash) + "'\n")

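# Typical use (requires network access; "exp_small" is one of the smaller
# datasets listed in data_urls):
#
#     download("exp_small")                       # into dataset_path()
#     download("lisa", dry_run=True)              # expand package, no download
#     download("donut", destination_dir="~/tmp")  # custom output directory
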
def get_old(dataset_label, id, destination_dir=None):
    """
    Get the 3D data from the specified dataset with the specified id.
    Download the data if necessary.

    :param dataset_label: label from data_urls
    :param id: integer or wildcard file pattern
    :param destination_dir: directory with the downloaded data
    :return: datap dictionary
    """
    # @TODO implement the download step
    if destination_dir is None:
        destination_dir = dataset_path()
    destination_dir = op.expanduser(destination_dir)

    data_url, url, expected_hash, hash_path, fnpattern = get_dataset_meta(dataset_label)
    paths = glob.glob(os.path.join(destination_dir, fnpattern))
    paths.sort()

    import fnmatch
    logger.debug("paths: " + str(paths))
    logger.debug("id: " + str(id))
    pathsf = fnmatch.filter(paths, id)
    logger.debug("filtered paths: " + str(pathsf))
    datap = io3d.read(pathsf[0], dataplus_format=True)
    return datap

def get(dataset_label, series_number=None, *args, **kwargs):
    """
    Read data of a dataset given by its label.

    :param dataset_label: label from data_urls
    :param series_number: series identification in the study
    :param args: passed to io3d.read()
    :param kwargs: passed to io3d.read()
    :return: datap dictionary
    """
    # relative path in the datasets
    relative_path_extracted_from_data_urls = ""
    datapath = join_path(relative_path_extracted_from_data_urls)
    # read 3D data from datapath
    datap = io3d.read(
        datapath, series_number=series_number, dataplus_format=True,
        *args, **kwargs)
    return datap

def checksum(path, hashfunc="md5"):
    """
    Return the checksum of the directory or file(s) given by path. Wildcards
    can be used in the path. The function strongly depends on the checksumdir
    package by 'cakepietoast'.

    :param path: path to a directory or a (wildcard) file pattern
    :param hashfunc: hash function name, e.g. "md5"
    :return: hash string
    """
    import checksumdir
    hash_func = checksumdir.HASH_FUNCS.get(hashfunc)
    if not hash_func:
        raise NotImplementedError("{} not implemented.".format(hashfunc))

    if os.path.isdir(path):
        return checksumdir.dirhash(path, hashfunc=hashfunc)

    hashvalues = []
    path_list = glob.glob(path)
    logger.debug("path_list " + str(path_list))
    for path in path_list:
        if os.path.isfile(path):
            hashvalues.append(checksumdir._filehash(path, hashfunc=hash_func))
    logger.debug(str(hashvalues))
    hash = checksumdir._reduce_hash(hashvalues, hashfunc=hash_func)
    return hash

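# Example (requires the checksumdir package; paths are illustrative):
#
#     md5 = checksum(join_path("jatra_5mm"))         # hash of a whole directory
#     md5 = checksum(join_path("jatra_5mm/*.dcm"))   # hash over matching files
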
def generate_donut():
    """
    Generate a donut-like shape with a stick inside.

    :return: datap with keys data3d, segmentation and voxelsize_mm
    """
    segmentation = np.zeros([20, 30, 40])
    # generate test data
    segmentation[6:10, 7:24, 10:37] = 1
    segmentation[6:10, 7, 10] = 0
    segmentation[6:10, 23, 10] = 0
    segmentation[6:10, 7, 36] = 0
    segmentation[6:10, 23, 36] = 0
    segmentation[2:18, 12:19, 18:28] = 2

    data3d = segmentation * 100 + np.random.random(segmentation.shape) * 30
    voxelsize_mm = [3, 2, 1]
    datap = {
        'data3d': data3d,
        'segmentation': segmentation,
        'voxelsize_mm': voxelsize_mm
    }
    # io3d.write(datap, "donut.pklz")
    return datap

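# Quick check of the generated phantom (shapes follow the code above):
#
#     datap = generate_donut()
#     datap["data3d"].shape    # -> (20, 30, 40)
#     datap["voxelsize_mm"]    # -> [3, 2, 1]
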
def generate_abdominal(size=100, liver_intensity=100, noise_intensity=20,
                       portal_vein_intensity=130, spleen_intensity=90):
    boundary = int(size / 4)
    voxelsize_mm = [1.0, 1.5, 1.5]
    slab = {
        'liver': 1,
        'porta': 2,
        'spleen': 17
    }
    segmentation = np.zeros([size, size, size], dtype=np.uint8)
    # liver
    segmentation[boundary:-boundary, boundary:-2 * boundary, 2 * boundary:-boundary] = slab['liver']
    # portal vein
    segmentation[:, boundary * 2:boundary * 2 + 5, boundary * 2:boundary * 2 + 5] = slab['porta']
    # spleen
    segmentation[:, -5:, -boundary:] = slab['spleen']

    seeds = np.zeros([size, size, size], dtype=np.uint8)
    seeds[
        boundary + 1:boundary + 4,
        boundary + 1:boundary + 4,
        2 * boundary + 1:2 * boundary + 4
    ] = 1

    noise = (np.random.random(segmentation.shape) * noise_intensity).astype(int)
    data3d = np.zeros(segmentation.shape, dtype=int)
    data3d[segmentation == slab['liver']] = liver_intensity
    data3d[segmentation == slab['porta']] = portal_vein_intensity
    data3d[segmentation == slab['spleen']] = spleen_intensity
    data3d += noise

    datap = {
        'data3d': data3d,
        'segmentation': segmentation,
        'voxelsize_mm': voxelsize_mm,
        'seeds': seeds,
        'slab': slab
    }
    return datap

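# Example (labels follow the slab mapping above: liver=1, porta=2, spleen=17):
#
#     datap = generate_abdominal(size=100, noise_intensity=20)
#     datap["data3d"].shape    # -> (100, 100, 100)
#     datap["slab"]            # -> {'liver': 1, 'porta': 2, 'spleen': 17}
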
def sliver_reader(filename_end_mask="*[0-9].mhd",
                  sliver_reference_dir="~/data/medical/orig/sliver07/training/",
                  read_orig=True, read_seg=False):
    """
    Generator for reading sliver data from a directory structure.

    :param filename_end_mask: file selection can be controlled with this parameter
    :param sliver_reference_dir: directory with sliver .mhd and .raw files
    :param read_orig: read the image data if set to True
    :param read_seg: read the segmentation data if set to True
    :return: yields numeric_label, vs_mm, oname, orig_data, rname, ref_data
    """
    import re
    sliver_reference_dir = op.expanduser(sliver_reference_dir)
    orig_fnames = glob.glob(sliver_reference_dir + "*orig" + filename_end_mask)
    ref_fnames = glob.glob(sliver_reference_dir + "*seg" + filename_end_mask)
    orig_fnames.sort()
    ref_fnames.sort()

    for i in range(0, len(orig_fnames)):
        oname = orig_fnames[i]
        rname = ref_fnames[i]
        vs_mm = None
        ref_data = None
        orig_data = None
        if read_orig:
            orig_data, metadata = io3d.datareader.read(oname, dataplus_format=False)
            vs_mm = metadata['voxelsize_mm']
        if read_seg:
            ref_data, metadata = io3d.datareader.read(rname, dataplus_format=False)
            vs_mm = metadata['voxelsize_mm']
        numeric_label = re.search(r".*g(\d+)", oname).group(1)
        yield numeric_label, vs_mm, oname, orig_data, rname, ref_data

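# Iterating the generator (assumes the sliver07 training data are unpacked in
# the default directory):
#
#     for numeric_label, vs_mm, oname, orig_data, rname, ref_data in sliver_reader(read_seg=True):
#         print(numeric_label, vs_mm, oname)
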
def main():
    logger = logging.getLogger()
    logger.setLevel(logging.WARNING)
    ch = logging.StreamHandler()
    logger.addHandler(ch)
    # logger.debug('input params')

    # input parser
    parser = argparse.ArgumentParser(description="Work on dataset")
    parser.add_argument(
        "-l", "--labels", metavar="N", nargs="+", default=None,
        help="get sample data with the given labels")
    parser.add_argument(
        "-L", "--print_labels", action="store_true", default=False,
        help="print all available labels")
    parser.add_argument(
        "-c", "--checksum", default=None,
        help="get hash for requested path")
    parser.add_argument(
        "-v", "--verbatim", action="store_true", default=False,
        help="more messages")
    parser.add_argument(
        "-d", "--debug", default=None,
        help="set debug level")
    parser.add_argument(
        "-o", "--destination_dir", default=dataset_path(),
        help="set output directory")
    args = parser.parse_args()

    if args.verbatim:
        # logger.setLevel(logging.DEBUG)
        logger.setLevel(logging.INFO)
    if args.debug is not None:
        logger.setLevel(int(args.debug))

    if args.checksum is not None:
        print(checksum(args.checksum))
    if args.print_labels:
        print(sorted(data_urls.keys()))
        return
    if args.labels is None:
        return

    download(args.labels, destination_dir=args.destination_dir)

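# Command-line usage (assumes the io3d package is importable so the module
# can be run with -m; label names come from data_urls):
#
#     python -m io3d.datasets -L
#     python -m io3d.datasets -l exp_small -o ~/lisa_data
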
#submodule_update()
def remove(local_file_name):
    try:
        os.remove(local_file_name)
    except Exception as e:
        print("Cannot remove file '" + local_file_name +
              "'. Please remove it manually.")
        print(e)

def downzip(url, destination='./sample_data/'):
    """
    Download, unzip and delete the archive.
    """
    # url = "http://147.228.240.61/queetech/sample-data/jatra_06mm_jenjatra.zip"
    logmsg = "downloading from '" + url + "' to '" + destination + "'"
    print(logmsg)
    logger.info(logmsg)
    tmp_filename = "tmp.zip"
    # urllibr.urlretrieve(url, zip_file_name)
    from . import network
    network.download_file(url, destination, filename=tmp_filename)
    zip_file_name = os.path.join(destination, tmp_filename)
    unzip_recursive(zip_file_name)
    # unzip_one(local_file_name)


# def unzip_all(path):
#     """ Unzip all .zip files packed in other .zip in path recursively.
#
#     :param path:
#     :return:
#     """
#     ziplist = glob.glob(op.join(path, '*.zip'))
#     while len(ziplist) > 0:
#         # for local_file_name in ziplist:
#         local_file_name = ziplist[0]
#         unzip_one(local_file_name)
#         ziplist = glob.glob(op.join(path, '*.zip'))

def unzip_one(local_file_name):
    """
    Unzip one file and delete it.

    :param local_file_name: file name of the zip file
    :return: list of extracted file names
    """
    local_file_name = op.expanduser(local_file_name)
    destination = op.dirname(local_file_name)
    datafile = zipfile.ZipFile(local_file_name)
    namelist = datafile.namelist()
    datafile.extractall(destination)
    datafile.close()
    remove(local_file_name)

    fullnamelist = []
    for fn in namelist:
        fullnamelist.append(op.join(destination, fn))
    return fullnamelist

def unzip_recursive(zip_file_name):
    """
    Unzip the file, recursively unzip any zip files found inside, and delete
    all the zip files afterwards.

    :param zip_file_name: path to the zip file
    :return: list of extracted file names
    """
    logger.debug("unzipping " + zip_file_name)
    fnlist = unzip_one(zip_file_name)
    for fn in fnlist:
        if zipfile.is_zipfile(fn):
            local_fnlist = unzip_recursive(fn)
            fnlist.extend(local_fnlist)
    return fnlist

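# Example (hypothetical archive path; nested zip files are unpacked and the
# archives removed):
#
#     fnlist = unzip_recursive(op.expanduser("~/lisa_data/tmp.zip"))
#     # fnlist -> names of all files extracted at any nesting level
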
if __name__ == "__main__":
    main()