#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Module for downloading and managing sample medical datasets.
"""
import logging
import os.path
import sys
logger = logging.getLogger(__name__)
import argparse
import numpy as np
import zipfile
import glob
import os.path as op
import io3d
from . import cachefile as cachef
# if sys.version_info < (3, 0):
# import urllib as urllibr
# else:
# import urllib.request as urllibr
# you can get hash from command line with:
# python imtools/sample_data.py -v sliver_training_001
local_dir = "~/data/medical/orig/"
# vessels.pkl: unpacking first creates an empty directory named vessels.pkl, then removes it again during extraction
__url_home = "http://home.zcu.cz/~mjirik/lisa/testdata/sample-extra-data/"
__url_server = "http://147.228.240.61/queetech/"
data_urls = {
    "head": [__url_server + "sample-data/head.zip", "89e9b60fd23257f01c4a1632ff7bb800", "matlab"],
"jatra_06mm_jenjatra": [__url_server + "sample-data/jatra_06mm_jenjatra.zip", None, "jatra_06mm_jenjatra/*.dcm"],
"jatra_5mm": [__url_server + "sample-data/jatra_5mm.zip", '1b9039ffe1ff9af9caa344341c8cec03', "jatra_5mm/*.dcm"],
"exp": [__url_server + "sample-data/exp.zip", '74f2c10b17b6bd31bd03662df6cf884d'],
"sliver_training_001": [__url_server + "sample-data/sliver_training_001.zip","d64235727c0adafe13d24bfb311d1ed0","liver*001.*"],
"volumetrie": [__url_server + "sample-data/volumetrie.zip","6b2a2da67874ba526e2fe00a78dd19c9"],
"vessels.pkl": [__url_server + "sample-data/vessels.pkl.zip","698ef2bc345bb616f8d4195048538ded"],
"biodur_sample": [__url_server + "sample-data/biodur_sample.zip","d459dd5b308ca07d10414b3a3a9000ea"],
"gensei_slices": [__url_server + "sample-data/gensei_slices.zip", "ef93b121add8e4a133bb086e9e6491c9"],
"exp_small": [__url_server + "sample-data/exp_small.zip", "0526ba8ea363fe8b5227f5807b7aaca7"],
"vincentka": [__url_server + "vincentka.zip", "a30fdabaa39c5ce032a3223ed30b88e3"],
"vincentka_sample": [__url_server + "sample-data/vincentka_sample.zip"],
"donut": __url_server + "sample-data/donut.zip",
"io3d_sample_data": [__url_server + "sample-extra-data/io3d_sample_data.zip"],
"lisa": {"package": ["donut", "vincentka_sample", "exp_small", "gensei_slices",
"biodur_sample", "vessels.pkl", "sliver_training_001", "jatra_5mm",
"head", "volumetrie"]},
"3Dircadb1": ["http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.zip", None, None, "ircad/*[!p]/*[!pfg]"],
"3Dircadb1.1": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.1.zip",
"3Dircadb1.2": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.2.zip",
"3Dircadb1.3": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.3.zip",
"3Dircadb1.4": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.4.zip",
"3Dircadb1.5": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.5.zip",
"3Dircadb1.6": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.6.zip",
"3Dircadb1.7": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.7.zip",
"3Dircadb1.8": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.8.zip",
"3Dircadb1.9": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.9.zip",
"3Dircadb1.10": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.10.zip",
"3Dircadb1.11": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.11.zip",
"3Dircadb1.12": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.12.zip",
"3Dircadb1.13": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.13.zip",
"3Dircadb1.14": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.14.zip",
"3Dircadb1.15": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.15.zip",
"3Dircadb1.16": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.16.zip",
"3Dircadb1.17": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.17.zip",
"3Dircadb1.18": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.18.zip",
"3Dircadb1.19": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.19.zip",
"3Dircadb1.20": "http://ircad.fr/softwares/3Dircadb/3Dircadb1/3Dircadb1.20.zip",
    # a full list is not necessary; a plain URL string is enough
# "exp_small": "http://147.228.240.61/queetech/sample-data/exp_small.zip",
}
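# Adding a new dataset is a matter of registering its metadata above. A minimal
# sketch (the label "my_dataset" and its archive are hypothetical; the list form
# is [url, md5 hash, optional file pattern or hash path], and a plain URL string
# is accepted too):
#
#     data_urls["my_dataset"] = [__url_server + "sample-data/my_dataset.zip", None]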
# cachefile = "~/io3d_cache.yaml"
def join_path(*path_to_join):
    """
    Join the given path components to the local dataset path (by default ~/data/medical/orig/).
    :param path_to_join: one or more path components
    :return: joined path
    """
sdp = dataset_path()
pth = os.path.join(sdp, *path_to_join)
    logger.debug('sample_data_path ' + str(sdp))
logger.debug('path ' + str(pth))
return pth
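# Usage sketch for join_path(); the prefix depends on the configured dataset
# directory (the file name below is hypothetical):
#
#     pth = join_path("jatra_5mm", "IM-0001-0001.dcm")
#     # -> e.g. ~/data/medical/orig/jatra_5mm/IM-0001-0001.dcm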
def set_dataset_path(path, cache=None, cachefile="~/io3d_cache.yaml"):
    """
    Set the local dataset directory and store it in the cache file.
    :param path: new dataset directory
    :param cache: CacheFile object; overridden when cachefile is given
    :param cachefile: path to the cache file
    """
    if cachefile is not None:
        cache = cachef.CacheFile(cachefile)
    cache.update("local_dataset_dir", path)
def dataset_path(cache=None, cachefile="~/io3d_cache.yaml"):
"""
Get dataset path.
:param cache: CacheFile object
:param cachefile: cachefile path
:return:
"""
local_data_dir = local_dir
if cachefile is not None:
cache = cachef.CacheFile(cachefile)
# cache.update('local_dataset_dir', head)
if cache is not None:
local_data_dir = cache.get_or_save_default('local_dataset_dir', local_dir)
return op.expanduser(local_data_dir)
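# Usage sketch for the dataset path helpers; the directory below is
# hypothetical and is persisted in ~/io3d_cache.yaml by default:
#
#     set_dataset_path("~/my_medical_data")
#     print(dataset_path())  # expanded configured directory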
# def get_sample_data():
# keys = imtools.sample_data.data_urls.keys()
# imtools.sample_data.get_sample_data(keys, sample_data_path())
def _expand_dataset_packages(dataset_labels):
    """
    Expand dataset package labels. A package groups several datasets under one label.
    :param dataset_labels: list of dataset labels
    :return: list of labels with packages replaced by their member datasets
    """
    new_dataset_labels = []
    for label in dataset_labels:
        dataset_metadata = data_urls[label]
        if isinstance(dataset_metadata, dict) and "package" in dataset_metadata:
            new_dataset_labels.extend(dataset_metadata["package"])
        else:
            new_dataset_labels.append(label)
    return new_dataset_labels
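# A package label such as "lisa" expands to its member dataset labels, e.g.:
#
#     labels = _expand_dataset_packages(["lisa", "exp"])
#     # -> ["donut", "vincentka_sample", ..., "volumetrie", "exp"]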
def download(dataset_label=None, destination_dir=None, dry_run=False):
    """
    Download sample data by dataset label. Labels can be listed with sample_data.data_urls.keys().
    :param dataset_label: label of the dataset. If set to None, all datasets are downloaded
    :param destination_dir: output directory for the data
    :param dry_run: if True, only compare hashes and log; nothing is downloaded
    :return:
"""
if destination_dir is None:
destination_dir = dataset_path()
destination_dir = op.expanduser(destination_dir)
if not op.exists(destination_dir):
os.makedirs(destination_dir)
    if dataset_label is None:
        dataset_label = list(data_urls.keys())
    if isinstance(dataset_label, str):
        dataset_label = [dataset_label]
dataset_label = _expand_dataset_packages(dataset_label)
for label in dataset_label:
        # normalize metadata: every entry yields url, hash, hash path and file pattern
data_url, url, expected_hash, hash_path, fnpattern = get_dataset_meta(label)
if hash_path is None:
hash_path = label
try:
computed_hash = checksum(os.path.join(destination_dir, hash_path))
        except Exception:
            # this can fail e.g. when the checksumdir module is not installed
            logger.warning("problem with sample_data.checksum()")
            computed_hash = None
logger.info("dataset '" + label + "'")
logger.info("expected hash: '" + str(expected_hash) + "'")
logger.info("computed hash: '" + str(computed_hash) + "'")
if (computed_hash is not None) and (expected_hash == computed_hash):
logger.info("match ok - no download needed")
else:
logger.info("downloading")
if not dry_run:
downzip(url, destination=destination_dir)
logger.info("finished")
downloaded_hash = checksum(os.path.join(destination_dir, hash_path))
logger.info("downloaded hash: '" + str(downloaded_hash) + "'")
if downloaded_hash != expected_hash:
logger.warning("downloaded hash is different from expected hash\n" + \
"expected hash: '" + str(expected_hash) + "'\n" + \
"downloaded hash: '" + str(downloaded_hash) + "'\n")
else:
logger.debug("dry run")
def get_old(dataset_label, id, destination_dir=None):
    """
    Get the 3D data from the specified dataset with the specified id.
    Download the data if necessary.
    :param dataset_label: label from data_urls
    :param id: integer or wildcard file pattern
    :param destination_dir: directory with the downloaded data
    :return: datap dictionary
    """
# @TODO implement
if destination_dir is None:
destination_dir = dataset_path()
destination_dir = op.expanduser(destination_dir)
data_url, url, expected_hash, hash_path, fnpattern = get_dataset_meta(dataset_label)
paths = glob.glob(os.path.join(destination_dir, fnpattern))
paths.sort()
    import fnmatch
    logger.debug("paths: " + str(paths))
    logger.debug("id: " + str(id))
    pathsf = fnmatch.filter(paths, id)
    logger.debug("filtered paths: " + str(pathsf))
datap = io3d.read(pathsf[0], dataplus_format=True)
return datap
def get(dataset_label, series_number=None, *args, **kwargs):
    """
    Read data from a downloaded dataset.
    :param dataset_label: label from data_urls
    :param series_number: series identification within the study
    :param args: passed on to io3d.read()
    :param kwargs: passed on to io3d.read()
    :return: datap dictionary
    """
    # @TODO extract the relative path in the dataset from the data_urls metadata
    relative_path_extracted_from_data_urls = ""
    datapath = join_path(relative_path_extracted_from_data_urls)
# read 3D data from datapath
datap = io3d.read(datapath, series_number=series_number, dataplus_format=True, *args, **kwargs)
return datap
def checksum(path, hashfunc='md5'):
    """
    Return the checksum of the file(s) or directory given by path. Wildcards can be
    used in the path. The function strongly depends on the checksumdir package by 'cakepietoast'.
    :param path: file, directory, or glob pattern
    :param hashfunc: name of the hash function, e.g. 'md5'
    :return: hexadecimal digest string
    """
import checksumdir
hash_func = checksumdir.HASH_FUNCS.get(hashfunc)
if not hash_func:
raise NotImplementedError('{} not implemented.'.format(hashfunc))
if os.path.isdir(path):
return checksumdir.dirhash(path, hashfunc=hashfunc)
hashvalues = []
path_list = glob.glob(path)
logger.debug("path_list " + str(path_list))
for path in path_list:
if os.path.isfile(path):
hashvalues.append(checksumdir._filehash(path, hashfunc=hash_func))
logger.debug(str(hashvalues))
    hash_value = checksumdir._reduce_hash(hashvalues, hashfunc=hash_func)
    return hash_value
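# Usage sketch for checksum(); requires the checksumdir package and downloaded
# data:
#
#     print(checksum(join_path("jatra_5mm")))  # hash of the whole directory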
def generate_donut():
    """
    Generate a donut-like shape with a stick inside.
    :return: datap with keys data3d, segmentation and voxelsize_mm
    """
    segmentation = np.zeros([20, 30, 40])
# generate test data
segmentation[6:10, 7:24, 10:37] = 1
segmentation[6:10, 7, 10] = 0
segmentation[6:10, 23, 10] = 0
segmentation[6:10, 7, 36] = 0
segmentation[6:10, 23, 36] = 0
segmentation[2:18, 12:19, 18:28] = 2
data3d = segmentation * 100 + np.random.random(segmentation.shape) * 30
    voxelsize_mm = [3, 2, 1]
datap = {
'data3d': data3d,
'segmentation': segmentation,
'voxelsize_mm': voxelsize_mm
}
# io3d.write(datap, "donut.pklz")
return datap
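# Usage sketch for generate_donut():
#
#     datap = generate_donut()
#     print(datap["data3d"].shape)   # (20, 30, 40)
#     print(datap["voxelsize_mm"])   # [3, 2, 1]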
def generate_abdominal(size=100, liver_intensity=100, noise_intensity=20, portal_vein_intensity=130, spleen_intensity=90):
    """
    Generate a synthetic abdominal phantom with liver, portal vein and spleen.
    :return: datap with keys data3d, segmentation, voxelsize_mm, seeds and slab
    """
    boundary = int(size / 4)
    voxelsize_mm = [1.0, 1.5, 1.5]
slab = {
'liver': 1,
'porta': 2,
'spleen': 17
}
segmentation = np.zeros([size, size, size], dtype=np.uint8)
segmentation[boundary:-boundary, boundary:-2*boundary, 2*boundary:-boundary] = 1
    segmentation[:, boundary*2:boundary*2+5, boundary*2:boundary*2+5] = 2
segmentation[:, -5:, -boundary:] = 17
seeds = np.zeros([size, size, size], dtype=np.uint8)
seeds[
boundary + 1 : boundary + 4,
boundary + 1 : boundary + 4,
2 * boundary + 1 : 2 * boundary + 4
] = 1
    noise = (np.random.random(segmentation.shape) * noise_intensity).astype(int)
    data3d = np.zeros(segmentation.shape, dtype=int)
    data3d[segmentation == 1] = liver_intensity
    data3d[segmentation == 2] = portal_vein_intensity
    data3d[segmentation == 17] = spleen_intensity
data3d += noise
datap = {
'data3d': data3d,
'segmentation': segmentation,
'voxelsize_mm': voxelsize_mm,
'seeds': seeds,
'slab': slab
}
return datap
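# Usage sketch for generate_abdominal(); the slab maps organ names to the
# numeric labels used in the segmentation array:
#
#     datap = generate_abdominal(size=60)
#     liver_voxels = (datap["segmentation"] == datap["slab"]["liver"]).sum()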
def sliver_reader(filename_end_mask="*[0-9].mhd", sliver_reference_dir="~/data/medical/orig/sliver07/training/", read_orig=True, read_seg=False):
    """
    Generator for reading sliver data from a directory structure.
    :param filename_end_mask: file selection can be controlled with this parameter
    :param sliver_reference_dir: directory with sliver .mhd and .raw files
    :param read_orig: read the image data if set to True
    :param read_seg: read the segmentation data if set to True
:return: numeric_label, vs_mm, oname, orig_data, rname, ref_data
"""
sliver_reference_dir = op.expanduser(sliver_reference_dir)
orig_fnames = glob.glob(sliver_reference_dir + "*orig" + filename_end_mask)
    ref_fnames = glob.glob(sliver_reference_dir + "*seg" + filename_end_mask)
orig_fnames.sort()
ref_fnames.sort()
    for i in range(len(orig_fnames)):
        oname = orig_fnames[i]
        rname = ref_fnames[i]
        vs_mm = None
        ref_data = None
orig_data = None
if read_orig:
orig_data, metadata = io3d.datareader.read(oname, dataplus_format=False)
vs_mm = metadata['voxelsize_mm']
if read_seg:
ref_data, metadata = io3d.datareader.read(rname, dataplus_format=False)
vs_mm = metadata['voxelsize_mm']
import re
        numeric_label = re.search(r".*g(\d+)", oname).group(1)
out = (numeric_label, vs_mm, oname, orig_data, rname, ref_data)
yield out
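# Usage sketch for sliver_reader(); assumes the sliver07 training data are
# available in the reference directory:
#
#     for numeric_label, vs_mm, oname, orig_data, rname, ref_data in sliver_reader():
#         print(numeric_label, vs_mm, orig_data.shape)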
def main():
logger = logging.getLogger()
logger.setLevel(logging.WARNING)
ch = logging.StreamHandler()
logger.addHandler(ch)
#logger.debug('input params')
# input parser
    parser = argparse.ArgumentParser(
        description="Download and check sample datasets")
parser.add_argument(
"-l", "--labels", metavar="N", nargs="+",
default=None,
help='Get sample data')
parser.add_argument(
'-L', '--print_labels', action="store_true",
default=False,
help='print all available labels')
parser.add_argument(
'-c', '--checksum', # action="store_true",
default=None,
help='Get hash for requested path')
    parser.add_argument(
        '-v', '--verbose', action="store_true",
        default=False,
        help='more messages')
parser.add_argument(
'-d', '--debug', # action="store_true",
default=None,
help='set debug level')
parser.add_argument(
'-o', '--destination_dir',
default=dataset_path(),
help='set output directory')
args = parser.parse_args()
# if args.get_sample_data == False and args.install == False and args.build_gco == False:
## default setup is install and get sample data
# args.get_sample_data = True
# args.install = True
# args.build_gco = False
    if args.verbose:
# logger.setLevel(logging.DEBUG)
logger.setLevel(logging.INFO)
if args.debug is not None:
logger.setLevel(int(args.debug))
if args.checksum is not None:
print(checksum(args.checksum))
    if args.print_labels:
        print(sorted(data_urls.keys()))
        return
    if args.labels is None:
        return
download(args.labels, destination_dir=args.destination_dir)
#submodule_update()
def remove(local_file_name):
    try:
        os.remove(local_file_name)
    except Exception as e:
        print("Cannot remove file '" + local_file_name + "'. Please remove it manually.")
        print(e)
def downzip(url, destination='./sample_data/'):
"""
Download, unzip and delete.
"""
# url = "http://147.228.240.61/queetech/sample-data/jatra_06mm_jenjatra.zip"
logmsg = "downloading from '" + url + "' to '" + destination + "'"
print(logmsg)
logger.info(logmsg)
tmp_filename = "tmp.zip"
# urllibr.urlretrieve(url, zip_file_name)
from . import network
network.download_file(url, destination, filename=tmp_filename)
zip_file_name = os.path.join(destination, tmp_filename)
unzip_recursive(zip_file_name)
# unzip_one(local_file_name)
# def unzip_all(path):
# """ Unzip all .zip files packed in other .zip in path recusively.
#
# :param path:
# :return:
# """
#
# ziplist = glob.glob(op.join(path, '*.zip'))
# while len(ziplist) > 0:
# # for local_file_name in ziplist:
# local_file_name = ziplist[0]
# unzip_one(local_file_name)
# ziplist = glob.glob(op.join(path, '*.zip'))
def unzip_one(local_file_name):
"""
Unzip one file and delete it.
:param local_file_name: file name of zip file
:return:
"""
local_file_name = op.expanduser(local_file_name)
destination = op.dirname(local_file_name)
datafile = zipfile.ZipFile(local_file_name)
namelist = datafile.namelist()
datafile.extractall(destination)
datafile.close()
remove(local_file_name)
fullnamelist = []
for fn in namelist:
fullnamelist.append(op.join(destination, fn))
return fullnamelist
def unzip_recursive(zip_file_name):
    """
    Unzip a file, recursively unzip any zip files found inside, and delete
    the zip files afterwards.
    :param zip_file_name: path to the zip file
    :return: list of all extracted file names
    """
logger.debug("unzipping " + zip_file_name)
fnlist = unzip_one(zip_file_name)
for fn in fnlist:
if zipfile.is_zipfile(fn):
local_fnlist = unzip_recursive(fn)
fnlist.extend(local_fnlist)
return fnlist
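# Usage sketch for the unzip helpers; note that source zip files are deleted
# after extraction (the path below is hypothetical):
#
#     fnlist = unzip_recursive("~/data/medical/orig/tmp.zip")
#     print(fnlist)  # all extracted files, including those from nested zips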
if __name__ == "__main__":
main()