Analyze everything (batched)#
This notebook demonstrates how to run a complete STRESS analysis in a batched way. This requires you to assemble a list of complete filenames and then submit all of them for processing. It is strongly required to do this on a sufficiently powerful processing PC.
Note
Visualizing the results is not part of this tutorial. To find out more, check the analyze-everything tutorial.
Additional dependencies#
In case you want to work with proprietory file formats (e.g., czi, nd2, etc), you’ll need to install some packages that can handle them. As a one-fits-all solution, install the bioio package. To find out exactly what extension of bioio you need, check the documentation. For instance, for working with czi images you’ll need these dependencies:
pip install bioio bioio-czi
import napari
import numpy as np
from napari_stress import reconstruction, measurements, utils, stress_backend, TimelapseConverter
import os
from pathlib import Path
from bioio import BioImage
from dask.distributed import Client, get_client
import pandas as pd
from skimage import io
import vedo
Converter = TimelapseConverter()
viewer = napari.Viewer(ndisplay=3)
Assistant skips harvesting pyclesperanto as it's not installed.
Set up parallelization#
IT’s important to set this up beforehand. The process may require a lot of memory - and if you don’t set it here, napari-stress will set it for you, which may not be suitable for the load we will be working with in this usecase.
try:
client = get_client()
except Exception:
client = Client(n_workers=8, memory_limit='32GB')
print('Link to progress dashboard: ', client.dashboard_link)
Link to progress dashboard: http://127.0.0.1:8787/status
Collect data to be analyzed#
The easiest way to find samples in a given root folder is to use os.walk
. In essence, it recursively walks all subdirectories of a given folder, which allows you to look for files that are matching a specific pattern:
paths = [
os.path.join(r'D:\Marc', 'Marc_mouse_embryo_droplets_Aug2024'),
os.path.join(r'D:\Marc', 'Marc_genistein_droplets_Aug2024'),
os.path.join(r'D:\Marc', 'Marc_mouse_timepoint_data_Aug2024'),
os.path.join(r'D:\Marc', 'Marc_human_timepoint_data_Aug2024')
]
files_to_analyze = []
for path in paths:
for root, subdirs, filenames in os.walk(path):
for filename in filenames:
if filename.endswith('.czi'):
files_to_analyze.append(os.path.join(root, filename))
data_records = pd.DataFrame()
data_records['path'] = files_to_analyze
data_records['pixel_size_z'] = data_records['path'].apply(lambda x: BioImage(x).physical_pixel_sizes[0])
data_records['pixel_size_y'] = data_records['path'].apply(lambda x: BioImage(x).physical_pixel_sizes[1])
data_records['pixel_size_x'] = data_records['path'].apply(lambda x: BioImage(x).physical_pixel_sizes[2])
# show all rows
pd.set_option('display.max_rows', None)
data_records
path | pixel_size_z | pixel_size_y | pixel_size_x | |
---|---|---|---|---|
0 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.37 | 0.082864 | 0.082864 |
1 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.38 | 0.103580 | 0.103580 |
2 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.38 | 0.103580 | 0.103580 |
3 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.38 | 0.094164 | 0.094164 |
4 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.38 | 0.082864 | 0.082864 |
5 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.38 | 0.082864 | 0.082864 |
6 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.38 | 0.103580 | 0.103580 |
7 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.38 | 0.082864 | 0.082864 |
8 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.38 | 0.082864 | 0.082864 |
9 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.37 | 0.090070 | 0.090070 |
10 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.37 | 0.103580 | 0.103580 |
11 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.37 | 0.115089 | 0.115089 |
12 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E8.... | 0.37 | 0.103580 | 0.103580 |
13 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.38 | 0.115089 | 0.115089 |
14 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.38 | 0.115089 | 0.115089 |
15 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.38 | 0.115089 | 0.115089 |
16 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.38 | 0.103580 | 0.103580 |
17 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.38 | 0.115089 | 0.115089 |
18 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.38 | 0.115089 | 0.115089 |
19 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.38 | 0.103580 | 0.103580 |
20 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.38 | 0.129475 | 0.129475 |
21 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.38 | 0.115089 | 0.115089 |
22 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.38 | 0.115089 | 0.115089 |
23 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.38 | 0.115089 | 0.115089 |
24 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.37 | 0.147972 | 0.147972 |
25 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.37 | 0.115089 | 0.115089 |
26 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.37 | 0.138107 | 0.138107 |
27 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.37 | 0.172634 | 0.172634 |
28 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.37 | 0.138107 | 0.138107 |
29 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.37 | 0.115089 | 0.115089 |
30 | D:\Marc\Marc_mouse_embryo_droplets_Aug2024\E9.... | 0.37 | 0.103580 | 0.103580 |
31 | D:\Marc\Marc_genistein_droplets_Aug2024\F1G4 d... | 0.37 | 0.103580 | 0.103580 |
32 | D:\Marc\Marc_genistein_droplets_Aug2024\F1G4 d... | 0.37 | 0.103580 | 0.103580 |
33 | D:\Marc\Marc_genistein_droplets_Aug2024\F1G4 d... | 0.37 | 0.094164 | 0.094164 |
34 | D:\Marc\Marc_genistein_droplets_Aug2024\GFP-NL... | 0.40 | 0.082864 | 0.082864 |
35 | D:\Marc\Marc_genistein_droplets_Aug2024\GFP-NL... | 0.40 | 0.069054 | 0.069054 |
36 | D:\Marc\Marc_genistein_droplets_Aug2024\GFP-NL... | 0.40 | 0.082864 | 0.082864 |
37 | D:\Marc\Marc_genistein_droplets_Aug2024\GFP-NL... | 0.40 | 0.082864 | 0.082864 |
38 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\F1G4... | 0.48 | 0.079677 | 0.079677 |
39 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\F1G4... | 0.37 | 0.103580 | 0.103580 |
40 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\F1G4... | 0.37 | 0.082864 | 0.082864 |
41 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\F1G4... | 0.37 | 0.094164 | 0.094164 |
42 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\F1G4... | 0.37 | 0.103580 | 0.103580 |
43 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\F1G4... | 0.37 | 0.109032 | 0.109032 |
44 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\F1G4... | 0.37 | 0.115089 | 0.115089 |
45 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\F1G4... | 0.37 | 0.103580 | 0.103580 |
46 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\F1G4... | 0.37 | 0.082864 | 0.082864 |
47 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\F1G4... | 0.37 | 0.103580 | 0.103580 |
48 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\F1G4... | 0.37 | 0.103580 | 0.103580 |
49 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.103580 | 0.103580 |
50 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.082864 | 0.082864 |
51 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.069054 | 0.069054 |
52 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.069054 | 0.069054 |
53 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.069054 | 0.069054 |
54 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.103580 | 0.103580 |
55 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.069054 | 0.069054 |
56 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.069054 | 0.069054 |
57 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.069054 | 0.069054 |
58 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.069054 | 0.069054 |
59 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.073986 | 0.073986 |
60 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.094164 | 0.094164 |
61 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.37 | 0.073986 | 0.073986 |
62 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.40 | 0.103580 | 0.103580 |
63 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.40 | 0.103580 | 0.103580 |
64 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.40 | 0.073986 | 0.073986 |
65 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.40 | 0.069054 | 0.069054 |
66 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.40 | 0.069054 | 0.069054 |
67 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.40 | 0.069054 | 0.069054 |
68 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.40 | 0.082864 | 0.082864 |
69 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.40 | 0.076726 | 0.076726 |
70 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.40 | 0.069054 | 0.069054 |
71 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.40 | 0.069054 | 0.069054 |
72 | D:\Marc\Marc_mouse_timepoint_data_Aug2024\GFP-... | 0.40 | 0.069054 | 0.069054 |
73 | D:\Marc\Marc_human_timepoint_data_Aug2024\48h ... | 0.37 | 0.073986 | 0.073986 |
74 | D:\Marc\Marc_human_timepoint_data_Aug2024\48h ... | 0.37 | 0.069054 | 0.069054 |
75 | D:\Marc\Marc_human_timepoint_data_Aug2024\48h ... | 0.37 | 0.073986 | 0.073986 |
76 | D:\Marc\Marc_human_timepoint_data_Aug2024\48h ... | 0.37 | 0.069054 | 0.069054 |
77 | D:\Marc\Marc_human_timepoint_data_Aug2024\48h ... | 0.37 | 0.069054 | 0.069054 |
78 | D:\Marc\Marc_human_timepoint_data_Aug2024\48h ... | 0.37 | 0.069054 | 0.069054 |
79 | D:\Marc\Marc_human_timepoint_data_Aug2024\48h ... | 0.37 | 0.076726 | 0.076726 |
80 | D:\Marc\Marc_human_timepoint_data_Aug2024\48h ... | 0.37 | 0.069054 | 0.069054 |
81 | D:\Marc\Marc_human_timepoint_data_Aug2024\48h ... | 0.37 | 0.069054 | 0.069054 |
82 | D:\Marc\Marc_human_timepoint_data_Aug2024\48h ... | 0.37 | 0.069054 | 0.069054 |
83 | D:\Marc\Marc_human_timepoint_data_Aug2024\60h ... | 0.66 | 0.069054 | 0.069054 |
84 | D:\Marc\Marc_human_timepoint_data_Aug2024\60h ... | 0.66 | 0.069054 | 0.069054 |
85 | D:\Marc\Marc_human_timepoint_data_Aug2024\60h ... | 0.66 | 0.069054 | 0.069054 |
Set up output folder#
Select a folder where you would like to store the results:
output_folder = r'D:\Marc\Marc_mouse_embryo_droplets_Aug2024\stress_analysis_2024-08-21'
Let’s pick a sample and put it to napari to determine feasible parameters for the analysis.
image = BioImage(r'D:\\Marc\\Marc_mouse_embryo_droplets_Aug2024\\E8.5 embryos\\Mouse embryo E8.5_6_tailbud_droplets_40X_droplet1.czi')
image_data = image.get_image_data()
for idx, ch in enumerate(image.channel_names):
viewer.add_image(image_data[:, idx], name=ch, scale=[1] + list(image.physical_pixel_sizes), blending='additive')
Analysis parameters#
In case you ran the reconstruction previously interactively from the napari viewer (as explained here) and exported the settings, you can import the settings here, too. To do so, simply uncomment the line below (remove the #
) and provide the path to the saved settings file:
reconstruction_parameters = utils.import_settings(file_name=os.path.join(output_folder, 'reconstruction_settings.yaml'))
reconstruction_parameters['return_intermediate_results'] = False
measurement_parameters = {
'max_degree': 20, # spherical harmonics degree
'n_quadrature_points': 590, # number of quadrature points to measure on (maximum is 5810)
'gamma': 1.1} # interfacial tension of droplet
alpha = 0.05 # lower and upper boundary in cumulative distribution function which should be used to calculate the stress anisotropy
Analysis#
In this step, we put the entire analysis as demonstrated in this notebook into a single function. This function will then be called in a parallel fashion on all the datasets to make analysis faster.
import tqdm
jobs = []
iterator = tqdm.tqdm(data_records.iterrows(), total=len(data_records))
for idx, row in iterator:
iterator.set_description(f'Processing {row["path"]}')
# there's something weird with this sample
if idx== 4:
continue
# load data
image = BioImage(row['path'])
reconstruction_parameters['voxelsize'] = np.asarray(image.physical_pixel_sizes)
reconstruction_parameters['use_dask'] = False
raw_image = image.get_image_data().squeeze()[:, 1]
jobs.append(client.submit(reconstruction.reconstruct_droplet,
raw_image,
**reconstruction_parameters))
Processing D:\Marc\Marc_human_timepoint_data_Aug2024\60h samples\H2B-GFP p33 800 cells Gastruloid 60h_1_droplet3.czi: 100%|██████████| 86/86 [18:18<00:00, 12.77s/it]
results_reconstruction = []
for job in jobs:
try:
result = client.gather(job)
except Exception as e:
result = None
results_reconstruction.append(result)
_ = stress_backend.lbdv_info(Max_SPH_Deg=measurement_parameters['max_degree'],
Num_Quad_Pts=measurement_parameters['n_quadrature_points'])
jobs_measurement = []
for result_reconstruction_single in results_reconstruction:
if result_reconstruction_single is None:
jobs_measurement.append(None)
continue
jobs_measurement.append(client.submit(measurements.comprehensive_analysis, result_reconstruction_single[1][0], **measurement_parameters))
finished_jobs = []
for job in jobs_measurement:
if job is None:
finished_jobs.append(None)
continue
if job.status == 'finished':
finished_jobs.append(job)
else:
finished_jobs.append(None)
results_measurement = client.gather(finished_jobs)
print(f'Reconstructed droplets: {len(results_reconstruction)}, measured droplets: {len(results_measurement)}')
Reconstructed droplets: 85, measured droplets: 85
viewer.layers.clear()
index_visualize = 0
image_raw = io.imread(data_records.iloc[index_visualize]['path']).squeeze()
scale = np.asarray(AICSImage(data_records.iloc[index_visualize]['path'], reader=BioformatsReader).physical_pixel_sizes)
viewer.add_image(image_raw, scale=scale)
for res in results_reconstruction[index_visualize] + results_measurement[index_visualize]:
layer = napari.layers.Layer.create(res[0], res[1], res[2])
viewer.add_layer(layer)
To make handling further down easier, we store all data and metadata in a few simple dataframes
root_export = r'D:\Marc\stress_analysis_2024-08-21'
df_to_export = pd.DataFrame()
for result, result_reconstruction, (idx, row) in zip(results_measurement, results_reconstruction, data_records.iterrows()):
if result is None:
continue
try:
# Compile data
n_frames = len(np.unique(result[0][0][:, 0]))
df_over_time, df_nearest_pairs, df_all_pairs, df_autocorrelations = utils.compile_data_from_layers(
result, n_frames=n_frames, time_step=0)
# create folder for results in separate directory with same name as the original data
folder_name = os.path.join(root_export, str(Path(row['path']).stem))
os.makedirs(folder_name, exist_ok=True)
# remove non-scalar properties and save separately
ellipsoid_contribution_matrix = np.stack(df_over_time['Elipsoid_deviation_contribution_matrix'].values)
io.imsave(os.path.join(folder_name, 'ellipsoid_contribution_matrix.tif'), ellipsoid_contribution_matrix)
df_over_time.drop(columns='Elipsoid_deviation_contribution_matrix', inplace=True)
Tissue_stress_tensor_cartesian = np.stack(df_over_time['Tissue_stress_tensor_cartesian'].values)
np.save(os.path.join(folder_name, 'Tissue_stress_tensor_cartesian.npy'), Tissue_stress_tensor_cartesian)
df_over_time.drop(columns='Tissue_stress_tensor_cartesian', inplace=True)
Tissue_stress_tensor_elliptical = np.stack(df_over_time['Tissue_stress_tensor_elliptical'].values)
np.save(os.path.join(folder_name, 'Tissue_stress_tensor_elliptical.npy'), Tissue_stress_tensor_elliptical)
df_over_time.drop(columns='Tissue_stress_tensor_elliptical', inplace=True)
df_over_time.drop(columns=['stress_cell_all_pair_distance',
'autocorrelations_spatial_total',
'autocorrelations_spatial_cell',
'autocorrelations_spatial_tissue',
'stress_cell_nearest_pair_anisotropy',
'stress_cell_nearest_pair_distance',
'stress_cell_all_pair_anisotropy'], inplace=True)
df_over_time['sample'] = Path(row['path']).stem
df_nearest_pairs['sample'] = Path(row['path']).stem
df_all_pairs['sample'] = Path(row['path']).stem
df_autocorrelations['sample'] = Path(row['path']).stem
df_over_time.to_csv(os.path.join(folder_name, 'results_over_time.csv'), index=False)
df_nearest_pairs.to_csv(os.path.join(folder_name, 'results_nearest_pairs.csv'), index=False)
df_all_pairs.to_csv(os.path.join(folder_name, 'results_all_pairs.csv'), index=False)
df_autocorrelations.to_csv(os.path.join(folder_name, 'results_autocorrelations.csv'), index=False)
df_over_time['sample'] = Path(row['path']).stem
df_to_export = pd.concat([df_to_export, df_over_time], axis=0)
pointclouds_reconstruction = Converter._ldtuple_to_list_of_ldtuple(results_reconstruction[0][1])
for t_idx, pointcloud in enumerate(pointclouds_reconstruction):
pointcloud_vedo = vedo.Points(pointcloud[0])
vedo.save(pointcloud_vedo, os.path.join(folder_name, f'reconstructed_points_t{t_idx}.vtk'))
# same for result[0]
pointclouds = Converter._ldtuple_to_list_of_ldtuple(result[0])
for t_idx, pointcloud in enumerate(pointclouds):
pointcloud_vedo = vedo.Points(pointcloud[0])
vedo.save(pointcloud_vedo, os.path.join(folder_name, f'spherical_harmonics_expansion_t{t_idx}.vtk'))
# same for result[3]
pointclouds = Converter._ldtuple_to_list_of_ldtuple(result[3])
for t_idx, pointcloud in enumerate(pointclouds):
pointcloud_vedo = vedo.Points(pointcloud[0])
for col in pointcloud[1]['features'].columns:
pointcloud_vedo.pointdata[col] = pointcloud[1]['features'][col]
vedo.save(pointcloud_vedo, os.path.join(folder_name, f'lebedev_points_ellipsoid_t{t_idx}.vtk'))
# same for result[4]
pointclouds = Converter._ldtuple_to_list_of_ldtuple(result[4])
for t_idx, pointcloud in enumerate(pointclouds):
pointcloud_vedo = vedo.Points(pointcloud[0])
for col in pointcloud[1]['features'].columns:
pointcloud_vedo.pointdata[col] = pointcloud[1]['features'][col]
vedo.save(pointcloud_vedo, os.path.join(folder_name, f'lebedev_points_t{t_idx}.vtk'))
# export pointclouds
except Exception as e:
print(f'Error processing {row["path"]}: {e}')
Error processing D:\Marc\Marc_genistein_droplets_Aug2024\GFP-NLS data\20240728_Genistein_Gastruloid_102h_1_droplet.czi: object of type 'numpy.float64' has no len()
df_to_export.to_csv(os.path.join(root_export, 'results_over_time_all_samples.csv'), index=False)
utils.export_settings(reconstruction_parameters, file_name=os.path.join(root_export, 'reconstruction_settings.yaml'))
utils.export_settings(measurement_parameters, file_name=os.path.join(root_export, 'measurement_settings.yaml'))