def pick_reference_channels(self, max_sat_proportion=0.001, min_dyn_range=3, **_):
"""
Pick reference channels for each protein using multiple quality metrics.
The channel selection uses a weighted scoring system that considers:
- Dynamic range (log10)
- Signal-to-noise ratio
- Specificity (signal vs crosstalk)
- Saturation proportion (penalizes channels with too many saturated events)
"""
if self.use_reference_channels is not None:
user_provided = []
for p in self._protein_names:
if p in self.use_reference_channels.keys():
ch = self.use_reference_channels[p]
assert ch in self._channel_names, f'Unknown channel {ch} in reference_channels'
user_provided.append(self._channel_names.index(ch))
else:
user_provided.append(None)
unknown_provided = [
p for p in self.use_reference_channels if p not in self._protein_names
]
if len(unknown_provided) > 0:
self._log.warning(
f'Ignoring proteins {unknown_provided}: declared a reference channel but are absent from controls.'
)
else:
user_provided = [None] * len(self._protein_names)
# compute quality metrics for selection
metrics = ChannelMetrics(
self._controls_values, self._controls_masks, self._channel_names, self._protein_names
)
single_masks = self._controls_masks.sum(axis=1) == 1
ref_channels = []
for pid, prot in enumerate(self._protein_names):
if user_provided[pid] is not None:
ref_channels.append(user_provided[pid])
continue
# get protein mask and values
prot_mask = np.logical_and(single_masks, self._controls_masks[:, pid])
if not np.any(prot_mask):
ref_channels.append(None)
continue
prot_values = self._controls_values[prot_mask]
# compute saturation proportions
prot_sat = np.logical_or(
prot_values <= self._saturarion_thresholds[:, 0],
prot_values >= self._saturarion_thresholds[:, 1],
)
sat_proportions = prot_sat.sum(axis=0) / prot_sat.shape[0]
# get other metrics for this protein
channel_scores = []
for cid, chan in enumerate(self._channel_names):
# skip channels with too much saturation
if sat_proportions[cid] > max_sat_proportion:
channel_scores.append(-np.inf)
continue
# compile scores from different metrics
scores = {
'dynamic_range': metrics.dynamic_ranges[prot][chan],
'snr': metrics.signal_to_noise[prot][chan],
'specificity': metrics.specificities[prot][chan],
'signal': metrics.signal_strengths[prot][chan],
}
# skip channels with insufficient dynamic range
if scores['dynamic_range'] < min_dyn_range:
channel_scores.append(-np.inf)
continue
# normalize each metric to 0-1 range for this protein
metric_weights = {
'dynamic_range': 1.0,
'specificity': 0.8,
'snr': 0.5,
'signal': 0.4,
}
scores = normalized_metrics(scores)
total_score = 0
for metric, weight in metric_weights.items():
score = scores[metric]
total_score += weight * score
channel_scores.append(total_score)
channel_scores = np.array(channel_scores)
if np.all(channel_scores == -np.inf):
ref_channels.append(None)
raise ValueError(
f"""
No reference channel found for {prot}!
Try increasing max_sat_proportion and/or decreasing min_dyn_range
Saturation proportions per channel: {sat_proportions}
"""
)
else:
best_channel = int(np.argmax(channel_scores))
ref_channels.append(best_channel)
self._log.debug(
f"Reference channel for {prot} is {self._channel_names[best_channel]} "
f"with score {channel_scores[best_channel]:.2f}"
)
return ref_channels