Generated from notebooks/1-DNA-seq-model-example.ipynb

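Here is a short demonstration of Kipoi-interpret. The code cells below use `np` and `plt` (conventionally `numpy` and `matplotlib.pyplot`); neither import appears in the cells shown, so bring them into scope before running the examples.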

import kipoi
import kipoi_interpret
from kipoi_veff.utils.plot import seqlogo_heatmap
from concise.preprocessing.sequence import encodeDNA
# List all available importance-score methods; the output below maps each
# method name to the class that implements it.
kipoi_interpret.importance_scores.available_methods()
{'deeplift': kipoi_interpret.importance_scores.referencebased.DeepLift,
 'grad': kipoi_interpret.importance_scores.gradient.Gradient,
 'grad*input': kipoi_interpret.importance_scores.gradient.GradientXInput,
 'intgrad': kipoi_interpret.importance_scores.referencebased.IntegratedGradients,
 'mutation': kipoi_interpret.importance_scores.ism.Mutation,
 'saliency': kipoi_interpret.importance_scores.gradient.Saliency}
# Gradient-based methods
from kipoi_interpret.importance_scores.gradient import Gradient, GradientXInput
# In-silico mutagenesis-based methods
from kipoi_interpret.importance_scores.ism import Mutation
# DeepLift
from kipoi_interpret.importance_scores.referencebased import DeepLift

Setup

Model:

# Load the DeepBind GATA1 ChIP-seq model through Kipoi; the log lines below
# show the model and its dataloader being pulled and loaded.
model = kipoi.get_model("DeepBind/Homo_sapiens/TF/D00765.001_ChIP-seq_GATA1")
2018-07-20 01:41:36,856 [INFO] git-lfs pull -I DeepBind/Homo_sapiens/TF/D00765.001_ChIP-seq_GATA1/**
2018-07-20 01:41:36,923 [INFO] git-lfs pull -I DeepBind/template/**
2018-07-20 01:41:36,981 [INFO] model DeepBind/Homo_sapiens/TF/D00765.001_ChIP-seq_GATA1 loaded
2018-07-20 01:41:37,014 [INFO] git-lfs pull -I DeepBind/Homo_sapiens/TF/D00765.001_ChIP-seq_GATA1/./**
2018-07-20 01:41:37,068 [INFO] git-lfs pull -I DeepBind/template/**
2018-07-20 01:41:37,129 [INFO] dataloader DeepBind/Homo_sapiens/TF/D00765.001_ChIP-seq_GATA1/. loaded
2018-07-20 01:41:37,140 [INFO] successfully loaded the dataloader from /home/avsec/.kipoi/models/DeepBind/Homo_sapiens/TF/D00765.001_ChIP-seq_GATA1/dataloader.py::SeqDataset
2018-07-20 01:41:37,206 [INFO] successfully loaded model architecture from <_io.TextIOWrapper name='model_files/model.json' mode='r' encoding='UTF-8'>
2018-07-20 01:41:37,265 [INFO] successfully loaded model weights from model_files/model.h5
2018-07-20 01:41:37,267 [INFO] dataloader.output_schema is compatible with model.schema

Sequence of interest:

# Raw DNA sequence whose per-base importance is scored in the sections below.
seq = "ATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAGCTGTGGGAGGAAGATAAGAGGTATGAACATGATTAGCAAAAGGGCCTAGCTTGGACTCAGAATAA"
# The sequence is wrapped in a one-element list, so `seqa` is a batch holding
# a single encoded sequence.
seqa = encodeDNA([seq]) # one-hot-encode the sequence

Gradient * input

# Build the gradient*input scorer around the loaded model.
grxinp = GradientXInput(model)
# Score the encoded batch and keep the entry for its single sequence.
val = grxinp.score(seqa)[0]
# Wide, short figure to hold the plot.
fig = plt.figure(figsize=(15,2.5))
# Plot the scores together with their transpose on a new subplot.
# NOTE(review): presumably the first argument drives the sequence logo and the
# second the heatmap -- confirm against seqlogo_heatmap's signature.
seqlogo_heatmap(val, val.T, ax=plt.subplot())
<matplotlib.axes._subplots.AxesSubplot at 0x7f3ef9bbadd8>

png

Gradient

# Build the plain-gradient scorer around the loaded model.
gr = Gradient(model)
# Score the encoded batch and keep the entry for its single sequence
# (this rebinds `val` from the previous section).
val = gr.score(seqa)[0]
# Wide, short figure to hold the plot.
fig = plt.figure(figsize=(15,2.5))
# Plot the scores together with their transpose on a new subplot.
# NOTE(review): presumably the first argument drives the sequence logo and the
# second the heatmap -- confirm against seqlogo_heatmap's signature.
seqlogo_heatmap(val, val.T, ax=plt.subplot())
<matplotlib.axes._subplots.AxesSubplot at 0x7f3ef9220668>

png

In-silico mutagenesis

# TODO - update the Mutate function. It should return the following:
# prediction_value:
#   - array
def to_array(isval):
    """Temporarily convert nested mutation scores into a numpy array.

    `isval` is iterated as a sequence of rows, each row a sequence of
    entries. Every entry is reduced to a single value before the result is
    handed to `np.array`:

    - `None` becomes 0,
    - a `list` contributes its first element,
    - anything else is kept as is.
    """
    def _first_value(entry):
        # Guard clauses instead of nested if/else; same three cases.
        if entry is None:
            return 0
        if isinstance(entry, list):
            return entry[0]
        return entry

    converted_rows = []
    for row in isval:
        converted_rows.append([_first_value(entry) for entry in row])
    return np.array(converted_rows)
# Build the in-silico mutagenesis scorer around the loaded model.
# NOTE(review): "seq" presumably names the model input to mutate -- confirm
# against the Mutation constructor.
ism = Mutation(model, "seq")
# Take the scores for the single sequence in the batch and convert the
# nested output to a numpy array with the temporary helper above.
val = to_array(ism.score(seqa)[0])
# Wide, short figure to hold the plot.
fig = plt.figure(figsize=(15,2.5))
# Unlike the gradient plots, the first argument is the absolute value of the
# scores while the second keeps the signed, transposed scores; the letter
# scale is switched off.
seqlogo_heatmap(np.abs(val), val.T, ax=plt.subplot(), show_letter_scale=False)
<matplotlib.axes._subplots.AxesSubplot at 0x7f3ef82f8160>

png

DeepLift

# Not run here: the model is not a sequential model
# dl = DeepLift(model, 'maximum_593', 0)