Skip to content

Python Package

Arborist is also a Python package. Below is an example of how to use it.

Running the arborist function

import pandas as pd 
from arborist import read_trees, arborist 

# Read the candidate trees in as edge lists.
candidate_trees = read_trees("example/input/candidate_trees.txt")
print(f"Candidate set size: {len(candidate_trees)}")
# Candidate set size: 30

# Load the read-count table; the preview below shows its columns
# (snv, cell, alt, total).
# NOTE(review): alt/total are presumably the variant and total read counts for
# each (snv, cell) pair -- confirm against the arborist input documentation.
read_counts = pd.read_csv("example/input/read_counts.csv")
# A bare .head() call only displays output in an interactive session; the
# string that follows records what it shows.
# NOTE(review): the preview lists four rows, but head() returns five by
# default -- the sample output looks truncated; confirm and refresh.
read_counts.head()
"""
>>> read_counts.head()
   snv  cell  alt  total
0    0     4    0      1
1    0     5    0      1
2    0    29    0      1
3    0    45    0      1
"""

# Load the input SNV clustering. header=None tells pandas the file has no
# header row, so the column names are supplied explicitly via names=.
snv_clusters = pd.read_csv("example/input/input_clustering.csv", header=None, names=["snv", "cluster"]) 
snv_clusters.head()
"""
>>> snv_clusters.head()
   snv  cluster
0  108        2
1  123        2
2  176        2
3  289        2
4  452        2
"""

# Run arborist on the candidate trees, read counts, and SNV clustering.
# The call returns three values, unpacked here as ranking, best_fit, all_fits.
# NOTE(review): the keyword values below are simply the ones used in this
# example; their meanings are not shown here -- see the arborist documentation.
ranking, best_fit, all_fits =arborist(
    tree_list = candidate_trees,
    read_counts = read_counts,
    snv_clusters = snv_clusters,
    alpha = 0.001,
    max_iter = 10,
    tolerance = 1,
    gamma= 0.7,
    add_normal = False,
    threads = 10,
    verbose = False
)

# Printing best_fit shows a tree label, its ELBO, and a list of edges
# (sample output recorded in the string below).
# NOTE(review): each "a->b" line is presumably a parent->child edge -- confirm.
print(best_fit)
"""
Tree 0
ELBO: -311397.27
 2->1
 2->3
 2->4
 2->5
 3->6
 0->2
"""

Manipulating TreeFit objects generated by arborist

TreeFit objects can be used to obtain maximum a posteriori (MAP) assignments of cell-to-clone labels (z) or SNV-to-cluster labels (y), as well as to explore the approximate posterior distributions (qz and qy).


# best_fit is used here as the TreeFit object under inspection.
# MAP assignments: z for cell-to-clone labels, y for SNV-to-cluster labels.
z = best_fit.map_assign_z()
z.head()
y = best_fit.map_assign_y()
y.head()

# Approximate posterior distributions: qz for z and qy for y.
# NOTE(review): the _df suffix and the .head() calls suggest these are pandas
# DataFrames -- confirm against the TreeFit API.
qz = best_fit.q_z_df()
qz.head()
qy = best_fit.q_y_df()
qy.head()