"""Read a data from ascii file and make a simple analysis
visualizing 3 of the 5 dimensions of the dataset"""
import numpy as np
from vedo import download, Points, Axes, show
from vedo.pyplot import histogram


################################### Read the csv data:
# Fetch the example file and parse it by hand: the first non-blank line is
# the header (column names), every following line is a row of numbers.
delimiter = ','
fpath = download('https://vedo.embl.es/examples/data/genes.csv')
with open(fpath, "r") as f:
    # Strip each line so the last field does not carry the trailing newline,
    # and drop blank lines (e.g. an empty line at the end of the file) which
    # would otherwise fail in float('').
    rows = [ln.split(delimiter) for ln in (raw.strip() for raw in f) if ln]

# Header names are stripped so they can be used cleanly as axis titles.
header, *records = rows
names = [name.strip() for name in header]

# One row per record, one column per header name.
data = np.array([[float(x) for x in rec] for rec in records])

# Quick sanity check of what was loaded: column names, first rows, row count.
head = data[:5]
n_rows = len(data)
print("Print first 5 rows:\n", names)
print(head)
print("Number of rows:", n_rows)
##################################################

# Split the table column-wise: one vector per gene, plus its header label.
columns = np.transpose(data)
g0, g1, g2, g3, g4 = columns
n0, n1, n2, n3, n4 = names

# One histogram per gene expression, each with its own color index;
# only the gene-3 histogram is drawn with logscale enabled.
h0 = histogram(g0, c=0, xtitle=n0)
h1 = histogram(g1, c=1, xtitle=n1)
h2 = histogram(g2, c=2, xtitle=n2)
h3 = histogram(g3, c=3, xtitle=n3, logscale=True)
h4 = histogram(g4, c=4, xtitle=n4)

# Choose here which three variables become the x, y, z coordinates:
# stack the selected columns side by side into an (N, 3) array of points.
pts = np.column_stack((g4, g2, g3))

# First cloud: rows where g0 is strictly positive (small red points).
positive_g0 = g0 > 0
pts_1 = pts[positive_g0]
p1 = Points(pts_1).ps(4).c("red5")  # ps sets the point size
print("after selection nr. of points is", len(pts_1))

# Second cloud: among the rows with negative g0, keep those with g1 > 0.5
# (bigger green points).
negative_g0_high_g1 = np.logical_and(g0 < 0, g1 > 0.5)
pts_2 = pts[negative_g0_high_g1]
p2 = Points(pts_2).ps(8).c("green")

# Axes built around both clouds, labelled to match the columns chosen above.
both_clouds = p1 + p2
axes = Axes(both_clouds, xtitle="gene4", ytitle="gene2", ztitle="gene3", c="k")

# Show everything in a new plotter window: the five histograms plus one
# entry holding the two superposed clouds, their axes and the module docstring.
panels = [h0, h1, h2, h3, h4, (p1, p2, axes, __doc__)]
plt = show(
    panels,
    shape="1/5",  # 1 space above and 5 below
    sharecam=0,
    axes=0,
    zoom=1.4,
    interactive=True,
)
plt.close()