python matplotlib

How to properly scale cmapcolor


Below is a fully-functioning example of my problem. The color that cmap assigns based on the values I give to it does not match the color that it should be on the color bar. For example, the fragment with distance=1.75 is yellow when according to the colorbar it should be light purple. How can I assign the colors properly to the bars?

#Create some fake example data and store it in a dataframe
# The imports sit at the top of this snippet so it runs on its own: pandas was
# previously referenced here before it had been imported.
import random as rd

import pandas as pd

tempname = 'Template'
queryname = 'Query'

# One dict per fragment. Fragment i covers template positions [5*i, 5*i + 25]
# and maps to a random 25-long window of the query, with a random distance.
rows = []
for i in range(20):
    start = i * 5
    # Draw the distance before the query start so the sequence of random
    # draws (and therefore seeded output) is the same as before.
    distance = rd.uniform(0, 4)
    querystart = rd.randint(0, 95)

    rows.append({
        'TempName': tempname,
        'QueryName': queryname,
        'TempStarts': start,
        'TempEnds': start + 25,
        'QueryStarts': querystart,
        'QueryEnds': querystart + 25,
        'Distances': distance,
    })

# Passing the column list pins the column order explicitly.
exampledf = pd.DataFrame(
    rows,
    columns=[
        'TempName',
        'QueryName',
        'TempStarts',
        'TempEnds',
        'QueryStarts',
        'QueryEnds',
        'Distances',
    ],
)

# display() is only defined inside IPython/Jupyter; fall back to print() so
# the snippet also works as a plain script.
try:
    display(exampledf)
except NameError:
    print(exampledf)

    
#Build the plot from the example data
# Draws the template as a single black horizontal bar at y=0 and every fragment
# in exampledf as its own bar stacked above it, coloured from the 'plasma'
# colormap according to the fragment's distance. A horizontal colorbar is added
# underneath.

import random as rd
import numpy as np
import pandas as pd
import warnings
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
warnings.filterwarnings('ignore')  # silence every warning for this demo

TempName=exampledf.iloc[0]['TempName']
QueryName=exampledf.iloc[0]['QueryName']  # not used again in this snippet
NumSeqFrags=len(exampledf)  # not used again in this snippet

# NOTE(review): `templength` is never defined in this snippet, so this line
# raises NameError as posted. Presumably it is the template length (e.g. the
# largest TempEnds value) -- confirm with the author.
TempLength=templength+2

Distanceslst=list(exampledf.Distances)

XMax=max(list(exampledf.TempEnds))  # not used again in this snippet
cmapz = cm.get_cmap('plasma')

# Invisible 1x2 image holding the values 0 and max(Distanceslst). It is never
# shown; it exists so that plt.colorbar() below has an image to draw its scale
# from, which makes the colorbar run from 0 to the largest distance.
img = plt.imshow(np.array([[0,max(Distanceslst)]]), cmap="plasma")
img.set_visible(False)

barheight=6

# Template bar: black, anchored at x=0, drawn on the bottom row (y=0).
plt.barh(width=TempLength, left=0, height=barheight, y=0,color='Black')
ticklabels=[TempName]
ticklocations=[0]

#Add the fragments
for i in range (len(exampledf)):
    # NOTE: the colormap is called with the raw distance here. As the solution
    # below explains, a Colormap expects a number between zero and one, which
    # is why the bar colours disagree with the colorbar.
    cmapcolor=cmapz(Distanceslst[i])
    width=int(exampledf.iloc[i]['TempEnds'])-int(exampledf.iloc[i]['TempStarts'])
    start=int(exampledf.iloc[i]['TempStarts'])

    # Row position: rows are (barheight+4) apart, offset by barheight so the
    # first fragment sits above the template bar at y=0.
    yloc=(barheight+4)*i+barheight

    plt.barh(width=width, left=start, height=barheight, y=yloc,color=cmapcolor)
    
    # Tick label of the form QueryName(QueryStart:QueryEnd)(Distance=d.dd)
    fullname=str(exampledf.iloc[i]['QueryName'])+'('+str(exampledf.iloc[i]['QueryStarts'])+':'+str(exampledf.iloc[i]['QueryEnds'])+')'+'(Distance='+str(round(Distanceslst[i],2))+')'
    ticklabels.append(fullname)
    ticklocations.append(yloc)

# One y tick per bar, labelled with the template name or the fragment label.
plt.yticks(ticks=ticklocations, labels=ticklabels,fontsize=8)    

plt.colorbar(orientation="horizontal",fraction=0.05,label='Distance')

plt.show() 

[Image: the plot produced by the code above]


Solution

  • The Colormap object's __call__ method takes a number between zero and one.

    For scaling arbitrary data into that range see the matplotlib.colors.Normalize class:

    from matplotlib.colors import Normalize
    # Start the range at 0 rather than min(Distanceslst): the colorbar is drawn
    # from the imshow() image built on [[0, max(Distanceslst)]], so it spans
    # 0..max. The bars must be normalized over that same range for their
    # colours to line up with the colorbar.
    norm = Normalize(vmin = 0, vmax = max(Distanceslst))
    ...
    cmapcolor=cmapz(norm(Distanceslst[i]))