How to calculate the correlation coefficient for a scatter plot in SciPy

I am trying to calculate the correlation coefficient for a scatter plot with SciPy. The problem is that I have a somewhat complex dataset stored in an ndarray, and the basic syntax does not work for me…

here is my full code:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches
from matplotlib.pyplot import figure

import scipy.stats
from scipy.stats import t

# Close figures left over from earlier runs BEFORE creating the new one.
# Calling plt.close('all') after figure(...) would destroy the freshly
# configured figure, and the plot would fall back to a default-sized one.
plt.close('all')

# Current figure for all following plt.* calls: 12x10 inches at 200 dpi.
figure(figsize=(12, 10), dpi=200)


# One record per mutant: (annotation label, marker colour, first value, second value).
# Keeping the fields side by side makes it hard to get the three parallel
# sequences below out of step with each other.
_points = [
    ("K464E", "r", 22.8, 14.4),
    ("K472E", "r", 19.6, 3.6),
    ("R470E", "r", 0.3, 16.6),
    ("K464A", "r", 8.9, 7),
    ("M155E", "r", 13.7, 13.4),
    ("K472A", "r", 14.7, 1.5),
    ("M155A", "g", 1.9, 0.4),
    ("Q539A", "g", -1.8, 0.3),
    ("M155R", "b", -3, -15.3),
    ("D244A", "b", -5.9, -6.3),
    ("E247A", "b", -13.4, -15),
    ("E247R", "b", -5.7, -34.8),
    ("D244K", "b", -6.8, -12.9),
]

# (13, 2) array: column 0 is plotted on the x axis, column 1 on the y axis.
data = np.array([[first, second] for _, _, first, second in _points])

# Text label and matplotlib colour code for each row of `data`, in row order.
custom_annotations = [label for label, _, _, _ in _points]
class_colours = [colour for _, colour, _, _ in _points]

# Draw every point with its own scatter call so that each mutant gets an
# individual legend entry and colour, then label the point in place.
for label, colour, (x_val, y_val) in zip(custom_annotations, class_colours, data):
    plt.scatter(x_val, y_val, marker='o', label=label, c=colour, edgecolors='black', linewidths=1, alpha=0.75)
    plt.annotate(label, (x_val, y_val))

# Raw strings with explicit backslashes: mathtext needs "\Delta" / "\psi" to
# render the Greek symbols; without the backslash the letters are typeset as
# plain italic text.
plt.xlabel(r'$\Delta V_{0.5}$  Apo wild-type mHCN2 (mV)', fontsize=10)
plt.ylabel(r'$\Delta \psi$  cAMP-bound wild-type mHCN2 (mV)', fontsize=10)
plt.title(r'$\Delta \psi$  cAMP-bound wild-type mHCN2 (HHU) vs Change in relative current (Jena)', fontsize=10)

# Dashed grey guide lines through the origin.
plt.axvline(0, c=(.5, .5, .5), ls='--')
plt.axhline(0, c=(.5, .5, .5), ls='--')

# pearsonr needs two 1-D arrays of equal length, so pass the whole columns
# (":" selects every row). Indexing with the leftover loop variable would
# hand it two scalars and raise an error.
r_coeff, p_value = scipy.stats.pearsonr(data[:, 0], data[:, 1])
print(f"Pearson r = {r_coeff:.3f}, p-value = {p_value:.3g}")

# Place the legend outside the axes, to the right, in three columns.
plt.legend(ncol=3, loc=(1.04, 0))
plt.show()

Answer

pearsonr works fine on your data

scipy.stats.pearsonr(data[:,0], data[:,1])  # ":" instead of "i" selects every row, so each argument is a whole column
# The call returns the pair (r_coeff, p_value).

You were passing two floats (namely the values at row i), as the error says; however, pearsonr takes two arrays — in your case, the two columns.

how to calculate correlation coefficient for a scatter-plot in scipy

Advertisement

Answer