I am trying to calculate the correlation coefficient for a scatter plot with SciPy. The problem is that my dataset is fairly complex and stored in an ndarray, and the basic syntax does not work for me…
Here is my full code:
Python
x
51
51
1
import pandas as pd
2
import matplotlib.pyplot as plt
3
import numpy as np
4
import matplotlib.patches as mpatches
5
from matplotlib.pyplot import figure
6
7
figure(figsize=(12, 10), dpi=200)
8
9
10
import scipy.stats
11
from scipy.stats import t
12
13
plt.close('all')
14
15
16
data = np.array([
17
[22.8, 14.4],
18
[19.6, 3.6],
19
[0.3, 16.6],
20
[8.9, 7],
21
[13.7, 13.4],
22
[14.7, 1.5],
23
[1.9, 0.4],
24
[-1.8, 0.3],
25
[-3, -15.3],
26
[-5.9, -6.3],
27
[-13.4, -15],
28
[-5.7, -34.8],
29
[-6.8, -12.9],
30
31
])
32
33
custom_annotations = ["K464E", "K472E", "R470E", "K464A", "M155E", "K472A", "M155A", "Q539A", "M155R", "D244A", "E247A", "E247R", "D244K"]
34
class_colours = ["r", "r", "r", "r", "r", "r", "g", "g", "b", "b", "b", "b", "b"]
35
36
for i, point in enumerate(data):
37
plt.scatter(point[0], point[1], marker='o', label=custom_annotations[i], c=class_colours[i], edgecolors='black', linewidths=1, alpha=0.75)
38
plt.annotate(custom_annotations[i], (data[i,0], data[i,1]))
39
40
plt.xlabel(r'$Delta V_{0.5}$ Apo wild-type mHCN2 (mV)', fontsize=10)
41
plt.ylabel(r'$Delta psi$ cAMP-bound wild-type mHCN2 (mV)', fontsize=10)
42
plt.title('$Delta psi$ cAMP-bound wild-type mHCN2 (HHU) vs Change in relative current (Jena)', fontsize=10)
43
44
plt.axvline(0, c=(.5, .5, .5), ls= '--')
45
plt.axhline(0, c=(.5, .5, .5), ls= '--')
46
47
scipy.stats.pearsonr(data[i,0], data[i,1])
48
49
plt.legend(ncol=3, loc=(1.04,0))
50
plt.show()
51
Advertisement
Answer
`pearsonr` works fine on your data, as long as you pass it the two whole columns:
Python
1
3
1
scipy.stats.pearsonr(data[:,0], data[:,1]) #change i to : to get the whole col.
2
# this returns (r_coeff, p_value)
3
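You were passing two floats (namely the values in row `i`), as the error says. Because the call comes after the `for` loop, `i` is just the last index, so `data[i,0]` and `data[i,1]` are single numbers. However, `pearsonr` takes two arrays — in your case, the two columns `data[:,0]` and `data[:,1]`.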