I can successfully scrape the data in this table for THRIVEN:
But as you can see, in the Net% column the sign of each value (negative or positive) is determined by some CSS (or so I believe; I couldn't find where it is defined).
How can I extract that data and put it into my Excel sheet as negative/positive numbers? Below is my current code:
Python
x
51
51
1
# Download the stock page, read its latest quarterly-report rows and copy them
# into the 'THRI' sheet of THRIVEN.xlsx.
from urllib.request import Request, urlopen

lwb = load_workbook(filename='THRIVEN.xlsx')
lws = lwb['THRI']

klseLink = 'https://www.klsescreener.com/v2/stocks/view/7889'
# klseLink is only the URL string (a str has no `.text` attribute), so the page
# has to be downloaded before it can be parsed.
# NOTE(review): the User-Agent header assumes the site may reject the default
# urllib client -- confirm, or drop the header.
klseRequest = Request(klseLink, headers={'User-Agent': 'Mozilla/5.0'})
with urlopen(klseRequest, timeout=30) as klseResponse:
    klseParser = BeautifulSoup(klseResponse.read(), 'html.parser')

# Header row plus at most four report rows. A missing table yields no rows
# instead of failing on None.
reportTable = klseParser.find('table', {'class': 'financial_reports table table-hover table-sm table-theme'})
currentQuarterReportTable = reportTable.find_all('tr', limit=5) if reportTable is not None else []


def _thousandsToInt(text):
    """Convert a figure such as '34,780k' or '-6,537k' to an int (34780, -6537)."""
    return int(text.replace('k', '').replace(',', ''))


def _signedNet(netCell):
    """Return the Net% cell as a float with the correct sign.

    The cell text holds only the magnitude (e.g. '20%'); a negative figure is
    marked by the 'btn-danger' class on the span inside the cell.
    """
    value = float(netCell.get_text().strip().replace('%', ''))
    # -abs() keeps the result negative even if the text already carries a '-'.
    return -abs(value) if netCell.select_one('.btn-danger') else value


print("")
print("==================== CURRENT QUARTER REPORT =====================")
print("")

# Each entry: [revenue, profit/loss, quarter, quarter date, announced date, net].
# Everything is kept as page text except net, which is a signed float.
currentQuarterReportSelectedRow = []
try:
    for currentQuarterReportRow in currentQuarterReportTable[1:]:
        cells = currentQuarterReportRow.find_all('td')
        # Positions of the wanted values within the row's <td> cells.
        revenue, profitLoss, quarter, quarterDate, announcedDate = (
            cells[column].get_text().strip() for column in (3, 4, 5, 6, 8)
        )
        currentQuarterReportSelectedRow.append(
            [revenue, profitLoss, quarter, quarterDate, announcedDate, _signedNet(cells[9])]
        )
except IndexError:
    # A row with fewer cells than expected: treat the report as unavailable
    # rather than writing a partial result.
    currentQuarterReportSelectedRow = []

if not currentQuarterReportSelectedRow:
    print("No Quarterly Report from KLScreener")
else:
    currentQuarterReportLimitedTable = pd.DataFrame(
        currentQuarterReportSelectedRow,
        columns=['Revenue', 'Profit/Loss', 'Quarter', 'Quarter Date', 'Announced Date', 'Net'],
    )
    # Label the rows '1', '2', ... instead of the default 0-based index.
    currentQuarterReportLimitedTable.index = [
        str(number) for number in range(1, len(currentQuarterReportSelectedRow) + 1)
    ]
    print(currentQuarterReportLimitedTable)

    # Report rows go to worksheet rows 19 onward, in columns 3, 5, 7, 9, 11 and 13.
    for i, (revenue, profitLoss, quarter, quarterDate, announcedDate, net) in enumerate(
        currentQuarterReportSelectedRow, start=1
    ):
        excelRow = 18 + i
        lws.cell(excelRow, 3).value = int(quarter)
        lws.cell(excelRow, 5).value = quarterDate
        lws.cell(excelRow, 7).value = announcedDate
        lws.cell(excelRow, 9).value = _thousandsToInt(revenue)
        lws.cell(excelRow, 11).value = _thousandsToInt(profitLoss)
        lws.cell(excelRow, 13).value = net

lwb.save('THRIVEN.xlsx')
51
This gives me:
Note that the colors of the Revenue and Profit/Loss columns come from conditional formatting in Excel itself.
EDIT:
I finally achieved this with:
Python
1
7
1
for currentQuarterReportRow in currentQuarterReportTable[1:]:
    # The Net% value sits in the second-to-last cell of each row.
    netCell = currentQuarterReportRow.find_all('td')[-2]
    netValue = float(netCell.get_text().replace('%', ''))
    # The cell text has no sign; a 'btn-sm btn-danger' span marks a negative value.
    isNegative = netCell.find('span', {'class':'btn-sm btn-danger'})
    print(netValue * -1 if isNegative else netValue)
7
Thanks to @HedgeHog for suggesting the solution! :D
Advertisement
Answer
Check the class of the button to tell whether the value is positive or negative:
Python
1
5
1
# An element with class 'btn-danger' inside the cell marks the percentage as
# negative; the cell text itself carries no sign.
sign = -1 if net.select_one('.btn-danger') else 1
print(float(net.get_text().replace('%','')) * sign)
5
Example
Python
1
41
41
1
from bs4 import BeautifulSoup

# Two sample report rows: the first Net% cell uses 'btn-danger', the second
# uses 'btn-success'.
html='''
<tr class="table-alternate">
<td class="number">-1.20</td>
<td class="number">0.000</td>
<td class="number">0.3400</td>
<td class="number">34,780k</td>
<td class="number">-6,537k</td>
<td class="text-center">4</td>
<td><span style="white-space: nowrap">2020-12-31</span></td>
<td><span style="white-space: nowrap">31 Dec, 2020</span></td>
<td><span style="white-space: nowrap">2021-02-25</span></td>
<td class="number"><span class="btn-sm btn-danger">20%</span></td>
<td><a href="/v2/stocks/financial-report/7889/2020-12-31" target="_blank">View</a> </td>
</tr>
<tr class="table-alternate">
<td class="number">1.27</td>
<td class="number">0.000</td>
<td class="number">0.3500</td>
<td class="number">49,244k</td>
<td class="number">6,959k</td>
<td class="text-center">3</td>
<td><span style="white-space: nowrap">2020-09-30</span></td>
<td><span style="white-space: nowrap">31 Dec, 2020</span></td>
<td><span style="white-space: nowrap">2020-11-20</span></td>
<td class="number"><span class="btn-sm btn-success">35%</span></td>
<td><a href="/v2/stocks/financial-report/7889/2020-09-30" target="_blank">View</a> </td>
</tr>

'''


def signed_net(cell):
    """Return the cell's percentage as a float, negated when it holds a '.btn-danger' element."""
    value = float(cell.get_text().replace('%',''))
    if cell.select_one('.btn-danger'):
        return value * -1
    return value


soup = BeautifulSoup(html,'html.parser')

for row in soup.find_all('tr'):
    # The Net% cell is the second-to-last <td> of each row.
    print(signed_net(row.find_all('td')[-2]))
40
41
Output
Plain text
1
3
1
-20.0
2
35.0
3