Jul-12-2021, 10:51 PM
I'm trying to parse a flight tracker web page, but when I try to get the values from the 'Course' tab, I only get the ←, →, ↓, and ↑ arrows. It's not reading the others, such as ↗ and ↙.
import urllib3
from bs4 import BeautifulSoup
import regex
# Fetch one FlightAware track-log page and print the course value (direction
# arrow followed by the heading digits) found in each right-aligned table cell.
url = 'https://flightaware.com/live/flight/ETH3626/history/20210705/1400Z/HAAB/VHHH/tracklog'
req = urllib3.PoolManager()
res = req.request('GET', url)
soup = BeautifulSoup(res.data, 'lxml')

# U+2190-U+2199 covers the four straight arrows and the four diagonal ones
# (as well as the two double-headed arrows). The pattern is applied to the
# cell's text, not to its serialized HTML, so it does not depend on how the
# <td>/<span> markup, attributes or whitespace happen to be written.
course_pattern = regex.compile(r'[\u2190-\u2199]\s*\d+')

for cell in soup.find_all('td', attrs={'align': 'right'}):
    # strip=True trims each text node and joins them without a separator,
    # hence \s* (rather than \s) between the arrow and the digits.
    text = cell.get_text(strip=True)
    course = course_pattern.search(text)
    if course is None:
        # Right-aligned cell that does not hold a course value; skip it.
        continue
    print(course.group())
