Sep-27-2023, 12:02 AM
I am getting html that I want to decode. If I do it with an example it works but not with my pandas dataframe. Any suggestions?
#!/usr/bin/env python
# coding: utf-8
# import statements
import requests
import pandas as pd
import html
# constants
url = "https://chartexp1.sha.maryland.gov/CHARTExportClientService/getDMSMapDataJSON.do"
# getting response
response = requests.request("GET", url).json()
# converting to dataframe
df = pd.DataFrame(response['data'])
#adding new column/converting msgHTML Encoded to decoded
df['decodedHtml'] = html.unescape(df['msgHTML'])
# saving dataframe to csv
df.to_csv('output/response_python.csv')
##TESTING ONLY##
myHtml = "<body><h1> How to use html.unescape() in Python </h1></body>"
encodedHtml = html.escape(myHtml)
print("Encoded HTML: ", encodedHtml)
decodedHtml = html.unescape(encodedHtml)
print("Decoded HTML: ", decodedHtml)
print(html.unescape('© 2023'))
