Apr-19-2023, 08:37 PM
I am using the following code to retrieve costs from an Azure subscription. The 'Cost (USD)' data is returning a ValueError when the cost is in the thousands. Can anyone provide insight as to how this can be resolved?
from azure.mgmt.costmanagement import CostManagementClient
from azure.mgmt.costmanagement.models import QueryAggregation,QueryGrouping,QueryDataset,QueryDefinition,QueryTimePeriod,QueryFilter,QueryComparisonExpression
from azure.mgmt.resource import ResourceManagementClient
from azure.identity import DefaultAzureCredential
from tabulate import tabulate
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
from openpyxl.styles import Font, Border, Side, PatternFill, Color
import json
import pandas as pd
import datetime as dt
import calendar
import numpy as np
billing_month = input("Enter billing month (MM): ")
billing_year = input("Enter billing year (YYYY): ")
thedate = dt.datetime.combine(dt.date.today(), dt.time())
first = thedate.replace(day=1)
last = thedate.replace(day = calendar.monthrange(thedate.year, thedate.month)[1])
# Instantiate Azure credentials and set the subscription ID
credential = DefaultAzureCredential()
subscription_id = '<enter_subscription_id>'
# Set start and end dates based on user input
start_date = f"{billing_year}-{billing_month}-01T00:00:00Z"
end_date = f"{billing_year}-{billing_month}-{calendar.monthrange(int(billing_year), int(billing_month))[1]}T23:59:59Z"
# Set scope of the query to the subscription
scope = '/subscriptions/{}'.format(subscription_id)
# Instantiate Resource Management client to get a list of resource groups
client = ResourceManagementClient(credential, subscription_id)
# Query Cost Management API for resource costs for all resource groups in the subscription
cmgmtc = CostManagementClient(credential = credential)
# List of columns available in the Cost Management API
"""
'ResourceGroup','ResourceGroupName','ResourceLocation',
'ConsumedService','ResourceType','ResourceId',
'MeterId','BillingMonth','MeterCategory',
'MeterSubcategory','Meter','AccountName',
'DepartmentName','SubscriptionId','SubscriptionName',
'ServiceName','ServiceTier','EnrollmentAccountName',
'BillingAccountId','ResourceGuid','BillingPeriod',
'InvoiceNumber','ChargeType','PublisherType',
'ReservationId','ReservationName','Frequency',
'PartNumber','CostAllocationRuleName','MarkupRuleName',
'PricingModel','BenefitId','BenefitName',''
"""
# Define query template
query_template = (
QueryDefinition(
type = "ActualCost"
#, timeframe = "ThisMonth"
, timeframe= "Custom"
, time_period= QueryTimePeriod(from_property = start_date, to = end_date)
, dataset =
QueryDataset(
granularity = "Monthly"
, aggregation = {
"totalCost": QueryAggregation(name = "Cost", function = "Sum")
,"totalCostUSD": QueryAggregation(name = "CostUSD", function = "Sum")
}
, grouping = [
QueryGrouping(name = "ResourceGroupName", type = "Dimension")
,QueryGrouping(name = "ResourceId" , type = "Dimension")
,QueryGrouping(name = "ResourceType" , type = "Dimension")
,QueryGrouping(name = "SubscriptionName" , type = "Dimension")
]
)
)
)
# Replace the resource group name in the query template with "*"
# to get costs for all resource groups
replaced_query = (
query_template.deserialize(
json.loads(
json.dumps(
query_template.serialize()
).replace('RESOURCE_GROUP','')
)
)
)
replaced_query.dataset.grouping = [
QueryGrouping(name="ResourceGroupName", type="Dimension")
,QueryGrouping(name="SubscriptionName", type="Dimension")
]
replaced_query.dataset.filter = None
# Get results of the query
result = cmgmtc.query.usage( scope = scope, parameters = replaced_query)
# Convert results to a pandas DataFrame
data = pd.DataFrame(result.rows, columns = list(map(lambda col: col.name, result.columns)))
# Sort the DataFrame by the total cost column
data_sorted = data.sort_values(by='CostUSD' ,ascending = False)
data_filtered = data_sorted[["SubscriptionName", "ResourceGroupName", "BillingMonth", "CostUSD"]]
data_filtered = data_filtered.rename(columns={'ResourceGroupName': 'Resource Group', 'BillingMonth': 'Billing Month', 'CostUSD': 'Cost (USD)', 'SubscriptionName': 'Subscription'})
data_filtered['Cost (USD)'] = data_filtered['Cost (USD)'].apply(lambda x: '${:,.2f}'.format(x))
data_filtered["Billing Month"] = pd.to_datetime(data_filtered["Billing Month"])
data_filtered["Billing Month"] = data_filtered["Billing Month"].dt.strftime("%Y-%m")
data_filtered["Subscription"] = data_filtered["Subscription"].apply(lambda x: x.split("(")[0].strip())
data_filtered = data_filtered[["Subscription", "Resource Group", "Billing Month", "Cost (USD)"]]
# Export the data to an Excel file
excel_file_path = 'C:\python\costs.xlsx'
subscription_name = data_filtered["Subscription"] = data_filtered["Subscription"].apply(lambda x: x.split("(")[0].strip()).iloc[0]
billing_month = data_filtered["Billing Month"].unique()[0]
# Create a new workbook and select the sheet
workbook = Workbook()
# Create the title sheet
sheet = workbook.create_sheet(title="Summary")
sheet['A1'] = "Report Title: Subscription Cost Summary"
sheet['A2'] = f"Billing Month: {billing_month} "
sheet['A3'] = f"Report Date: {dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
# Create the subscription sheet
ws = workbook.create_sheet(title=subscription_name)
# Write headers to sheet
headers = ["Subscription", "Resource Group", "Billing Period", "Cost (USD)"]
for col_num, header in enumerate(headers,1):
cell = ws.cell(row=1, column=col_num)
cell.value = header
cell.font = Font(bold=True)
cell.border = Border(bottom=Side(border_style='thin'))
if col_num <= 4:
cell.fill = PatternFill(start_color='BFBFBF', end_color='BFBFBF', fill_type='solid')
# Add the data to the sheet
data_rows = data_filtered.to_records(index=False)
total_cost = 0.0
for row_index, row in enumerate(data_rows, 2):
for col_index, cell_value in enumerate(row, 1):
cell = ws.cell(row=row_index, column=col_index)
cell.value = cell_value
cell.font = Font(size=9)
if isinstance(cell_value, str) and cell_value.startswith("$"):
total_cost += float(cell_value[1:])
# Write total cost to the last row of sheet
last_row_index = len(data_rows) + 1
total_cost_cell = ws.cell(row=last_row_index, column=4)
total_cost_cell.value = total_cost
total_cost_cell.font = Font(size=9, bold=True)
# Autosize the columns
for col in ws.columns:
max_length = 0
column = col[0].column_letter
for cell in col:
try:
if len(str(cell.value)) > max_length:
max_length = len(str(cell.value))
except:
pass
adjusted_width = (max_length + 2)
ws.column_dimensions[column].width = adjusted_width
# Remove blank sheet
workbook.remove(workbook["Sheet"])
# Save the workbook
workbook.save(excel_file_path)
# Display output to screen
pd.set_option('display.max_rows', data_filtered.shape[0]+1)
# Format and display output
subscription_name = data_filtered['Subscription'].iloc[0]
billing_period = data_filtered['Billing Month'].iloc[0]
table = tabulate(data_filtered, headers='keys', tablefmt='plain', showindex=False)
table = table.split('\n')
table.insert(1, '-' * len(table[0]))
table = '\n'.join(table)
print(f"Total Monthly Costs for Current Billing Period: {billing_period}")
print(f"Subscription Name: {subscription_name}\n")
print(table)
