I am new to Python. I have some coding experirence, though it is quite old.
I am working on a existing script that places a data frame (a row of data at a time based on the last datetime stamp data was written into the workbook) into one worksheet of the .xlsx workbook. The script works fine.
I created a VBA script to provide graphing of data from another worksheet where the data calculations are done and place that new data (based on the latest date/time timestamp of the data) into an existing chart on another worksheet. I tested the VBA script out and it works fine as well.
Then I used the orginal Python script to write new data into the .xlsm workbook.
This works, though if I try to open the workbook after the Pythoin script has completed to run the macro to graph the data, Excel gives an error that the file is corrupt and not formatted correctly.
I can change the exension of the workbook from .xlsm to .xlsx, open the workbook with out issue and see the new data that was added, and of course the macro is not preserved.
I spent some time reseaching this issue, though have not found a solution that does not, from my view, require some change to the orignal script.
Here is the Python script in use:
I am working on a existing script that places a data frame (a row of data at a time based on the last datetime stamp data was written into the workbook) into one worksheet of the .xlsx workbook. The script works fine.
I created a VBA script to provide graphing of data from another worksheet where the data calculations are done and place that new data (based on the latest date/time timestamp of the data) into an existing chart on another worksheet. I tested the VBA script out and it works fine as well.
Then I used the orginal Python script to write new data into the .xlsm workbook.
This works, though if I try to open the workbook after the Pythoin script has completed to run the macro to graph the data, Excel gives an error that the file is corrupt and not formatted correctly.
I can change the exension of the workbook from .xlsm to .xlsx, open the workbook with out issue and see the new data that was added, and of course the macro is not preserved.
I spent some time reseaching this issue, though have not found a solution that does not, from my view, require some change to the orignal script.

Here is the Python script in use:
import os
import pandas as pd
from dateutil import parser
import datetime
import numpy
import sigfig
from openpyxl import Workbook
import numpy as np
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.message import EmailMessage
import FTPPuller
# Setting this variable to True will populate the calculated totals to the middle sheet in the Excel
# NOT advised if keeping the current formlas on that sheet uncomment the line below and set to True to re-write the entire calcualtion worksheet
overwrite_calculated_BL_sheet = False
write_excel = True # leave set to True to allow writing to Excel workbook
send_alert_emails = False # set to True to send emails
pull_from_ftp = False # set to True to pull data from FTP server
# Upper and lower threshold TDR alert
upper_bound = 0.2
lower_bound = -0.2
# CHANGE VARIABLE TO SET EARLIEST DATE ***********************
# YEAR, MONTH, DAY
date_cutoff = pd.Timestamp(2023,6,1)
# root_sensor_folder_name = "Sensor Data" # 17jul25 Mhyatt, added for testing of script, comment out for test version usage, uncomment for live version usage.
root_sensor_folder_name = "Sensor Data - TEST" # 17jul25 Mhyatt, added for testing of script, comment out for live version usage, uncomment for test version fo script.
# For alerts outside of thresholds - uncomment line below to send emails
# email_recipients = [''] #
def GetMidnightRecords(row):
if str(parser.parse(row['TIMESTAMP']).time()) == "00:00:00":
# print("TRUE")
return row
def ParseLatestDate(row):
value = None
try:
value = parser.parse(str(row))
except:
print(f"Cannot parse {row}")
return value
def send_email(to, subject, message):
print("Sending email...")
msg = EmailMessage()
msg['From'] = '[email protected]'
msg['Subject'] = subject
msg['To'] = ", ".join(email_recipients)
msg.set_content(message)
s = smtplib.SMTP(host='', port="587")
s.starttls()
s.login('')
s.send_message(msg)
s.quit()
print("Email sent.")
def ProcessTDRSensor(sensor_root_folder, raw_folder, output_folder, dat_file_name, output_file_name, baseline_file):
raw_file = os.path.join(raw_folder, dat_file_name)
if pull_from_ftp:
FTPPuller.GetBouldinFTPFile(raw_folder, dat_file_name)
output_file = os.path.join(output_folder, output_file_name)
# Baseline file
tdr_bl_df = pd.read_csv(baseline_file, header = [0,1])
# Data file
tdr_df = pd.read_csv(raw_file, skiprows=1, usecols=range(2062), low_memory=False)
# Drop the Units, etc.. rows
tdr_df = tdr_df.drop(index=[0,1], axis=1)
# Drop unneeded columns
if 'TDR2' in raw_file:
print("Dropping columns - TDR2")
tdr_df.drop(["C2_Averaging","C2_PropVelocity","C2_nPoints","C2_Cable_m","C2_Window_m","C2_Probe_m","C2_ProbeoSet","C2_Mult","C2_oSet","C2_NR_Hz","C2_FilterLevel","C2_Laa"], axis=1, inplace=True)
print(f"TDR2 now has {len(tdr_df.columns)} columns.")
# Filter because of data errors...
tdr_df['TIMESTAMP'] = pd.to_datetime(tdr_df['TIMESTAMP'])
tdr_df = tdr_df[tdr_df['TIMESTAMP'] > date_cutoff]
log_file_path = f'{sensor_root_folder}\{dat_file_name.split(".")[0]}_Timestamp_Log.csv'
# Create a .CSV with just the dates from the file as a record.
current_file_dates = pd.to_datetime(tdr_df['TIMESTAMP']).drop_duplicates()
current_file_dates = (current_file_dates[pd.notnull(current_file_dates)])
# If there is not already a log file, simply spew out the current data.
if not os.path.exists(log_file_path):
current_file_dates.to_csv(log_file_path, index=False, header=False)
# This is what will be put in the import folder. If there are no existing dates, just take everything :)
# new_data = tdr_df
# Dummy for variable w/ no dates
existing_dates = pd.DataFrame([np.nan])
else:
existing_dates = pd.read_csv(log_file_path, header=None)
# 0 is what we get for a "header" when header=None
existing_dates[0] = pd.to_datetime(existing_dates[0])
# Exclude any rows that are in the log as having already been imported
tdr_df = tdr_df[~tdr_df["TIMESTAMP"].isin(list(existing_dates[0]))]
# Now, we need to append these new dates to the existing log CSV
new_dates = current_file_dates[~current_file_dates.isin(list(existing_dates[0]))]
# print(new_dates)
new_dates.to_csv(log_file_path, index=False, header=False, mode="a")
# print("Dataframe after filtering...")
# print(tdr_df)
# Threshold check on ALL ROWS -------------------------------------------------------------------
# Store any thresholds/alerts in a list and send all in a single e-mail
thresholds_exceeded = list()
thresholds_were_exceeded = False
print(f"Checking new data for threshold exceedance: {len(tdr_df.index)} rows...")
for n, row in enumerate(tdr_df.iterrows()):
# Limit to not-yet-processed rows
current_row_date = row[1][0].date() # TIMESTAMP column/value
# Report progress every 100 rows
if n % 100 == 0:
print(f"(Threshold Check) - Processing row: {n}")
try:
new_data = row
# Iterate over each column w/ a value...
for i, value in enumerate(row[1]):
# Skip the Timestamp & Record columns
if i < 2:
continue
raw_value = value
baseline_value = tdr_bl_df.loc[0][i]
new_value = float(raw_value) - float(baseline_value)
##### ALERT CHECK ######
if new_value < lower_bound or new_value > upper_bound:
thresholds_were_exceeded = True
# This will log the TIMESTAMP and the Value
threshold_text = f"***** TDR Threshold exceeded: {row[1][0]}\t{tdr_df.columns[i]}\t{new_value} ******"
# print(threshold_text)
thresholds_exceeded.append(threshold_text)
except Exception as argument:
print(f"WARNING: Error processing row {n}, column {i}:\n{value}\n{argument}")
if thresholds_were_exceeded:
print("The following thresholds were exceeded:")
thresholds_text = '\n'.join(str(x) for x in thresholds_exceeded)
print(thresholds_text)
if send_alert_emails:
send_email(email_recipients, 'TDR Threshold Exceeded!', thresholds_text)
# ------------------------------------------------------------------------------------------------
# Only take the midnight readings...
#tdr1_df = tdr1_df[tdr1_df['TIMESTAMP'].str.contains("00:00:00")]
tdr_df = tdr_df[tdr_df['TIMESTAMP'].dt.time == datetime.time(0)]
# The first 5 rows appear to not read numeric, while the rest do...so we'll misting effect this...
numeric_columns = tdr_df.columns.drop(['TIMESTAMP'])
tdr_df[numeric_columns] = tdr_df[numeric_columns].apply(pd.to_numeric)
# baseline_series = tdr_bl_df.loc[0].drop(['TIMESTAMP','Unnamed: 1_level_0'])
# print(baseline_series)
# print(tdr1_bl_df.columns.values[1][1])
# Make a copy of the dataframe to store the calculated values
calculated_df = tdr_df.iloc[:0].copy()
# Get the existing data from the Excel sheet so we know the latest timestmp, etc..
existing_raw = pd.read_excel(output_file, sheet_name="Raw Data")
existing_dates = existing_raw['TIMESTAMP'].apply(lambda x: ParseLatestDate(x))
existing_dates = existing_dates[pd.notnull(existing_dates)]
# latest_date = max(existing_dates).date()
latest_date = max(existing_dates)
print(f"Latest date in current file: {latest_date}")
# Compare each of these rows to baseline
for n, row in enumerate(tdr_df.iterrows()):
# Limit to not-yet-processed rows
current_row_date = row[1][0].date() # TIMESTAMP column/value
if current_row_date <= latest_date:
print(f"Not checking values for row {n}: {current_row_date}, already in Excel sheet...")
continue
print(f"Processing row: {n}")
new_data = row
for i, value in enumerate(row[1]):
# Skip the Timestamp & Record columns
if i < 2:
continue
raw_value = value
baseline_value = tdr_bl_df.loc[0][i]
new_value = float(raw_value) - float(baseline_value)
# Set the value and round!
new_data[1][i] = sigfig.round(new_value, sigfigs = 4)
# convert to list
new_row = list()
for i, value in enumerate(new_data[1]):
new_row.append(value)
# Above results in each value as another row, we want them all to be column values
# Reshape thus...
reshaped_row = numpy.reshape(new_row,(1,2050))
# Make the row a dataframe (concat below must have either dataframes or series')
df_to_append = pd.DataFrame(reshaped_row, columns=tdr_df.columns)
# Add the row to the output dataframe
calculated_df = pd.concat([calculated_df, df_to_append])
# Get the Baseline info without using multi-index, so that we can just insert it as part of a data frame
tdr_bl_final_df = pd.read_csv(baseline_file)
# Get the "FT" header into the raw data...
# tdr_df['TIMESTAMP'] = tdr_df['TIMESTAMP'].astype('string')
tdr_df['RECORD'] = tdr_df['RECORD'].astype('string')
tdr_bl_final_df['TIMESTAMP'] = tdr_bl_final_df['TIMESTAMP'].astype('string')
# print(tdr1_bl_df.dtypes)
# print(tdr1_df.dtypes)
# print(tdr1_bl_final_df.dtypes)
tdr_bl_final_df.columns = tdr_df.columns
tdr_df['RECORD'] = None
# Filters the dataframe for only dates that don't yet exist in the Excel sheet
tdr_df = tdr_df[tdr_df['TIMESTAMP'] > latest_date]
# raw_data = pd.concat([tdr_bl_final_df, tdr_df], ignore_index=True)
raw_data = pd.concat([existing_raw, tdr_df], ignore_index=True)
# print(raw_data)
# EXCEL OUTPUT
if write_excel:
print("Writing to Excel file...")
with pd.ExcelWriter(output_file, mode="a", engine="openpyxl", if_sheet_exists="replace") as writer:
# print(raw_data)
raw_data.to_excel(writer, sheet_name="Raw Data", index=False)
pass
if overwrite_calculated_BL_sheet:
calculated_df.to_excel(writer, sheet_name="Raw Data-BL", index=False)
if __name__ == '__main__':
# ProcessTDRSensor(f'C:\\{root_sensor_folder_name}\\TDR1\\', f'C:\\{root_sensor_folder_name}\\TDR1\\Raw\\', f'C:\\{root_sensor_folder_name}\\TDR1\\Output\\', 'BITDRCable1.Dat', 'BI TDR1 Calcs.xlsx', f'C:\{root_sensor_folder_name}\TDR1\TDR1_Baseline.csv') Commented out MHyatt 5dec23
# changed the TDR file name to be BI TDR2 Calcs.xlsm mhyatt 30sept25
ProcessTDRSensor(f'C:\\{root_sensor_folder_name}\\TDR2\\', f'C:\\{root_sensor_folder_name}\\TDR2\\Raw\\', f'C:\\{root_sensor_folder_name}\\TDR2\\Output\\', 'BITDRCable2.Dat', '[color=#C0392B]BI TDR2 Calcs.xlsm[/color]', f'C:\{root_sensor_folder_name}\TDR2\TDR2_Baseline.csv')
print("Done!")
# input('Press Enter to continue...')The file name in red is where I changed the workbook extension to .xlsm (thinking it was a fairly easy soltuion since everthing else worked.)
Larz60+ write Oct-06-2025, 07:42 PM:
added bbcode tags.
Please post all code, output and errors (it it's entirety) between their respective tags. Refer to BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
added bbcode tags.
Please post all code, output and errors (it it's entirety) between their respective tags. Refer to BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
