Hi everyone,
I am new to Python, and just completed some codes as below that take forever to run. It seemed working if I only run the first 10 rows in the main DataFrame, I hope someone can assist me in improving the codes, so that it can run much faster. Many thanks
I am new to Python, and just completed some codes as below that take forever to run. It seemed working if I only run the first 10 rows in the main DataFrame, I hope someone can assist me in improving the codes, so that it can run much faster. Many thanks
from datetime import timedelta
import numpy as np
import pandas as pd
# Get data from the given excel file
filepath = r'C:\Users\User\Desktop\Power BI\All.xlsx'
df_CoC = pd.read_excel(filepath, usecols=['ID, 'Is there time loss?', 'MC Capacity Date From',
'MC Capacity Date To'], sheet_name='CoC') #18000 rows
df_WRS = pd.read_excel(filepath, usecols=['ID', 'Date From', 'Date To'], sheet_name='WRS') #22000 rows
df_Open = pd.read_excel(filepath, sheet_name='Open') #2500 rows
Count_Open = df_Open['ID'].count()
df_Open['CoC_MissingWRS'] = None
df_Open['WRS_MissingCoC'] = None
####### Fill new 'CoC_MissingWRS' column in open query ########
for i in range(0, Count_Open):
if i > 90: break
imn = df_Open.loc[i, 'Injury Management: Injury Management Number']
#Fill WRS date array
arrWRSDates = np.array([])
filtered_df = df_WRS[df_WRS['ID'] == imn]
if not filtered_df.empty:
Count_WRS = filtered_df['ID'].count()-1
for w in range(0, Count_WRS):
#Date_From is always available
Date_From = filtered_df.iloc[w]['Date From'].date()
if filtered_df.iloc[w]['Date To'] is None: Date_To = Date_From
else: Date_To = filtered_df.iloc[w]['Date To'].date()
if Date_To < Date_From: Date_To = Date_From
adate = Date_From
while adate <= Date_To:
if adate not in arrWRSDates: arrWRSDates = np.append(arrWRSDates, adate)
adate += timedelta(days=1)
# Fill Timeloss & all CoC date array
arrTLCoCDates = np.array([])
arrAllCoCDates = np.array([])
filtered_df = df_CoC[df_CoC['ID'] == imn]
if not filtered_df.empty:
Count_CoC= filtered_df['ID'].count() - 1
for c in range(0, Count_CoC):
Date_From = filtered_df.iloc[c]['MC Capacity Date From'].date()
if filtered_df.iloc[c]['MC Capacity Date To'] is None: Date_To = Date_From
else: Date_To = filtered_df.iloc[c]['MC Capacity Date To'].date()
if Date_To < Date_From or Date_To is None: Date_To = Date_From
adate = Date_From
while adate <= Date_To:
if df_CoC.iloc[c]['Is there time loss?'] == 'Yes':
if adate not in arrTLCoCDates: arrTLCoCDates = np.append(arrTLCoCDates, adate)
else:
if adate not in arrAllCoCDates: arrAllCoCDatesCoCDates = np.append(arrAllCoCDates, adate)
adate += timedelta(days=1)
##### Check if CoC missing WRS #####
bMissingWRS = 'No'
if len(arrTLCoCDates) > 0:
adate = min(arrTLCoCDates)
while adate <= max(arrTLCoCDates) and bMissingWRS == 'No':
if adate not in arrWRSDates and adate.weekday() < 5: bMissingWRS = 'Yes'
df_Open.loc[i, 'CoC_MissingWRS'] = bMissingWRS
##### Check if WRS missing CoC #####
bMissingCoC = 'No'
if len(arrWRSDates) > 0:
adate = min(arrWRSDates)
while adate <= max(arrWRSDates) and bMissingCoC == 'No':
if adate not in arrAllCoCDates and adate.weekday() < 5: bMissingCoC = 'Yes'
df_Open.loc[i, 'WRS_MissingCoC'] = bMissingCoC
