May-22-2024, 05:21 PM
In this python code I am attempting to extract files from a FTP folder and then search through them for an invoice#. I have the command window open when running and keep getting stuck on the part where the code is trying to strip the date out of the file name, I get the error "Invalid Date Format" and it states it is skipping that file. The folder contains a ton of files and I really only need to look at the last 30 days worth of files. We have another python code that is doing something similar but with a different file name layout and I used that code to try to replicate it in this instance. This is what the files look like on the ftp folder, attached. This is the beginning of the code that I am getting stuck on will see about the search part once I figure this out;
import os
import datetime
from ftplib import FTP
import csv
from tkinter import *
from tkinter import messagebox
import tkinter as tk
# FTP and path configurations
outputPath = r'\\xxxxxx\xxxxxxxxxx\SanMar Invoice'
cacheDir = 'C:/temp/SanMarRoi/cache'
cacheSize = 60
ftp_site = "xxxxxxxxx"
ftp_username = "xxxxxxxx"
ftp_password = "xxxxxxxx"
# Ensure directories exist
if not os.path.exists(cacheDir):
os.makedirs(cacheDir)
if not os.path.exists(outputPath):
os.makedirs(outputPath)
rows = []
invoice = set()
ponumber = ""
# Function to fetch files from FTP and sync cache
def sync_cache():
ftp = FTP(ftp_site)
ftp.login(user=ftp_username, passwd=ftp_password)
ftp.cwd("Outbound")
filenames = []
ftp.retrlines("LIST", lambda line: filenames.append(line.split()[-1]))
print("Files retrieved from FTP server:")
for file in filenames:
print(file)
# Get current date
now = datetime.datetime.now()
valid_filenames = []
for file in filenames:
try:
# Extract the date from the filename
file_date_str = file.split('-')[-1].split('.')[0] # Get the last part of the filename and remove extension
file_date = datetime.datetime.strptime(file_date_str, "%m-%d-%y")
valid_filenames.append((file, file_date))
except (ValueError, IndexError):
# If parsing fails, skip the file
print(f"Skipping file {file}: Invalid date format")
# Filter to include only files from the last 30 days
recent_files = [file for file in valid_filenames if (now - file[1]).days <= 30]
print("Recent files from the last 30 days:")
for file in recent_files:
print(file[0])
recent_files.sort(key=lambda filename: filename[1], reverse=True)
print("Syncing cache. Please wait...")
for i, (filename, _) in enumerate(recent_files):
if i == cacheSize:
break
local_path = os.path.join(cacheDir, filename)
if os.path.exists(local_path):
print(f"File already in cache: {filename}")
continue
with open(local_path, "w") as cacheFile:
ftp.retrbinary(f"RETR {filename}", lambda data: cacheFile.write(data.decode("utf-8")))
print(f"Downloaded and cached file: {filename}")
print("Cache sync complete")
return [file[0] for file in recent_files]
