"""Parse a statement text file and bulk-insert its transactions into MySQL.

Pipeline: ``create_lists`` groups the recognised transaction lines of the
input file per owner, ``build_insert`` turns those lines into parameter
tuples matching ``insert_query``, and ``db_insert`` writes them to the
database.
"""
import os
import re
from datetime import date, datetime

try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv only pre-loads a .env file into os.environ.  Without it
    # the parsing helpers are still usable and INPUT_FILE can be supplied
    # through the real process environment.
    def load_dotenv(*args, **kwargs):
        """Fallback used when python-dotenv is not installed; loads nothing."""
        return False


load_dotenv()

insert_query = (
    "INSERT INTO default.TRANSACTION "
    "(TDATE, ACCOUNTID, MEMO, CITY, COUNTRY, OUTFLOW, INFLOW, OWNERID, "
    "INSTALLMENT_NR, INSTALLMENT_TT, CREATED, UPDATED) "
    "VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )"
)
input_file = os.getenv('INPUT_FILE')

# Owner header at the start of a line: one digit, a dash, then the owner name
# (only the first word of the name is captured).
_OWNER_RE = re.compile(r"\d\s*-\s*(\w+)")

# Transaction line: dd.mm.yyyy, 23 + 14 + 2 fixed-width characters, then two
# amounts written with "," as decimal separator.  The second amount accepts
# an optional "." thousands separator so that every line kept by
# create_lists is one that build_insert's field pattern can also parse.
_STATEMENT_LINE_RE = re.compile(
    r"\d{2}\.\d{2}\.\d{4}.{23}.{14}.{2}\s*-?\d*\.?\d+,\d{2}\s*\d*\.?\d+,\d{2}"
)

# Payment line.  "200211" is a literal marker the line must contain; its
# meaning is not evident from this file.
_PAYMENT_LINE_RE = re.compile(
    r"\d{2}\.\d{2}\.\d{4}PGTO.*200211(\s*-?\d*\.?\d+,\d{2})(\s*\d+,\d{2})"
)

# Field-capturing pattern for transaction lines.  Groups: 1-3 day/month/year;
# 5-8 memo, installment number, installment total and city when the memo has
# a "PARC n/m" suffix; otherwise 9-10 memo and city; 11 country;
# 12-13 the two amounts.
_TRANSACTION_FIELDS_RE = re.compile(
    r"(\d{2})\.(\d{2})\.(\d{4})((.+PARC (\d+.)\/(\d+))(\s.{12})|(.{23})(.{14}))(.{2})(\s*-?\d*\.?\d+,\d{2})(\s*\d*\.?\d+,\d{2})"
)

# Field-capturing pattern for payment lines.  Groups: 1-3 day/month/year,
# 4 memo, 5-6 the two amounts.
_PAYMENT_FIELDS_RE = re.compile(
    r"(\d{2})\.(\d{2})\.(\d{4})(PGTO DEBITO CONTA).*200211(\s*-?\d*\.?\d+,\d{2})(\s*\d+,\d{2})"
)


def create_lists(input_file: str) -> dict:
    """Group the transaction lines of *input_file* by owner.

    The file is read as Latin-1 and every line is stripped.  A line that
    starts with an owner header opens (or re-opens) that owner's section;
    every following payment or transaction line is appended to it.  All
    other lines are ignored.

    Args:
        input_file: Path of the statement text file.

    Returns:
        A dict keyed by ``"list_<owner name lower-cased>"``.  Each value is a
        dict with ``"owner_name"``, ``"owner_id"`` (1-based, in order of first
        appearance) and ``"tlist"`` (the stripped transaction lines, in file
        order).

    Raises:
        ValueError: If a transaction line appears before any owner header.
    """
    result = {}
    current = None

    with open(input_file, "r", encoding="latin") as file:
        for raw_line in file:
            line = raw_line.strip()

            header = _OWNER_RE.match(line)
            if header:
                owner_name = header.group(1)
                list_name = f"list_{owner_name.lower()}"
                # Create the owner once: a repeated header must neither
                # discard lines already collected nor change the owner id.
                if list_name not in result:
                    result[list_name] = {
                        "owner_name": owner_name,
                        "owner_id": len(result) + 1,
                        "tlist": [],
                    }
                current = result[list_name]
            elif _PAYMENT_LINE_RE.match(line) or _STATEMENT_LINE_RE.match(line):
                if current is None:
                    raise ValueError(
                        f"Transaction line found before any owner header: {line!r}"
                    )
                current["tlist"].append(line)

    return result


def _iso_date(match) -> str:
    """Return groups 1-3 (day, month, year) of *match* as ``YYYY-MM-DD``."""
    return str(
        date(int(match.group(3)), int(match.group(2)), int(match.group(1)))
    )


def _to_decimal_text(amount: str) -> str:
    """Convert an amount such as ``' -1.234,56'`` to ``'-1234.56'``."""
    return amount.strip().replace(".", "").replace(",", ".")


def _build_row(item: str, account: int, owner_id) -> tuple:
    """Turn one statement line into a parameter tuple for ``insert_query``."""
    created = str(datetime.now(tz=None))

    payment = _PAYMENT_FIELDS_RE.match(item)
    if payment:
        # Payments carry no city, country, outflow or installment data; the
        # first amount on the line is stored in the INFLOW column.
        return (
            _iso_date(payment),
            account,
            payment.group(4),
            None,
            None,
            None,
            _to_decimal_text(payment.group(5)),
            owner_id,
            None,
            None,
            created,
            None,
        )

    fields = _TRANSACTION_FIELDS_RE.match(item)
    if fields is None:
        raise ValueError(f"Unrecognised transaction line: {item!r}")

    if fields.group(5):
        # Installment purchase: memo ends in "PARC <number>/<total>".
        memo = fields.group(5)
        city = fields.group(8)
        installment_nr = int(fields.group(6))
        installment_tt = int(fields.group(7))
    else:
        memo = fields.group(9)
        city = fields.group(10)
        installment_nr = 1
        installment_tt = None

    return (
        _iso_date(fields),
        account,
        memo,
        city,
        fields.group(11),
        _to_decimal_text(fields.group(12)),
        _to_decimal_text(fields.group(13)),
        owner_id,
        installment_nr,
        installment_tt,
        created,
        None,
    )


def build_insert(input_dict: dict, account: int):
    """Build the parameter tuples for ``insert_query``.

    Args:
        input_dict: Mapping shaped like the return value of ``create_lists``.
            An owner without a ``"tlist"`` entry contributes no rows.
        account: Value stored in the ACCOUNTID column of every row.

    Returns:
        A list of 12-tuples in the column order of ``insert_query``.  Dates
        are ISO strings, amounts are strings using "." as decimal separator,
        CREATED is the current local time as a string and UPDATED is ``None``.

    Raises:
        ValueError: If a line matches neither the payment nor the transaction
            field pattern, or if its date is not a valid calendar date.
    """
    insert_bulk = []
    for owner in input_dict.values():
        for item in owner.get("tlist", ()):
            insert_bulk.append(_build_row(item, account, owner["owner_id"]))
    return insert_bulk


def db_insert(insert_bulk: list[tuple]):
    """Insert *insert_bulk* into the database with one ``executemany`` call.

    Connector errors are printed rather than raised.  The connection and the
    cursor are closed by their ``with`` blocks, including when connecting or
    inserting fails.

    Args:
        insert_bulk: Parameter tuples as produced by ``build_insert``.
    """
    from mysql.connector import connect, Error

    try:
        # NOTE(review): credentials are hard-coded; consider loading them
        # from the environment like INPUT_FILE.
        with connect(
            host="localhost",
            user="root",
            password="pleasehashapasswordomg",
            database="default",
        ) as connection:
            print("CONNECTED!", connection)
            with connection.cursor() as cursor:
                cursor.executemany(insert_query, insert_bulk)
            connection.commit()
            print("DONE!")
    except Error as e:
        print(e)


if __name__ == "__main__":
    if not input_file:
        raise SystemExit("INPUT_FILE is not set (environment or .env file).")
    db_insert(build_insert(create_lists(input_file), 1))