personal finance control engine
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

331 lines
12 KiB

{
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Parameterised INSERT used for every transaction row: one %s placeholder per\n",
"# listed column, in the same order as the column list.\n",
"insert_query = (\n",
"    \"INSERT IGNORE INTO default.TRANSACTION \"\n",
"    \"(ID, TDATE, ACCOUNTID, MEMO, COUNTRY, OUTFLOW, INFLOW, OWNERID, \"\n",
"    \"INSTALLMENT_NR, INSTALLMENT_TT, CREATED, UPDATED) \"\n",
"    \"VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def find_owner(queried_name: str):\n",
"    \"\"\"Look up an owner by name in the OWNER table, ignoring letter case.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    queried_name : str\n",
"        Name to search for. It is compared case-insensitively against the\n",
"        second column of every OWNER row.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list | None\n",
"        A flat list holding the first and second column of every matching row\n",
"        (``[row[0], row[1], ...]``), or ``None`` when nothing matched or the\n",
"        query failed. Database errors are printed, not raised.\n",
"    \"\"\"\n",
"    import os\n",
"    from mysql.connector import connect, Error\n",
"\n",
"    # A non-string can never equal a lower-cased name, so there is no match.\n",
"    if not isinstance(queried_name, str):\n",
"        return None\n",
"\n",
"    query = \"SELECT * FROM OWNER\"\n",
"    wanted = queried_name.lower()\n",
"    result = []\n",
"\n",
"    try:\n",
"        # Connection settings can be overridden through environment variables;\n",
"        # the fallbacks are the values this notebook has always used.\n",
"        # NOTE(review): the password fallback is still a credential kept in\n",
"        # source - remove it once FINANCE_DB_PASSWORD is set where this runs.\n",
"        with connect(\n",
"            host=os.environ.get(\"FINANCE_DB_HOST\", \"localhost\"),\n",
"            user=os.environ.get(\"FINANCE_DB_USER\", \"root\"),\n",
"            password=os.environ.get(\"FINANCE_DB_PASSWORD\", \"pleasehashapasswordomg\"),\n",
"            database=os.environ.get(\"FINANCE_DB_NAME\", \"default\"),\n",
"        ) as connection:\n",
"            with connection.cursor() as cursor:\n",
"                cursor.execute(query)\n",
"                for row in cursor.fetchall():\n",
"                    name = row[1]\n",
"                    # Rows with a NULL (or non-text) name cannot match.\n",
"                    if isinstance(name, str) and name.lower() == wanted:\n",
"                        result.extend((row[0], row[1]))\n",
"    except Error as e:\n",
"        # There is deliberately no ``finally: connection.close()``: the ``with``\n",
"        # block already closes the connection, and when ``connect`` itself\n",
"        # fails the name ``connection`` is never bound, so closing it would\n",
"        # raise and hide the real error.\n",
"        print(e)\n",
"\n",
"    return result if result else None\n",
"\n",
"find_owner(\"daniel\")[0]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# OUROCARD_VISA_INFINITE-Próxima_Fatura.txt\n",
"# OUROCARD_VISA_INFINITE-Ago_24.txt\n",
"def create_lists(file_name):\n",
"    \"\"\"Split a card statement text file into per-owner transaction lines.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    file_name : str | None\n",
"        Path of the statement file. A falsy value falls back to\n",
"        \"OUROCARD_VISA_INFINITE-Ago_24.txt\". The file is read as latin-1.\n",
"\n",
"    Returns\n",
"    -------\n",
"    dict\n",
"        One entry per owner header found, keyed ``list_<owner name lowercased>``.\n",
"        Each entry holds ``owner_name``, ``owner_id`` (from ``find_owner``),\n",
"        ``tlist`` (the stripped transaction and payment lines under that\n",
"        header) and ``isPartial``. ``isPartial`` is the same for all entries:\n",
"        False when any collected line has the full dd.mm.yyyy layout, True\n",
"        otherwise. An empty dict is returned when no known owner is found.\n",
"\n",
"    Notes\n",
"    -----\n",
"    Sections whose owner is unknown to ``find_owner`` are reported with\n",
"    ``print`` and skipped; lines before the first owner header are ignored.\n",
"    \"\"\"\n",
"    import re\n",
"\n",
"    # Read the whole statement up front\n",
"    with open(file_name or \"OUROCARD_VISA_INFINITE-Ago_24.txt\", \"r\", encoding=\"latin\") as file:\n",
"        contents = file.readlines()\n",
"\n",
"    # Define the regex patterns\n",
"    owner_pattern = r\"\\d\\s?-\\s?([A-Z]+)\"\n",
"    line_pattern = r\"\\d{2}\\.\\d{2}\\.\\d{4}.{23}.{14}.{2}\\s*-?\\d*\\.?\\d+,\\d{2}\\s*\\d+,\\d{2}\"\n",
"    payment_pattern = (r\"\\d{2}\\.\\d{2}\\.\\d{4}PGTO.*200211(\\s*-?\\d*\\.?\\d+,\\d{2})(\\s*\\d+,\\d{2})\")\n",
"    partial_invoice_line_pattern = r\"\\d{2}\\/\\d{2}.{27}.{16}.{2}\\s+\\s*-?\\d*\\.?\\d+,\\d{2}\\s*\\d+,\\d{2}\"\n",
"\n",
"    result = {}\n",
"    owner_ids = {}  # owner name -> id or None, so each owner is looked up once\n",
"    current_list = None  # key of the section being filled; None means \"ignore\"\n",
"\n",
"    for raw_line in contents:\n",
"        line = raw_line.strip()\n",
"\n",
"        # Owner headers are recognised only at the start of the line, so a\n",
"        # digit-dash-capitals sequence inside a memo is not taken for an owner.\n",
"        header = re.match(owner_pattern, line)\n",
"        if header:\n",
"            owner_name = header.group(1)\n",
"            if owner_name not in owner_ids:\n",
"                found = find_owner(owner_name.lower())\n",
"                owner_ids[owner_name] = found[0] if found else None\n",
"\n",
"            if owner_ids[owner_name] is None:\n",
"                print(f\"create_lists(): owner {owner_name!r} not found; skipping its section\")\n",
"                # Do not let these lines fall into the previous owner's list.\n",
"                current_list = None\n",
"                continue\n",
"\n",
"            current_list = f\"list_{owner_name.lower()}\"\n",
"            # setdefault keeps lines already collected when a header repeats.\n",
"            result.setdefault(\n",
"                current_list,\n",
"                {\n",
"                    \"owner_name\": owner_name,\n",
"                    \"owner_id\": owner_ids[owner_name],\n",
"                    \"tlist\": [],\n",
"                },\n",
"            )\n",
"            continue\n",
"\n",
"        if current_list is None:\n",
"            continue\n",
"\n",
"        if (\n",
"            re.match(payment_pattern, line)\n",
"            or re.match(line_pattern, line)\n",
"            or re.match(partial_invoice_line_pattern, line)\n",
"        ):\n",
"            result[current_list][\"tlist\"].append(line)\n",
"\n",
"    # Check file pattern: look at every collected line instead of one sample,\n",
"    # so an empty list or a leading payment line cannot decide the layout.\n",
"    has_full_layout = any(\n",
"        re.match(line_pattern, entry)\n",
"        for section in result.values()\n",
"        for entry in section[\"tlist\"]\n",
"    )\n",
"    isPartial = not has_full_layout\n",
"\n",
"    for section in result.values():\n",
"        section[\"isPartial\"] = isPartial\n",
"\n",
"    return result"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def build_insert(input_dict: dict, account: int):\n",
"    \"\"\"Convert parsed statement lines into row tuples for ``insert_query``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    input_dict : dict\n",
"        Mapping shaped like the result of ``create_lists``: every value holds\n",
"        ``owner_id``, ``isPartial`` and ``tlist`` (raw statement lines).\n",
"    account : int\n",
"        Value placed in the account position of every row.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list[tuple]\n",
"        One 12-item tuple per parsed line: (id, date, account, memo, country,\n",
"        outflow, inflow, owner id, installment number, installment total,\n",
"        created, updated). Amounts are strings such as \"1234.56\"; ``updated``\n",
"        is always None.\n",
"\n",
"    Notes\n",
"    -----\n",
"    * Lines that match no pattern, or that carry an impossible calendar date,\n",
"      are reported with ``print`` and skipped instead of raising.\n",
"    * Partial-invoice lines carry no year. The current year is assumed unless\n",
"      that would put the transaction in the future, in which case the\n",
"      previous year is used.\n",
"    * The id is the SHA-256 of date + memo + outflow + inflow, so two lines\n",
"      with the same date, memo and amounts share an id.\n",
"    \"\"\"\n",
"    from datetime import date, datetime\n",
"    import re\n",
"    import hashlib\n",
"\n",
"    # RegEx Patterns\n",
"    line_group_pattern = r\"(?P<day>\\d{2})\\.(?P<month>\\d{2})\\.(?P<year>\\d{4})(?:(?P<p_memo>.+PARC (?P<p_nr>\\d+.)\\/(?P<p_tt>\\d+)\\s.{12})|(?P<memo>.{37}))(?P<country>.{2})(?P<outflow>\\s*-?\\d*\\.?\\d+,\\d{2})(?P<inflow>\\s*\\d*\\.?\\d+,\\d{2})\"\n",
"    partial_invoice_group_pattern = r\"(?P<day>\\d{2})\\/(?P<month>\\d{2})(?:(?P<p_memo>.+PARC (?P<p_nr>\\d{2})\\/(?P<p_tt>\\d{2}).{15})|(?P<memo>.{43}))(?P<country>.{2})(?P<outflow>\\s+\\s*-?\\d*\\.?\\d+,\\d{2})(?P<inflow>\\s*\\d+,\\d{2})\"\n",
"    payment_pattern = r\"(?P<day>\\d{2})\\.(?P<month>\\d{2})\\.(?P<year>\\d{4})(?P<memo>PGTO DEBITO CONTA).*200211(?P<inflow>\\s*-?\\d*\\.?\\d+,\\d{2})(?P<outflow>\\s*\\d+,\\d{2})\"\n",
"\n",
"    def to_amount(raw, drop_sign=False):\n",
"        # \"1.234,56\" -> \"1234.56\"; drop_sign also removes the minus sign.\n",
"        text = raw.strip().replace(\".\", \"\").replace(\",\", \".\")\n",
"        return text.replace(\"-\", \"\") if drop_sign else text\n",
"\n",
"    def resolve_date(fields, today):\n",
"        # Build the transaction date; raises ValueError for impossible dates.\n",
"        day, month = int(fields[\"day\"]), int(fields[\"month\"])\n",
"        if fields.get(\"year\"):\n",
"            return date(int(fields[\"year\"]), month, day)\n",
"        # Partial files will not have the year data on transactions.\n",
"        try:\n",
"            candidate = date(today.year, month, day)\n",
"        except ValueError:\n",
"            candidate = None  # e.g. 29/02 while the current year is not a leap year\n",
"        if candidate is None or candidate > today:\n",
"            # A statement line cannot be dated in the future: it belongs to\n",
"            # last year (typically a December line read in January).\n",
"            candidate = date(today.year - 1, month, day)\n",
"        return candidate\n",
"\n",
"    insert_bulk = []\n",
"    today = date.today()\n",
"\n",
"    for key in input_dict:\n",
"        owner_block = input_dict[key]\n",
"\n",
"        # Try the layout announced by isPartial first and the other one as a\n",
"        # fallback, so a wrong layout flag does not abort the whole import.\n",
"        if owner_block.get(\"isPartial\"):\n",
"            patterns = (partial_invoice_group_pattern, line_group_pattern)\n",
"        else:\n",
"            patterns = (line_group_pattern, partial_invoice_group_pattern)\n",
"\n",
"        for item in owner_block.get(\"tlist\", []):\n",
"            # check if it is an invoice payment\n",
"            matches = re.match(payment_pattern, item)\n",
"            if matches:\n",
"                fields = matches.groupdict()\n",
"                tMemo = fields[\"memo\"]\n",
"                tCountry = None\n",
"                tOutflow = \"0.00\"\n",
"                tInflow = to_amount(fields[\"inflow\"], drop_sign=True)\n",
"                tInstallmentNr = None\n",
"                tInstallmentTt = None\n",
"            else:\n",
"                matches = re.match(patterns[0], item) or re.match(patterns[1], item)\n",
"                if matches is None:\n",
"                    print(f\"build_insert(): skipping unrecognised line: {item!r}\")\n",
"                    continue\n",
"                fields = matches.groupdict()\n",
"                tMemo = fields[\"p_memo\"] if fields[\"p_memo\"] else fields[\"memo\"]\n",
"                tInstallmentNr = int(fields[\"p_nr\"]) if fields[\"p_nr\"] else None\n",
"                tInstallmentTt = int(fields[\"p_tt\"]) if fields[\"p_tt\"] else None\n",
"                tCountry = fields[\"country\"]\n",
"                tOutflow = to_amount(fields[\"outflow\"])\n",
"                tInflow = to_amount(fields[\"inflow\"])\n",
"\n",
"            try:\n",
"                tTdate = str(resolve_date(fields, today))\n",
"            except ValueError:\n",
"                print(f\"build_insert(): skipping line with invalid date: {item!r}\")\n",
"                continue\n",
"\n",
"            tCreated = str(datetime.now(tz=None))\n",
"\n",
"            # The memo is hashed exactly as captured (padding included) so ids\n",
"            # stay identical to the ones produced for earlier imports.\n",
"            preHash = tTdate + tMemo + tOutflow + tInflow\n",
"            tId = hashlib.sha256(preHash.encode()).hexdigest()\n",
"\n",
"            insert_bulk.append(\n",
"                (\n",
"                    tId,\n",
"                    tTdate,\n",
"                    account,\n",
"                    tMemo,\n",
"                    tCountry,\n",
"                    tOutflow,\n",
"                    tInflow,\n",
"                    owner_block[\"owner_id\"],\n",
"                    tInstallmentNr,\n",
"                    tInstallmentTt,\n",
"                    tCreated,\n",
"                    None,\n",
"                )\n",
"            )\n",
"\n",
"    return insert_bulk"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def db_insert(insert_bulk: list[tuple]):\n",
"    \"\"\"Send the prepared rows to the database using ``insert_query``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    insert_bulk : list[tuple]\n",
"        Parameter tuples for ``insert_query``, one per row. An empty list is\n",
"        reported and no connection is opened.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        Progress is printed. Database errors are printed, not raised.\n",
"    \"\"\"\n",
"    import os\n",
"    from mysql.connector import connect, Error\n",
"\n",
"    if not insert_bulk:\n",
"        print(\"Nothing to insert.\")\n",
"        return\n",
"\n",
"    try:\n",
"        # Connection settings can be overridden through environment variables;\n",
"        # the fallbacks are the values this notebook has always used.\n",
"        # NOTE(review): the password fallback is still a credential kept in\n",
"        # source - remove it once FINANCE_DB_PASSWORD is set where this runs.\n",
"        with connect(\n",
"            host=os.environ.get(\"FINANCE_DB_HOST\", \"localhost\"),\n",
"            user=os.environ.get(\"FINANCE_DB_USER\", \"root\"),\n",
"            password=os.environ.get(\"FINANCE_DB_PASSWORD\", \"pleasehashapasswordomg\"),\n",
"            database=os.environ.get(\"FINANCE_DB_NAME\", \"default\"),\n",
"        ) as connection:\n",
"            print(\"CONNECTED!\", connection)\n",
"            with connection.cursor() as cursor:\n",
"                cursor.executemany(insert_query, insert_bulk)\n",
"                connection.commit()\n",
"            print(\"DONE!\")\n",
"    except Error as e:\n",
"        # There is deliberately no ``finally: connection.close()``: the ``with``\n",
"        # block already closes the connection, and when ``connect`` itself\n",
"        # fails the name ``connection`` is never bound, so closing it would\n",
"        # raise and hide the real error.\n",
"        print(e)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def executor(pattern: str = \"./documents/OUROCARD*.txt\", account: int = 1):\n",
"    \"\"\"Import every statement file matching ``pattern`` into the database.\n",
"\n",
"    Each file goes through ``create_lists`` -> ``build_insert`` -> ``db_insert``.\n",
"    A failure in one file is printed with the file name and the error, and the\n",
"    remaining files are still processed.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    pattern : str\n",
"        Glob pattern of the statement files to import.\n",
"    account : int\n",
"        Account id handed to ``build_insert`` for every row.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        A summary line is printed at the end.\n",
"    \"\"\"\n",
"    import glob\n",
"\n",
"    # Sorted so the processing order does not depend on the file system.\n",
"    matched_files = sorted(glob.glob(pattern))\n",
"    failed = []\n",
"\n",
"    for file_name in matched_files:\n",
"        try:\n",
"            db_insert(build_insert(create_lists(file_name), account))\n",
"        except Exception as error:\n",
"            # Exception (not a bare except) keeps Ctrl+C working and lets the\n",
"            # message say which file failed and why.\n",
"            failed.append(file_name)\n",
"            print(f\"executor(): Error while processing {file_name}: {error!r}\")\n",
"\n",
"    if failed:\n",
"        print(f\"EXECUTOR COMPLETED WITH ERRORS: {len(failed)} of {len(matched_files)} file(s) failed.\")\n",
"    else:\n",
"        print(\"EXECUTOR COMPLETED.\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CONNECTED! <mysql.connector.connection_cext.CMySQLConnection object at 0x72ad687912e0>\n",
"DONE!\n",
"CONNECTED! <mysql.connector.connection_cext.CMySQLConnection object at 0x72ad68792b70>\n",
"DONE!\n",
"CONNECTED! <mysql.connector.connection_cext.CMySQLConnection object at 0x72ad68793050>\n",
"DONE!\n",
"CONNECTED! <mysql.connector.connection_cext.CMySQLConnection object at 0x72ad687928a0>\n",
"DONE!\n",
"CONNECTED! <mysql.connector.connection_cext.CMySQLConnection object at 0x72ad68792bd0>\n",
"DONE!\n",
"EXECUTOR COMPLETED.\n"
]
}
],
"source": [
"# Run the import for every statement file matched by executor()'s glob.\n",
"executor()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}