Browse Source

updates

master
F2256342 - Daniel de Oliveira Carvalho 1 year ago
parent
commit
30e7203490
  1. 48
      etl.py
  2. 970
      lab.ipynb
  3. 6
      robopato.sql

48
etl.py

@ -3,7 +3,7 @@ from dotenv import load_dotenv
load_dotenv()
insert_query = "INSERT INTO default.TRANSACTION (TDATE, ACCOUNTID, MEMO, CITY, COUNTRY, OUTFLOW, INFLOW, OWNERID, INSTALLMENT_NR, INSTALLMENT_TT, CREATED, UPDATED) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )"
insert_query = "INSERT IGNORE INTO default.TRANSACTION (TDATE, ACCOUNTID, MEMO, CITY, COUNTRY, OUTFLOW, INFLOW, OWNERID, INSTALLMENT_NR, INSTALLMENT_TT, CREATED, UPDATED) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )"
input_file = os.getenv("INPUT_FILE")
@ -79,6 +79,7 @@ def create_lists():
def build_insert(input_dict: dict, account: int):
from datetime import date, datetime
import re
import hashlib
insert_bulk = []
@ -107,14 +108,8 @@ def build_insert(input_dict: dict, account: int):
tAccount = account
tMemo = matches.group("memo")
tCountry = None
tOutflow = None
tInflow = (
matches.group("inflow")
.strip()
.replace(".", "")
.replace(",", ".")
.replace("-", "")
)
tOutflow = "0.00"
tInflow = matches.group("inflow").strip().replace(".", "").replace(",", ".").replace("-", "")
tOwner = input_dict[key]["owner_id"]
tInstallmentNr = None
tInstallmentTt = None
@ -125,43 +120,32 @@ def build_insert(input_dict: dict, account: int):
tTdate = str(
date(
# partial files will not have the year data on transactions
(
int(matches.group("year"))
if pattern_to_use == line_group_pattern
else datetime.now().year
),
int(matches.group("year")) if pattern_to_use == line_group_pattern else datetime.now().year,
int(matches.group("month")),
int(matches.group("day")),
)
)
tAccount = account
tMemo = (
matches.group("p_memo")
if matches.group("p_memo")
else matches.group("memo")
)
tInstallmentNr = (
int(matches.group("p_nr")) if matches.group("p_nr") else None
)
tInstallmentTt = (
int(matches.group("p_tt")) if matches.group("p_tt") else None
)
tMemo = matches.group("p_memo") if matches.group("p_memo") else matches.group("memo")
tInstallmentNr = int(matches.group("p_nr")) if matches.group("p_nr") else None
tInstallmentTt = int(matches.group("p_tt")) if matches.group("p_tt") else None
tCountry = matches.group("country")
tOutflow = (
matches.group("outflow").strip().replace(".", "").replace(",", ".")
)
tInflow = (
matches.group("inflow").strip().replace(".", "").replace(",", ".")
)
tOutflow = matches.group("outflow").strip().replace(".", "").replace(",", ".")
tInflow = matches.group("inflow").strip().replace(".", "").replace(",", ".")
tOwner = input_dict[key]["owner_id"]
tCreated = str(datetime.now(tz=None))
tUpdated = None
preHash = tTdate + tMemo + tOutflow + tInflow
tId = hashlib.sha256(preHash.encode()).hexdigest()
insert_bulk.append(
(
tId,
tTdate,
tAccount,
tMemo,

970
lab.ipynb

File diff suppressed because one or more lines are too long

6
robopato.sql

@ -6,7 +6,7 @@ CREATE TABLE `ACCOUNTS` (
);
CREATE TABLE `TRANSACTION` (
`ID` integer PRIMARY KEY NOT NULL AUTO_INCREMENT,
`ID` char(64) PRIMARY KEY NOT NULL,
`TDATE` date NOT NULL,
`ACCOUNTID` integer NOT NULL,
`MEMO` varchar(50) NOT NULL,
@ -52,7 +52,7 @@ CREATE TABLE `SUBCATEGORY` (
CREATE TABLE `CATEGORIZED_TRANSACTIONS` (
`ID` integer PRIMARY KEY NOT NULL AUTO_INCREMENT,
`ACCOUNTID` integer,
`TRANSACTIONID` integer,
`TRANSACTIONID` char(64),
`PAYEEID` integer,
`CATEGORYID` integer,
`SUBCATEGORYID` integer,
@ -72,4 +72,4 @@ ALTER TABLE `CATEGORY` ADD FOREIGN KEY (`ID`) REFERENCES `CATEGORIZED_TRANSACTIO
ALTER TABLE `SUBCATEGORY` ADD FOREIGN KEY (`ID`) REFERENCES `CATEGORIZED_TRANSACTIONS` (`SUBCATEGORYID`);
ALTER TABLE `PAYEE` ADD FOREIGN KEY (`ID`) REFERENCES `CATEGORIZED_TRANSACTIONS` (`PAYEEID`);
ALTER TABLE `PAYEE` ADD FOREIGN KEY (`ID`) REFERENCES `CATEGORIZED_TRANSACTIONS` (`PAYEEID`);
Loading…
Cancel
Save