From 3bf842ae0d52503ef3e516e557a20af113278f76 Mon Sep 17 00:00:00 2001 From: laurentbarontini Date: Sun, 11 Jan 2026 16:46:46 +0100 Subject: [PATCH] 11.01.26 --- app.py | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/app.py b/app.py index ca10339..8c09de6 100644 --- a/app.py +++ b/app.py @@ -58,21 +58,30 @@ file_handler.setFormatter(logging.Formatter( class AHKParser: lab = "AHK" - def grab(self, text, labels): + def extract_table(self, text, headers): lines = [l.strip() for l in text.splitlines() if l.strip()] - idx = [i for i,l in enumerate(lines) if l in labels] - if not idx: - return {} + out = {} + for h in headers: + for i,l in enumerate(lines): + if l == h: + for j in range(i+1, i+8): + if j < len(lines) and lines[j].startswith(":"): + out[h] = lines[j][1:].strip() + break + return out - values = [] - start = idx[-1] + 1 - for l in lines[start:]: - if l.startswith(":"): - values.append(l[1:].strip()) - if len(values) == len(labels): - break - - return dict(zip(labels, values)) + def extract_weights(self, text): + lines = [l.strip() for l in text.splitlines() if l.strip()] + res = {} + for i,l in enumerate(lines): + if l == "Bales Weighed": + headers = ["Bales","Gross","Tare","Net"] + for h in headers: + for j in range(i, i+20): + if j < len(lines) and lines[j].startswith(":"): + res[h] = lines[j][1:].replace("kg","").strip() + break + return res def parse(self, text): r = empty_weight_report("AHK") @@ -85,14 +94,14 @@ class AHKParser: r["contract"]["invoice_no"] = safe_search(r"Client Reference:\s*([A-Z0-9\- /]+)", text) r["contract"]["commodity"] = "Raw Cotton" - # parties + # buyer r["parties"]["buyer"] = safe_search(r"Buyer:\s*([A-Z0-9 ().,-]+)", text) - # shipment block - ship = self.grab(text, [ + # shipment tables + ship = self.extract_table(text, [ "Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination" ]) - ship2 = self.grab(text, [ + ship2 = self.extract_table(text, [ "Growth","Arrival Date","First date of weighing", "Last Date of Weighing","Weighing method","Tare" ]) @@ -106,8 +115,8 @@ class AHKParser: r["contract"]["origin"] = ship2.get("Growth") # weights - inv = self.grab(text, ["Bales","Gross","Tare","Net"]) - land = self.grab(section(text,"Bales Weighed","Outturn"),["Bales","Gross","Tare","Net"]) + inv = self.extract_table(text, ["Bales","Gross","Tare","Net"]) + land = self.extract_weights(text) r["weights"]["invoice_net_kg"] = to_float(inv.get("Net")) r["weights"]["gross_landed_kg"] = to_float(land.get("Gross"))