# AHKParser as introduced for app.py by patch ba853074 ("11.01.26"),
# reconstructed from the diff with the parsing defects described below fixed.
class AHKParser:
    """Extract the fields of an AHK weight report from its plain text.

    The shipment and weight sections are read as "label line / value line"
    pairs: a line that is exactly a known label, followed (before the next
    label) by a line starting with ``:`` that carries the value.

    ``parse`` relies on ``empty_weight_report``, ``section`` and ``to_float``,
    which are defined elsewhere in this module.
    """

    lab = "AHK"

    # Labels looked up in the shipment part of the report.
    _SHIPMENT_LABELS = (
        "Total Bales", "Vessel", "Voy. No.", "B/L No.", "B/L Date",
        "Destination", "Growth", "Arrival Date", "First date of weighing",
        "Last Date of Weighing", "Weighing method", "Tare",
    )
    # Labels shared by the invoice-weight and landed-weight tables.
    _WEIGHT_LABELS = ("Bales", "Gross", "Tare", "Net")

    # ---------- helpers ----------
    def clean(self, t: str) -> str:
        """Collapse every whitespace run (NBSP and newlines included) to one space."""
        return " ".join(t.replace("\xa0", " ").split())

    def _normalize_lines(self, text: str) -> str:
        """Clean each line on its own and drop blank lines, keeping line breaks.

        ``block`` needs the line structure, so the whole document must not be
        flattened with ``clean`` before it is parsed.
        """
        cleaned = (self.clean(line) for line in text.splitlines())
        return "\n".join(line for line in cleaned if line)

    def find(self, pattern: str, text) -> "str | None":
        """Return the cleaned first capture group of ``pattern`` in ``text``.

        The search is case-insensitive. Returns ``None`` when ``text`` is
        empty/``None`` or when the pattern does not match.
        """
        if not text:
            return None
        m = re.search(pattern, text, re.I)
        return self.clean(m.group(1)) if m else None

    def block(self, text, labels) -> dict:
        """Map each label found in ``text`` to the value that follows it.

        A label is a (cleaned) line equal to one of ``labels``. Its value is
        the first line starting with ``:`` between that label and the next
        label line. Labels that are absent, or that have no value line, are
        simply missing from the result; when a label occurs several times the
        first occurrence that has a value wins.

        Returns an empty dict for empty/``None`` text.
        """
        if not text:
            return {}

        wanted = set(labels)
        lines = [line for line in map(self.clean, text.splitlines()) if line]
        positions = [i for i, line in enumerate(lines) if line in wanted]

        found = {}
        for n, start in enumerate(positions):
            label = lines[start]
            if label in found:
                continue
            end = positions[n + 1] if n + 1 < len(positions) else len(lines)
            for line in lines[start + 1:end]:
                if line.startswith(":"):
                    # Key by the label actually seen, never by position, so a
                    # missing or reordered label cannot shift the other values.
                    found[label] = line.lstrip(":").strip()
                    break
        return found

    # ---------- parser ----------
    def parse(self, text):
        """Fill a fresh AHK weight report structure from the report text.

        Fields that cannot be located are passed through as ``None`` (numeric
        ones via ``to_float``).
        """
        r = empty_weight_report("AHK")
        # Normalise whitespace per line only: block() depends on line breaks,
        # and the "(.+)" captures below must stop at the end of their line.
        text = self._normalize_lines(text)

        # ---------- report ----------
        r["report"]["reference"] = self.find(r"(AHK\s*/\S+)", text)
        r["report"]["date"] = self.find(r"Produced On\s*([0-9A-Za-z ]+)", text)

        # ---------- contract ----------
        r["contract"]["invoice_no"] = self.find(
            r"Client Reference:\s*([A-Z0-9\- /]+)", text
        )
        r["contract"]["commodity"] = "Raw Cotton"

        # ---------- parties ----------
        r["parties"]["buyer"] = self.find(r"Buyer\s*:\s*(.+)", text)
        r["parties"]["seller"] = self.find(r"Client\s*Ref No\.\s*:\s*(.+)", text)

        # ---------- shipment block ----------
        ship = self.block(text, self._SHIPMENT_LABELS)

        r["shipment"]["bales"] = to_float(ship.get("Total Bales"))
        r["shipment"]["vessel"] = ship.get("Vessel")
        r["shipment"]["bl_no"] = ship.get("B/L No.")
        r["shipment"]["port_destination"] = ship.get("Destination")
        r["shipment"]["arrival_date"] = ship.get("Arrival Date")
        r["shipment"]["weighing_method"] = ship.get("Weighing method")
        r["contract"]["origin"] = ship.get("Growth")

        # ---------- weights ----------
        # Invoice figures: first occurrence of each label in the whole report.
        inv = self.block(text, self._WEIGHT_LABELS)
        # Landed figures: only the part between "Bales Weighed" and "Outturn".
        land = self.block(
            section(text, "Bales Weighed", "Outturn"), self._WEIGHT_LABELS
        )
        loss = section(text, "LOSS", "Invoice average")

        r["weights"]["gross_landed_kg"] = to_float(land.get("Gross"))
        r["weights"]["tare_kg"] = to_float(land.get("Tare"))
        r["weights"]["net_landed_kg"] = to_float(land.get("Net"))
        r["weights"]["invoice_net_kg"] = to_float(inv.get("Net"))

        r["weights"]["gain_loss_kg"] = to_float(
            self.find(r"(-?\d+\.?\d*)\s*kg", loss)
        )
        r["weights"]["gain_loss_percent"] = to_float(
            self.find(r"Percentage\s*:\s*(-?\d+\.?\d*)", loss)
        )

        return r