import re
from datetime import datetime


class AHKParser:
    """Parse the plain text of an AHK weight report into a structured dict.

    Usage: ``AHKParser(text).parse()``. Every field that cannot be located in
    the text is reported as ``None``; parsing never raises on missing or
    malformed values.
    """

    # Lab identifier exposed at class level, like the sibling parsers do.
    lab = "AHK"

    # Client reference such as "S-1234/ INV 5678"; used by both the report
    # and the contract sections, so it is compiled once here.
    _CLIENT_REF_RE = re.compile(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)')
    # Date such as "5-Jan-2026".
    _DATE = r'(\d{1,2}-[A-Za-z]{3}-\d{4})'
    # Decimal number, optionally written with thousands separators.
    _NUM = r'([\d.,]+)'

    def __init__(self, text_content):
        """Store the report text; ``None`` is treated as an empty document."""
        self.text = text_content or ""
        self.data = None

    def parse(self, lab="AHK") -> dict:
        """Parse the text and return the structured report.

        Args:
            lab: Identifier stored under the ``"lab"`` key of the result.

        Returns:
            A dict with the keys ``lab``, ``report``, ``contract``,
            ``parties``, ``shipment`` and ``weights``. The same dict is kept
            on ``self.data``.
        """
        result = {
            "lab": lab,
            "report": self._extract_report_info(),
            "contract": self._extract_contract_info(),
            "parties": self._extract_parties_info(),
            "shipment": self._extract_shipment_info(),
            "weights": self._extract_weights_info(),
        }
        self.data = result
        return result

    def _search(self, pattern):
        """Return the stripped first group of *pattern* in the text, or None."""
        match = re.search(pattern, self.text)
        return match.group(1).strip() if match else None

    @staticmethod
    def _to_float(raw):
        """Convert a captured number to float; None if absent or malformed.

        Commas are treated as thousands separators ("21,950.50" -> 21950.5).
        """
        if raw is None:
            return None
        try:
            return float(raw.replace(",", ""))
        except ValueError:
            return None

    def _extract_report_info(self):
        """Extract the report identification fields."""
        return {
            "reference": self._search(self._CLIENT_REF_RE),
            "file_no": self._search(r'AHK\s*S/([\w/]+)'),
            "date": self._search(r'Signed on\s*' + self._DATE),
        }

    def _extract_contract_info(self):
        """Extract the contract fields (contract/invoice numbers, origin)."""
        contract_info = {
            "contract_no": None,
            "invoice_no": None,
            "lc_no": None,
            "origin": None,
            "commodity": None,
        }

        # The client reference doubles as "<contract no>/<invoice no>".
        reference = self._search(self._CLIENT_REF_RE)
        if reference is not None:
            contract_no, _, invoice_no = reference.partition('/')
            contract_info["contract_no"] = contract_no.strip()
            contract_info["invoice_no"] = invoice_no.strip()

        growth = self._search(r'Growth\s*:\s*([\w\s]+)')
        if growth is not None:
            if "AUSTRALIAN" in growth.upper():
                contract_info["origin"] = "AUSTRALIA"
            else:
                # Unknown growth: keep the raw value (first captured line)
                # instead of silently dropping the origin.
                first_line = growth.splitlines()[0].strip() if growth else ""
                contract_info["origin"] = first_line or None
            # A report that states a growth is treated as a raw cotton report.
            contract_info["commodity"] = "RAW COTTON"

        return contract_info

    def _extract_parties_info(self):
        """Extract seller (Client), buyer and carrier."""
        return {
            "seller": self._search(r'Client\s*:\s*([^\n]+)'),
            "buyer": self._search(r'Buyer\s*:\s*([^\n]+)'),
            # The vessel name is used as the carrier.
            "carrier": self._search(r'Vessel\s*:\s*([^\n]+)'),
        }

    def _extract_shipment_info(self):
        """Extract the shipment fields (vessel, B/L, dates, bales)."""
        bales = self._search(r'Total Bales\s*:\s*(\d+)')
        return {
            "vessel": self._search(r'Vessel\s*:\s*([^\n]+)'),
            "bl_no": self._search(r'B/L No\.\s*:\s*([^\n]+)'),
            "bl_date": self._search(r'B/L Date\s*:\s*' + self._DATE),
            "port_loading": None,  # not looked up by this parser
            "port_destination": self._search(r'Destination\s*:\s*([^\n]+)'),
            "arrival_date": self._search(r'Arrival Date\s*:\s*' + self._DATE),
            "weighing_place": None,  # not looked up by this parser
            "weighing_method": self._search(r'Weighing method\s*:\s*([^\n]+)'),
            "bales": int(bales) if bales is not None else None,
        }

    def _extract_weights_info(self):
        """Extract landed/invoice weights and the gain or loss.

        Losses are reported as negative numbers and gains as positive ones.
        """
        num = self._NUM
        kg = num + r'\s*kg'
        landed = r'LANDED WEIGHTS[\s\S]*?'

        weights_info = {
            "gross_landed_kg": self._to_float(
                self._search(landed + r'Gross\s*:\s*' + kg)),
            "tare_kg": None,
            "net_landed_kg": self._to_float(
                self._search(landed + r'Net\s*:\s*' + kg)),
            "invoice_net_kg": self._to_float(
                self._search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*' + kg)),
            "gain_loss_kg": None,
            "gain_loss_percent": None,
        }

        # Prefer the tare of the landed section (like gross and net); fall
        # back to the first tare found anywhere in the document.
        tare = self._search(landed + r'Tare\s*:\s*' + kg)
        if tare is None:
            tare = self._search(r'Tare\s*:\s*' + kg)
        weights_info["tare_kg"] = self._to_float(tare)

        # Explicitly negative loss first, then an unsigned LOSS/GAIN figure
        # whose sign is given by its label.
        loss = self._to_float(self._search(r'LOSS\s*:\s*-\s*' + kg))
        if loss is not None:
            weights_info["gain_loss_kg"] = -loss
        else:
            labelled = re.search(r'(LOSS|GAIN)\s*:\s*\+?\s*' + kg, self.text)
            amount = self._to_float(labelled.group(2)) if labelled else None
            if amount is not None:
                is_loss = labelled.group(1) == "LOSS"
                weights_info["gain_loss_kg"] = -amount if is_loss else amount

        # Same strategy for the percentage; an unsigned percentage takes the
        # direction of the kg figure.
        pct = self._to_float(
            self._search(r'Percentage\s*:\s*-\s*' + num + r'\s*%'))
        if pct is not None:
            weights_info["gain_loss_percent"] = -pct
        else:
            pct = self._to_float(
                self._search(r'Percentage\s*:\s*\+?\s*' + num + r'\s*%'))
            if pct is not None:
                delta = weights_info["gain_loss_kg"]
                negative = delta is not None and delta < 0
                weights_info["gain_loss_percent"] = -pct if negative else pct

        return weights_info
r["report"]["reference"] = safe_search(r"(AHK\s*/[A-Z0-9/]+)", text) - r["report"]["date"] = safe_search(r"Produced On\s*([0-9A-Za-z ]+)", text) +# # report +# r["report"]["reference"] = safe_search(r"(AHK\s*/[A-Z0-9/]+)", text) +# r["report"]["date"] = safe_search(r"Produced On\s*([0-9A-Za-z ]+)", text) - # contract - r["contract"]["invoice_no"] = safe_search(r"Client Reference:\s*([A-Z0-9\- /]+)", text) - r["contract"]["commodity"] = "Raw Cotton" +# # contract +# r["contract"]["invoice_no"] = safe_search(r"Client Reference:\s*([A-Z0-9\- /]+)", text) +# r["contract"]["commodity"] = "Raw Cotton" - # buyer - r["parties"]["buyer"] = safe_search(r"Buyer:\s*([A-Z0-9 ().,-]+)", text) +# # buyer +# r["parties"]["buyer"] = safe_search(r"Buyer:\s*([A-Z0-9 ().,-]+)", text) - # shipment block 1 - ship1 = self._col_block(L, [ - "Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination" - ]) +# # shipment block 1 +# ship1 = self._col_block(L, [ +# "Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination" +# ]) - # shipment block 2 - ship2 = self._col_block(L, [ - "Growth","Arrival Date","First date of weighing", - "Last Date of Weighing","Weighing method","Tare" - ]) +# # shipment block 2 +# ship2 = self._col_block(L, [ +# "Growth","Arrival Date","First date of weighing", +# "Last Date of Weighing","Weighing method","Tare" +# ]) - r["shipment"]["bales"] = to_float(ship1.get("Total Bales")) - r["shipment"]["vessel"] = ship1.get("Vessel") - r["shipment"]["bl_no"] = ship1.get("B/L No.") - r["shipment"]["port_destination"] = ship1.get("Destination") - r["shipment"]["arrival_date"] = ship2.get("Arrival Date") - r["shipment"]["weighing_method"] = ship2.get("Weighing method") - r["contract"]["origin"] = ship2.get("Growth") +# r["shipment"]["bales"] = to_float(ship1.get("Total Bales")) +# r["shipment"]["vessel"] = ship1.get("Vessel") +# r["shipment"]["bl_no"] = ship1.get("B/L No.") +# r["shipment"]["port_destination"] = ship1.get("Destination") +# 
r["shipment"]["arrival_date"] = ship2.get("Arrival Date") +# r["shipment"]["weighing_method"] = ship2.get("Weighing method") +# r["contract"]["origin"] = ship2.get("Growth") - # invoice weights - inv = self._col_block(L, ["Bales","Gross","Tare","Net"]) - r["weights"]["invoice_net_kg"] = to_float(inv.get("Net")) +# # invoice weights +# inv = self._col_block(L, ["Bales","Gross","Tare","Net"]) +# r["weights"]["invoice_net_kg"] = to_float(inv.get("Net")) - # landed weights - land = self._col_block( - self._lines(section(text,"Bales Weighed","Outturn")), - ["Bales","Gross","Tare","Net"] - ) +# # landed weights +# land = self._col_block( +# self._lines(section(text,"Bales Weighed","Outturn")), +# ["Bales","Gross","Tare","Net"] +# ) - r["weights"]["gross_landed_kg"] = to_float(land.get("Gross")) - r["weights"]["tare_kg"] = to_float(land.get("Tare")) - r["weights"]["net_landed_kg"] = to_float(land.get("Net")) +# r["weights"]["gross_landed_kg"] = to_float(land.get("Gross")) +# r["weights"]["tare_kg"] = to_float(land.get("Tare")) +# r["weights"]["net_landed_kg"] = to_float(land.get("Net")) - # loss - loss = section(text,"LOSS","Invoice average") - r["weights"]["gain_loss_kg"] = to_float(safe_search(r"(-?\d+\.?\d*)\s*kg", loss)) - r["weights"]["gain_loss_percent"] = to_float(safe_search(r"Percentage\s*:\s*(-?\d+\.?\d*)", loss)) +# # loss +# loss = section(text,"LOSS","Invoice average") +# r["weights"]["gain_loss_kg"] = to_float(safe_search(r"(-?\d+\.?\d*)\s*kg", loss)) +# r["weights"]["gain_loss_percent"] = to_float(safe_search(r"Percentage\s*:\s*(-?\d+\.?\d*)", loss)) - return r +# return r class IntertekParser: lab="INTERTEK" @@ -616,7 +811,7 @@ def empty_weight_report(lab): "contract": {"contract_no": None, "invoice_no": None, "lc_no": None, "origin": None, "commodity": None}, "parties": {"seller": None, "buyer": None, "carrier": None}, "shipment": { - "vessel": None, "bl_no": None, "port_loading": None, + "vessel": None, "bl_no": None, "bl_date": None, "port_loading": 
None, "port_destination": None, "arrival_date": None, "weighing_place": None, "weighing_method": None, "bales": None