From 9726bb57bcb5f819af1ad3a0bb2bd22339271f4d Mon Sep 17 00:00:00 2001
From: laurentbarontini <l.barontini@open-squared.ch>
Date: Sun, 11 Jan 2026 20:25:13 +0100
Subject: [PATCH] 11.01.26

---
 app.py | 315 ++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 255 insertions(+), 60 deletions(-)

diff --git a/app.py b/app.py
index bb379ac..71d035a 100644
--- a/app.py
+++ b/app.py
@@ -24,80 +24,275 @@ file_handler.setFormatter(logging.Formatter(
     "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
 ))
 
+import re
+from datetime import datetime
+
 class AHKParser:
-    lab = "AHK"
+    def __init__(self, text_content):
+        self.text = text_content
+        self.data = None
+    
+    def parse(self, lab="AHK"):
+        """Parse the text and return a structured dictionary; the result is also stored in self.data."""
+        result = {
+            "lab": lab,
+            "report": self._extract_report_info(),
+            "contract": self._extract_contract_info(),
+            "parties": self._extract_parties_info(),
+            "shipment": self._extract_shipment_info(),
+            "weights": self._extract_weights_info()
+        }
+        self.data = result
+        return result
+    
+    def _extract_report_info(self):
+        """Extract the report information (reference, file_no, date); unmatched fields stay None."""
+        report_info = {
+            "reference": None,
+            "file_no": None,
+            "date": None
+        }
+        
+        # Look for the client reference
+        ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', self.text)
+        if ref_match:
+            report_info["reference"] = ref_match.group(1).strip()
+        
+        # Look for the AHK file number
+        file_no_match = re.search(r'AHK\s*S/([\w/]+)', self.text)
+        if file_no_match:
+            report_info["file_no"] = file_no_match.group(1).strip()
+        
+        # Look for the report date (the date text that follows "Signed on")
+        date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text)
+        if date_match:
+            report_info["date"] = date_match.group(1).strip()
+        
+        return report_info
+    
+    def _extract_contract_info(self):
+        """Extract the contract information; lc_no is never populated here and stays None."""
+        contract_info = {
+            "contract_no": None,
+            "invoice_no": None,
+            "lc_no": None,
+            "origin": None,
+            "commodity": None
+        }
+        
+        # Extract the client reference (can serve as the contract number)
+        ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', self.text)
+        if ref_match:
+            ref_parts = ref_match.group(1).split('/')
+            if len(ref_parts) >= 2:
+                contract_info["contract_no"] = ref_parts[0].strip()
+                contract_info["invoice_no"] = ref_parts[1].strip()
+        
+        # Extract the origin and the commodity (both set only when the Growth text contains "AUSTRALIAN")
+        origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', self.text)
+        if origin_match:
+            origin_text = origin_match.group(1).strip()
+            if "AUSTRALIAN" in origin_text.upper():
+                contract_info["origin"] = "AUSTRALIA"
+                # The commodity is generally "RAW COTTON"
+                contract_info["commodity"] = "RAW COTTON"
+        
+        return contract_info
+    
+    def _extract_parties_info(self):
+        """Extract the parties information (seller, buyer, carrier); unmatched fields stay None."""
+        parties_info = {
+            "seller": None,
+            "buyer": None,
+            "carrier": None
+        }
+        
+        # Extract the seller (Client)
+        seller_match = re.search(r'Client\s*:\s*([^\n]+)', self.text)
+        if seller_match:
+            parties_info["seller"] = seller_match.group(1).strip()
+        
+        # Extract the buyer (Buyer)
+        buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', self.text)
+        if buyer_match:
+            parties_info["buyer"] = buyer_match.group(1).strip()
+        
+        # Extract the carrier (Vessel)
+        vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', self.text)
+        if vessel_match:
+            # The vessel name is treated as the carrier
+            parties_info["carrier"] = vessel_match.group(1).strip()
+        
+        return parties_info
+    
+    def _extract_shipment_info(self):
+        """Extract the shipment information; unmatched fields stay None."""
+        shipment_info = {
+            "vessel": None,
+            "bl_no": None,
+            "bl_date": None,
+            "port_loading": None,  # Not specified in the text
+            "port_destination": None,
+            "arrival_date": None,
+            "weighing_place": None,  # Not specified in the text
+            "weighing_method": None,
+            "bales": None
+        }
+        
+        # Extract the vessel
+        vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', self.text)
+        if vessel_match:
+            shipment_info["vessel"] = vessel_match.group(1).strip()
+        
+        # Extract the bill of lading number
+        bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', self.text)
+        if bl_no_match:
+            shipment_info["bl_no"] = bl_no_match.group(1).strip()
+        
+        # Extract the bill of lading date
+        bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text)
+        if bl_date_match:
+            shipment_info["bl_date"] = bl_date_match.group(1).strip()
+        
+        # Extract the destination port
+        dest_match = re.search(r'Destination\s*:\s*([^\n]+)', self.text)
+        if dest_match:
+            shipment_info["port_destination"] = dest_match.group(1).strip()
+        
+        # Extract the arrival date
+        arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text)
+        if arrival_match:
+            shipment_info["arrival_date"] = arrival_match.group(1).strip()
+        
+        # Extract the weighing method
+        weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', self.text)
+        if weighing_method_match:
+            shipment_info["weighing_method"] = weighing_method_match.group(1).strip()
+        
+        # Extract the number of bales
+        bales_match = re.search(r'Total Bales\s*:\s*(\d+)', self.text)
+        if bales_match:
+            shipment_info["bales"] = int(bales_match.group(1).strip())
+        
+        return shipment_info
+    
+    def _extract_weights_info(self):
+        """Extract the weight information; unmatched fields stay None."""
+        weights_info = {
+            "gross_landed_kg": None,
+            "tare_kg": None,
+            "net_landed_kg": None,
+            "invoice_net_kg": None,
+            "gain_loss_kg": None,
+            "gain_loss_percent": None
+        }
+        
+        # Extract the landed gross weight
+        gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', self.text)
+        if gross_landed_match:
+            weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).strip())
+        
+        # Extract the tare weight (first "Tare : <n> kg" match anywhere in the text, not tied to a section)
+        tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', self.text)
+        if tare_match:
+            weights_info["tare_kg"] = float(tare_match.group(1).strip())
+        
+        # Extract the landed net weight
+        net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', self.text)
+        if net_landed_match:
+            weights_info["net_landed_kg"] = float(net_landed_match.group(1).strip())
+        
+        # Extract the invoiced net weight
+        invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', self.text)
+        if invoice_net_match:
+            weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).strip())
+        
+        # Extract the loss in kg (the pattern requires a leading "-"; stored as a negative value)
+        loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', self.text)
+        if loss_match:
+            weights_info["gain_loss_kg"] = -float(loss_match.group(1).strip())
+        
+        # Extract the loss percentage (the pattern requires a leading "-"; stored as a negative value)
+        percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', self.text)
+        if percent_match:
+            weights_info["gain_loss_percent"] = -float(percent_match.group(1).strip())
+        
+        return weights_info
+    
+# class AHKParser:
+#     lab = "AHK"
 
-    def _lines(self, text):
-        return [l.strip() for l in text.splitlines() if l.strip()]
+#     def _lines(self, text):
+#         return [l.strip() for l in text.splitlines() if l.strip()]
 
-    def _col_block(self, lines, labels, max_scan=30):
-        idx = [i for i,l in enumerate(lines) if l in labels]
-        if not idx:
-            return {}   # << empêche le crash
-        start = max(idx) + 1
-        vals = []
-        for l in lines[start:start+max_scan]:
-            if l.startswith(":"):
-                v = l[1:].replace("kg","").strip()
-                vals.append(v)
-            if len(vals) == len(labels):
-                break
-        return dict(zip(labels, vals))
+#     def _col_block(self, lines, labels, max_scan=30):
+#         idx = [i for i,l in enumerate(lines) if l in labels]
+#         if not idx:
+#             return {}   # << empêche le crash
+#         start = max(idx) + 1
+#         vals = []
+#         for l in lines[start:start+max_scan]:
+#             if l.startswith(":"):
+#                 v = l[1:].replace("kg","").strip()
+#                 vals.append(v)
+#             if len(vals) == len(labels):
+#                 break
+#         return dict(zip(labels, vals))
 
-    def parse(self, text):
-        L = self._lines(text)
-        r = empty_weight_report("AHK")
+#     def parse(self, text):
+#         L = self._lines(text)
+#         r = empty_weight_report("AHK")
 
-        # report
-        r["report"]["reference"] = safe_search(r"(AHK\s*/[A-Z0-9/]+)", text)
-        r["report"]["date"]      = safe_search(r"Produced On\s*([0-9A-Za-z ]+)", text)
+#         # report
+#         r["report"]["reference"] = safe_search(r"(AHK\s*/[A-Z0-9/]+)", text)
+#         r["report"]["date"]      = safe_search(r"Produced On\s*([0-9A-Za-z ]+)", text)
 
-        # contract
-        r["contract"]["invoice_no"] = safe_search(r"Client Reference:\s*([A-Z0-9\- /]+)", text)
-        r["contract"]["commodity"]  = "Raw Cotton"
+#         # contract
+#         r["contract"]["invoice_no"] = safe_search(r"Client Reference:\s*([A-Z0-9\- /]+)", text)
+#         r["contract"]["commodity"]  = "Raw Cotton"
 
-        # buyer
-        r["parties"]["buyer"] = safe_search(r"Buyer:\s*([A-Z0-9 ().,-]+)", text)
+#         # buyer
+#         r["parties"]["buyer"] = safe_search(r"Buyer:\s*([A-Z0-9 ().,-]+)", text)
 
-        # shipment block 1
-        ship1 = self._col_block(L, [
-            "Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination"
-        ])
+#         # shipment block 1
+#         ship1 = self._col_block(L, [
+#             "Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination"
+#         ])
 
-        # shipment block 2
-        ship2 = self._col_block(L, [
-            "Growth","Arrival Date","First date of weighing",
-            "Last Date of Weighing","Weighing method","Tare"
-        ])
+#         # shipment block 2
+#         ship2 = self._col_block(L, [
+#             "Growth","Arrival Date","First date of weighing",
+#             "Last Date of Weighing","Weighing method","Tare"
+#         ])
 
-        r["shipment"]["bales"]            = to_float(ship1.get("Total Bales"))
-        r["shipment"]["vessel"]           = ship1.get("Vessel")
-        r["shipment"]["bl_no"]            = ship1.get("B/L No.")
-        r["shipment"]["port_destination"] = ship1.get("Destination")
-        r["shipment"]["arrival_date"]     = ship2.get("Arrival Date")
-        r["shipment"]["weighing_method"]  = ship2.get("Weighing method")
-        r["contract"]["origin"]           = ship2.get("Growth")
+#         r["shipment"]["bales"]            = to_float(ship1.get("Total Bales"))
+#         r["shipment"]["vessel"]           = ship1.get("Vessel")
+#         r["shipment"]["bl_no"]            = ship1.get("B/L No.")
+#         r["shipment"]["port_destination"] = ship1.get("Destination")
+#         r["shipment"]["arrival_date"]     = ship2.get("Arrival Date")
+#         r["shipment"]["weighing_method"]  = ship2.get("Weighing method")
+#         r["contract"]["origin"]           = ship2.get("Growth")
 
-        # invoice weights
-        inv = self._col_block(L, ["Bales","Gross","Tare","Net"])
-        r["weights"]["invoice_net_kg"] = to_float(inv.get("Net"))
+#         # invoice weights
+#         inv = self._col_block(L, ["Bales","Gross","Tare","Net"])
+#         r["weights"]["invoice_net_kg"] = to_float(inv.get("Net"))
 
-        # landed weights
-        land = self._col_block(
-            self._lines(section(text,"Bales Weighed","Outturn")),
-            ["Bales","Gross","Tare","Net"]
-        )
+#         # landed weights
+#         land = self._col_block(
+#             self._lines(section(text,"Bales Weighed","Outturn")),
+#             ["Bales","Gross","Tare","Net"]
+#         )
 
-        r["weights"]["gross_landed_kg"] = to_float(land.get("Gross"))
-        r["weights"]["tare_kg"]         = to_float(land.get("Tare"))
-        r["weights"]["net_landed_kg"]   = to_float(land.get("Net"))
+#         r["weights"]["gross_landed_kg"] = to_float(land.get("Gross"))
+#         r["weights"]["tare_kg"]         = to_float(land.get("Tare"))
+#         r["weights"]["net_landed_kg"]   = to_float(land.get("Net"))
 
-        # loss
-        loss = section(text,"LOSS","Invoice average")
-        r["weights"]["gain_loss_kg"]      = to_float(safe_search(r"(-?\d+\.?\d*)\s*kg", loss))
-        r["weights"]["gain_loss_percent"] = to_float(safe_search(r"Percentage\s*:\s*(-?\d+\.?\d*)", loss))
+#         # loss
+#         loss = section(text,"LOSS","Invoice average")
+#         r["weights"]["gain_loss_kg"]      = to_float(safe_search(r"(-?\d+\.?\d*)\s*kg", loss))
+#         r["weights"]["gain_loss_percent"] = to_float(safe_search(r"Percentage\s*:\s*(-?\d+\.?\d*)", loss))
 
-        return r
+#         return r
 
 class IntertekParser:
     lab="INTERTEK"
@@ -616,7 +811,7 @@ def empty_weight_report(lab):
         "contract": {"contract_no": None, "invoice_no": None, "lc_no": None, "origin": None, "commodity": None},
         "parties": {"seller": None, "buyer": None, "carrier": None},
         "shipment": {
-            "vessel": None, "bl_no": None, "port_loading": None,
+            "vessel": None, "bl_no": None, "bl_date": None, "port_loading": None,
             "port_destination": None, "arrival_date": None,
             "weighing_place": None, "weighing_method": None,
             "bales": None