11.01.26

2026-01-11 21:04:10 +01:00
parent 79bdc8beda
commit 1a1759871d
1 changed files with 281 additions and 140 deletions
--- a/app.py
+++ b/app.py
@@ -11,6 +11,7 @@ import logging
 import io
 from logging.handlers import RotatingFileHandler
 import re
+from datetime import datetime

 LOG_PATH = "/var/log/automation-service.log"

@@ -24,12 +25,15 @@ file_handler.setFormatter(logging.Formatter(
    "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
 ))

-import re
-from datetime import datetime
-
 class AHKParser:
    lab = "AHK"    
    
+    def _clean_value(self, value):
+        """Return value with surrounding whitespace stripped; falsy values (None, empty string) are returned unchanged."""
+        if value:
+            return value.strip()
+        return value
+    
    def parse(self, text):
        """Parse le texte et retourne un dictionnaire structuré"""
        result = {
@@ -51,20 +55,20 @@ class AHKParser:
            "date": None
        }
        
-        # Recherche de la référence client
-        ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
+        # Client reference: matches "Client Reference: S-<digits> / INV <digits>", tolerating extra whitespace between words and around "/"
+        ref_match = re.search(r'Client\s+Reference:\s*(S-\d+\s*/\s*INV\s*\d+)', text)
        if ref_match:
-            report_info["reference"] = ref_match.group(1).strip()
+            report_info["reference"] = self._clean_value(ref_match.group(1))
        
        # Recherche du numéro de fichier AHK
-        file_no_match = re.search(r'AHK\s*S/([\w/]+)', text)
+        file_no_match = re.search(r'AHK\s+S/([\w/]+)', text)
        if file_no_match:
-            report_info["file_no"] = file_no_match.group(1).strip()
+            report_info["file_no"] = self._clean_value(file_no_match.group(1))
        
        # Recherche de la date du rapport
-        date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
+        date_match = re.search(r'Signed\s+on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
        if date_match:
-            report_info["date"] = date_match.group(1).strip()
+            report_info["date"] = self._clean_value(date_match.group(1))
        
        return report_info
    
@@ -78,21 +82,24 @@ class AHKParser:
            "commodity": None
        }
        
-        # Extraction de la référence client (peut servir comme numéro de contrat)
-        ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
+        # Extraction de la référence client
+        ref_match = re.search(r'Client\s+Ref\s+No\.\s*:\s*([^\n]+)', text)
        if ref_match:
-            ref_parts = ref_match.group(1).split('/')
-            if len(ref_parts) >= 2:
-                contract_info["contract_no"] = ref_parts[0].strip()
-                contract_info["invoice_no"] = ref_parts[1].strip()
+            ref_text = ref_match.group(1).strip()
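+            # Splits on "/" and whitespace: "S-3488 / INV 4013" -> ["S-3488", "INV", "4013"]; NOTE(review): invoice_no then receives only "INV", the number is dropped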
+            parts = re.split(r'[/\s]+', ref_text)
+            for part in parts:
+                if part.startswith('S-'):
+                    contract_info["contract_no"] = part.strip()
+                elif part.startswith('INV'):
+                    contract_info["invoice_no"] = part.strip()
        
-        # Extraction de l'origine et de la marchandise
-        origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', text)
-        if origin_match:
-            origin_text = origin_match.group(1).strip()
+        # Origin and commodity: capture the uppercase text after "Growth:" up to the next "Vessel" label or the end of the text
+        growth_match = re.search(r'Growth\s*:\s*([A-Z\s]+?)(?=\s*(?:Vessel|$))', text)
+        if growth_match:
+            origin_text = growth_match.group(1).strip()
            if "AUSTRALIAN" in origin_text.upper():
                contract_info["origin"] = "AUSTRALIA"
-                # La marchandise est généralement "RAW COTTON"
                contract_info["commodity"] = "RAW COTTON"
        
        return contract_info
@@ -105,21 +112,20 @@ class AHKParser:
            "carrier": None
        }
        
-        # Extraction du vendeur (Client)
-        seller_match = re.search(r'Client\s*:\s*([^\n]+)', text)
+        # Seller (Client): capture up to the next "Client Ref" or "Buyer" label, or the end of the text; the captured value never contains a newline or ":"
+        seller_match = re.search(r'Client\s*:\s*([^\n:]+?)(?=\s*(?:Client Ref|Buyer|$))', text)
        if seller_match:
-            parties_info["seller"] = seller_match.group(1).strip()
+            parties_info["seller"] = self._clean_value(seller_match.group(1))
        
-        # Extraction de l'acheteur (Buyer)
-        buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', text)
+        # Buyer: capture up to the next "Total Bales" label or the end of the text; the captured value never contains a newline or ":"
+        buyer_match = re.search(r'Buyer\s*:\s*([^\n:]+?)(?=\s*(?:Total Bales|$))', text)
        if buyer_match:
-            parties_info["buyer"] = buyer_match.group(1).strip()
+            parties_info["buyer"] = self._clean_value(buyer_match.group(1))
        
-        # Extraction du transporteur (Vessel)
-        vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
+        # Extraction du transporteur (nom du navire seulement)
+        vessel_match = re.search(r'Vessel\s*:\s*([A-Z\s]+?)(?=\s*(?:Arrival|Voy|$))', text)
        if vessel_match:
-            # On considère le nom du navire comme transporteur
-            parties_info["carrier"] = vessel_match.group(1).strip()
+            parties_info["carrier"] = self._clean_value(vessel_match.group(1))
        
        return parties_info
    
@@ -129,48 +135,51 @@ class AHKParser:
            "vessel": None,
            "bl_no": None,
            "bl_date": None,
-            "port_loading": None,  # Non spécifié dans le texte
+            "port_loading": None,
            "port_destination": None,
            "arrival_date": None,
-            "weighing_place": None,  # Non spécifié dans le texte
+            "weighing_place": None,
            "weighing_method": None,
            "bales": None
        }
        
-        # Extraction du navire
-        vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
+        # Extraction du navire (nom seulement)
+        vessel_match = re.search(r'Vessel\s*:\s*([A-Z\s]+?)(?=\s*(?:Arrival|Voy|$))', text)
        if vessel_match:
-            shipment_info["vessel"] = vessel_match.group(1).strip()
+            shipment_info["vessel"] = self._clean_value(vessel_match.group(1))
        
-        # Extraction du numéro de connaissement
-        bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', text)
+        # Extraction du numéro de connaissement (seulement le numéro)
+        bl_no_match = re.search(r'B/L\s+No\.\s*:\s*([A-Z0-9]+)(?=\s|$)', text)
        if bl_no_match:
-            shipment_info["bl_no"] = bl_no_match.group(1).strip()
+            shipment_info["bl_no"] = self._clean_value(bl_no_match.group(1))
        
        # Extraction de la date du connaissement
-        bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
+        bl_date_match = re.search(r'B/L\s+Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})(?=\s|$)', text)
        if bl_date_match:
-            shipment_info["bl_date"] = bl_date_match.group(1).strip()
+            shipment_info["bl_date"] = self._clean_value(bl_date_match.group(1))
        
-        # Extraction du port de destination
-        dest_match = re.search(r'Destination\s*:\s*([^\n]+)', text)
+        # Extraction du port de destination (sans le "Tare")
+        dest_match = re.search(r'Destination\s*:\s*([A-Z,\s]+?)(?=\s*(?:Tare|$))', text)
        if dest_match:
-            shipment_info["port_destination"] = dest_match.group(1).strip()
+            shipment_info["port_destination"] = self._clean_value(dest_match.group(1))
        
        # Extraction de la date d'arrivée
-        arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
+        arrival_match = re.search(r'Arrival\s+Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})(?=\s|$)', text)
        if arrival_match:
-            shipment_info["arrival_date"] = arrival_match.group(1).strip()
+            shipment_info["arrival_date"] = self._clean_value(arrival_match.group(1))
        
        # Extraction de la méthode de pesée
-        weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', text)
+        weighing_method_match = re.search(r'Weighing\s+method\s*:\s*([^\n]+?)(?=\s*(?:Tare|$))', text)
        if weighing_method_match:
-            shipment_info["weighing_method"] = weighing_method_match.group(1).strip()
+            shipment_info["weighing_method"] = self._clean_value(weighing_method_match.group(1))
        
        # Extraction du nombre de balles
-        bales_match = re.search(r'Total Bales\s*:\s*(\d+)', text)
+        bales_match = re.search(r'Total\s+Bales\s*:\s*(\d+)(?=\s|$)', text)
        if bales_match:
+            try:
                shipment_info["bales"] = int(bales_match.group(1).strip())
+            except ValueError:
+                shipment_info["bales"] = None
        
        return shipment_info
    
@@ -185,112 +194,244 @@ class AHKParser:
            "gain_loss_percent": None
        }
        
-        # Extraction du poids brut débarqué
-        gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', text)
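+        # Gross landed weight: first "Gross : <number> kg" following "LANDED WEIGHTS"; "," separators are removed before conversion (fix: expected 100580 kg, presumably on the report used for debugging)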
+        gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.,]+)\s*kg', text)
        if gross_landed_match:
-            weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).strip())
+            try:
+                weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).replace(',', '').strip())
+            except ValueError:
+                pass
        
        # Extraction du poids de tare
-        tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', text)
+        tare_match = re.search(r'Tare\s*:\s*([\d.,]+)\s*kg', text)
        if tare_match:
-            weights_info["tare_kg"] = float(tare_match.group(1).strip())
+            try:
+                weights_info["tare_kg"] = float(tare_match.group(1).replace(',', '').strip())
+            except ValueError:
+                pass
        
-        # Extraction du poids net débarqué
-        net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
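+        # Net landed weight: first "Net : <number> kg" following "LANDED WEIGHTS"; "," separators are removed before conversion (fix: expected 100078.40 kg, presumably on the report used for debugging)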
+        net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.,]+)\s*kg', text)
        if net_landed_match:
-            weights_info["net_landed_kg"] = float(net_landed_match.group(1).strip())
+            try:
+                weights_info["net_landed_kg"] = float(net_landed_match.group(1).replace(',', '').strip())
+            except ValueError:
+                pass
        
-        # Extraction du poids net facturé
-        invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
+        # Invoiced net weight: first "Net : <number> kg" following "INVOICE WEIGHTS"; "," separators are removed before conversion (e.g. 101299 kg)
+        invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.,]+)\s*kg', text)
        if invoice_net_match:
-            weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).strip())
+            try:
+                weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).replace(',', '').strip())
+            except ValueError:
+                pass
        
        # Extraction de la perte en kg
-        loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', text)
+        loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.,]+)\s*kg', text)
        if loss_match:
-            weights_info["gain_loss_kg"] = -float(loss_match.group(1).strip())
+            try:
+                weights_info["gain_loss_kg"] = -float(loss_match.group(1).replace(',', '').strip())
+            except ValueError:
+                pass
        
        # Extraction du pourcentage de perte
-        percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', text)
+        percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.,]+)%', text)
        if percent_match:
-            weights_info["gain_loss_percent"] = -float(percent_match.group(1).strip())
+            try:
+                weights_info["gain_loss_percent"] = -float(percent_match.group(1).replace(',', '').strip())
+            except ValueError:
+                pass
        
        return weights_info
-    
 # class AHKParser:
 #     lab="AHK"    
    
-#     def _lines(self, text):
-#         return [l.strip() for l in text.splitlines() if l.strip()]
-
-#     def _col_block(self, lines, labels, max_scan=30):
-#         idx = [i for i,l in enumerate(lines) if l in labels]
-#         if not idx:
-#             return {}   # << empêche le crash
-#         start = max(idx) + 1
-#         vals = []
-#         for l in lines[start:start+max_scan]:
-#             if l.startswith(":"):
-#                 v = l[1:].replace("kg","").strip()
-#                 vals.append(v)
-#             if len(vals) == len(labels):
-#                 break
-#         return dict(zip(labels, vals))
-
 #     def parse(self, text):
-#         L = self._lines(text)
-#         r = empty_weight_report("AHK")
+#         """Parse le texte et retourne un dictionnaire structuré"""
+#         result = {
+#             "lab": self.lab,
+#             "report": self._extract_report_info(text),
+#             "contract": self._extract_contract_info(text),
+#             "parties": self._extract_parties_info(text),
+#             "shipment": self._extract_shipment_info(text),
+#             "weights": self._extract_weights_info(text)
+#         }
+#         self.data = result
+#         return result
    
-#         # report
-#         r["report"]["reference"] = safe_search(r"(AHK\s*/[A-Z0-9/]+)", text)
-#         r["report"]["date"]      = safe_search(r"Produced On\s*([0-9A-Za-z ]+)", text)
+#     def _extract_report_info(self, text):
+#         """Extrait les informations du rapport"""
+#         report_info = {
+#             "reference": None,
+#             "file_no": None,
+#             "date": None
+#         }
        
-#         # contract
-#         r["contract"]["invoice_no"] = safe_search(r"Client Reference:\s*([A-Z0-9\- /]+)", text)
-#         r["contract"]["commodity"]  = "Raw Cotton"
+#         # Recherche de la référence client
+#         ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
+#         if ref_match:
+#             report_info["reference"] = ref_match.group(1).strip()
        
-#         # buyer
-#         r["parties"]["buyer"] = safe_search(r"Buyer:\s*([A-Z0-9 ().,-]+)", text)
+#         # Recherche du numéro de fichier AHK
+#         file_no_match = re.search(r'AHK\s*S/([\w/]+)', text)
+#         if file_no_match:
+#             report_info["file_no"] = file_no_match.group(1).strip()
        
-#         # shipment block 1
-#         ship1 = self._col_block(L, [
-#             "Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination"
-#         ])
+#         # Recherche de la date du rapport
+#         date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
+#         if date_match:
+#             report_info["date"] = date_match.group(1).strip()
        
-#         # shipment block 2
-#         ship2 = self._col_block(L, [
-#             "Growth","Arrival Date","First date of weighing",
-#             "Last Date of Weighing","Weighing method","Tare"
-#         ])
+#         return report_info
    
-#         r["shipment"]["bales"]            = to_float(ship1.get("Total Bales"))
-#         r["shipment"]["vessel"]           = ship1.get("Vessel")
-#         r["shipment"]["bl_no"]            = ship1.get("B/L No.")
-#         r["shipment"]["port_destination"] = ship1.get("Destination")
-#         r["shipment"]["arrival_date"]     = ship2.get("Arrival Date")
-#         r["shipment"]["weighing_method"]  = ship2.get("Weighing method")
-#         r["contract"]["origin"]           = ship2.get("Growth")
+#     def _extract_contract_info(self, text):
+#         """Extrait les informations du contrat"""
+#         contract_info = {
+#             "contract_no": None,
+#             "invoice_no": None,
+#             "lc_no": None,
+#             "origin": None,
+#             "commodity": None
+#         }
        
-#         # invoice weights
-#         inv = self._col_block(L, ["Bales","Gross","Tare","Net"])
-#         r["weights"]["invoice_net_kg"] = to_float(inv.get("Net"))
+#         # Extraction de la référence client (peut servir comme numéro de contrat)
+#         ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
+#         if ref_match:
+#             ref_parts = ref_match.group(1).split('/')
+#             if len(ref_parts) >= 2:
+#                 contract_info["contract_no"] = ref_parts[0].strip()
+#                 contract_info["invoice_no"] = ref_parts[1].strip()
        
-#         # landed weights
-#         land = self._col_block(
-#             self._lines(section(text,"Bales Weighed","Outturn")),
-#             ["Bales","Gross","Tare","Net"]
-#         )
+#         # Extraction de l'origine et de la marchandise
+#         origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', text)
+#         if origin_match:
+#             origin_text = origin_match.group(1).strip()
+#             if "AUSTRALIAN" in origin_text.upper():
+#                 contract_info["origin"] = "AUSTRALIA"
+#                 # La marchandise est généralement "RAW COTTON"
+#                 contract_info["commodity"] = "RAW COTTON"
        
-#         r["weights"]["gross_landed_kg"] = to_float(land.get("Gross"))
-#         r["weights"]["tare_kg"]         = to_float(land.get("Tare"))
-#         r["weights"]["net_landed_kg"]   = to_float(land.get("Net"))
+#         return contract_info
    
-#         # loss
-#         loss = section(text,"LOSS","Invoice average")
-#         r["weights"]["gain_loss_kg"]      = to_float(safe_search(r"(-?\d+\.?\d*)\s*kg", loss))
-#         r["weights"]["gain_loss_percent"] = to_float(safe_search(r"Percentage\s*:\s*(-?\d+\.?\d*)", loss))
+#     def _extract_parties_info(self, text):
+#         """Extrait les informations sur les parties"""
+#         parties_info = {
+#             "seller": None,
+#             "buyer": None,
+#             "carrier": None
+#         }
        
-#         return r
+#         # Extraction du vendeur (Client)
+#         seller_match = re.search(r'Client\s*:\s*([^\n]+)', text)
+#         if seller_match:
+#             parties_info["seller"] = seller_match.group(1).strip()
+        
+#         # Extraction de l'acheteur (Buyer)
+#         buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', text)
+#         if buyer_match:
+#             parties_info["buyer"] = buyer_match.group(1).strip()
+        
+#         # Extraction du transporteur (Vessel)
+#         vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
+#         if vessel_match:
+#             # On considère le nom du navire comme transporteur
+#             parties_info["carrier"] = vessel_match.group(1).strip()
+        
+#         return parties_info
+    
+#     def _extract_shipment_info(self, text):
+#         """Extrait les informations d'expédition"""
+#         shipment_info = {
+#             "vessel": None,
+#             "bl_no": None,
+#             "bl_date": None,
+#             "port_loading": None,  # Non spécifié dans le texte
+#             "port_destination": None,
+#             "arrival_date": None,
+#             "weighing_place": None,  # Non spécifié dans le texte
+#             "weighing_method": None,
+#             "bales": None
+#         }
+        
+#         # Extraction du navire
+#         vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
+#         if vessel_match:
+#             shipment_info["vessel"] = vessel_match.group(1).strip()
+        
+#         # Extraction du numéro de connaissement
+#         bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', text)
+#         if bl_no_match:
+#             shipment_info["bl_no"] = bl_no_match.group(1).strip()
+        
+#         # Extraction de la date du connaissement
+#         bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
+#         if bl_date_match:
+#             shipment_info["bl_date"] = bl_date_match.group(1).strip()
+        
+#         # Extraction du port de destination
+#         dest_match = re.search(r'Destination\s*:\s*([^\n]+)', text)
+#         if dest_match:
+#             shipment_info["port_destination"] = dest_match.group(1).strip()
+        
+#         # Extraction de la date d'arrivée
+#         arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
+#         if arrival_match:
+#             shipment_info["arrival_date"] = arrival_match.group(1).strip()
+        
+#         # Extraction de la méthode de pesée
+#         weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', text)
+#         if weighing_method_match:
+#             shipment_info["weighing_method"] = weighing_method_match.group(1).strip()
+        
+#         # Extraction du nombre de balles
+#         bales_match = re.search(r'Total Bales\s*:\s*(\d+)', text)
+#         if bales_match:
+#             shipment_info["bales"] = int(bales_match.group(1).strip())
+        
+#         return shipment_info
+    
+#     def _extract_weights_info(self, text):
+#         """Extrait les informations de poids"""
+#         weights_info = {
+#             "gross_landed_kg": None,
+#             "tare_kg": None,
+#             "net_landed_kg": None,
+#             "invoice_net_kg": None,
+#             "gain_loss_kg": None,
+#             "gain_loss_percent": None
+#         }
+        
+#         # Extraction du poids brut débarqué
+#         gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', text)
+#         if gross_landed_match:
+#             weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).strip())
+        
+#         # Extraction du poids de tare
+#         tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', text)
+#         if tare_match:
+#             weights_info["tare_kg"] = float(tare_match.group(1).strip())
+        
+#         # Extraction du poids net débarqué
+#         net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
+#         if net_landed_match:
+#             weights_info["net_landed_kg"] = float(net_landed_match.group(1).strip())
+        
+#         # Extraction du poids net facturé
+#         invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
+#         if invoice_net_match:
+#             weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).strip())
+        
+#         # Extraction de la perte en kg
+#         loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', text)
+#         if loss_match:
+#             weights_info["gain_loss_kg"] = -float(loss_match.group(1).strip())
+        
+#         # Extraction du pourcentage de perte
+#         percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', text)
+#         if percent_match:
+#             weights_info["gain_loss_percent"] = -float(percent_match.group(1).strip())
+        
+#         return weights_info
    
 class IntertekParser:
    lab="INTERTEK"