From 79bdc8beda1fc2886c094ac7b80f288721cde0c9 Mon Sep 17 00:00:00 2001 From: laurentbarontini Date: Sun, 11 Jan 2026 20:35:29 +0100 Subject: [PATCH] 11.01.26 --- app.py | 70 ++++++++++++++++++++++++++++------------------------------ 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/app.py b/app.py index 71d035a..f1f6d83 100644 --- a/app.py +++ b/app.py @@ -28,24 +28,22 @@ import re from datetime import datetime class AHKParser: - def __init__(self, text_content): - self.text = text_content - self.data = None + lab="AHK" - def parse(self, lab="AHK"): + def parse(self, text): """Parse le texte et retourne un dictionnaire structuré""" result = { - "lab": lab, - "report": self._extract_report_info(), - "contract": self._extract_contract_info(), - "parties": self._extract_parties_info(), - "shipment": self._extract_shipment_info(), - "weights": self._extract_weights_info() + "lab": self.lab, + "report": self._extract_report_info(text), + "contract": self._extract_contract_info(text), + "parties": self._extract_parties_info(text), + "shipment": self._extract_shipment_info(text), + "weights": self._extract_weights_info(text) } self.data = result return result - def _extract_report_info(self): + def _extract_report_info(self, text): """Extrait les informations du rapport""" report_info = { "reference": None, @@ -54,23 +52,23 @@ class AHKParser: } # Recherche de la référence client - ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', self.text) + ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text) if ref_match: report_info["reference"] = ref_match.group(1).strip() # Recherche du numéro de fichier AHK - file_no_match = re.search(r'AHK\s*S/([\w/]+)', self.text) + file_no_match = re.search(r'AHK\s*S/([\w/]+)', text) if file_no_match: report_info["file_no"] = file_no_match.group(1).strip() # Recherche de la date du rapport - date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text) + date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text) if date_match: report_info["date"] = date_match.group(1).strip() return report_info - def _extract_contract_info(self): + def _extract_contract_info(self, text): """Extrait les informations du contrat""" contract_info = { "contract_no": None, @@ -81,7 +79,7 @@ class AHKParser: } # Extraction de la référence client (peut servir comme numéro de contrat) - ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', self.text) + ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text) if ref_match: ref_parts = ref_match.group(1).split('/') if len(ref_parts) >= 2: @@ -89,7 +87,7 @@ class AHKParser: contract_info["invoice_no"] = ref_parts[1].strip() # Extraction de l'origine et de la marchandise - origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', self.text) + origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', text) if origin_match: origin_text = origin_match.group(1).strip() if "AUSTRALIAN" in origin_text.upper(): @@ -99,7 +97,7 @@ class AHKParser: return contract_info - def _extract_parties_info(self): + def _extract_parties_info(self, text): """Extrait les informations sur les parties""" parties_info = { "seller": None, @@ -108,24 +106,24 @@ class AHKParser: } # Extraction du vendeur (Client) - seller_match = re.search(r'Client\s*:\s*([^\n]+)', self.text) + seller_match = re.search(r'Client\s*:\s*([^\n]+)', text) if seller_match: parties_info["seller"] = seller_match.group(1).strip() # Extraction de l'acheteur (Buyer) - buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', self.text) + buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', text) if buyer_match: parties_info["buyer"] = buyer_match.group(1).strip() # Extraction du transporteur (Vessel) - vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', self.text) + vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text) if vessel_match: # On considère le nom du navire comme transporteur parties_info["carrier"] = vessel_match.group(1).strip() return parties_info - def _extract_shipment_info(self): + def _extract_shipment_info(self, text): """Extrait les informations d'expédition""" shipment_info = { "vessel": None, @@ -140,43 +138,43 @@ class AHKParser: } # Extraction du navire - vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', self.text) + vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text) if vessel_match: shipment_info["vessel"] = vessel_match.group(1).strip() # Extraction du numéro de connaissement - bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', self.text) + bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', text) if bl_no_match: shipment_info["bl_no"] = bl_no_match.group(1).strip() # Extraction de la date du connaissement - bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text) + bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text) if bl_date_match: shipment_info["bl_date"] = bl_date_match.group(1).strip() # Extraction du port de destination - dest_match = re.search(r'Destination\s*:\s*([^\n]+)', self.text) + dest_match = re.search(r'Destination\s*:\s*([^\n]+)', text) if dest_match: shipment_info["port_destination"] = dest_match.group(1).strip() # Extraction de la date d'arrivée - arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text) + arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text) if arrival_match: shipment_info["arrival_date"] = arrival_match.group(1).strip() # Extraction de la méthode de pesée - weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', self.text) + weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', text) if weighing_method_match: shipment_info["weighing_method"] = weighing_method_match.group(1).strip() # Extraction du nombre de balles - bales_match = re.search(r'Total Bales\s*:\s*(\d+)', self.text) + bales_match = re.search(r'Total Bales\s*:\s*(\d+)', text) if bales_match: shipment_info["bales"] = int(bales_match.group(1).strip()) return shipment_info - def _extract_weights_info(self): + def _extract_weights_info(self, text): """Extrait les informations de poids""" weights_info = { "gross_landed_kg": None, @@ -188,32 +186,32 @@ class AHKParser: } # Extraction du poids brut débarqué - gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', self.text) + gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', text) if gross_landed_match: weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).strip()) # Extraction du poids de tare - tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', self.text) + tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', text) if tare_match: weights_info["tare_kg"] = float(tare_match.group(1).strip()) # Extraction du poids net débarqué - net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', self.text) + net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text) if net_landed_match: weights_info["net_landed_kg"] = float(net_landed_match.group(1).strip()) # Extraction du poids net facturé - invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', self.text) + invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text) if invoice_net_match: weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).strip()) # Extraction de la perte en kg - loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', self.text) + loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', text) if loss_match: weights_info["gain_loss_kg"] = -float(loss_match.group(1).strip()) # Extraction du pourcentage de perte - percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', self.text) + percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', text) if percent_match: weights_info["gain_loss_percent"] = -float(percent_match.group(1).strip())