This commit is contained in:
2026-01-11 20:35:29 +01:00
parent 9726bb57bc
commit 79bdc8beda

70
app.py
View File

@@ -28,24 +28,22 @@ import re
from datetime import datetime from datetime import datetime
class AHKParser: class AHKParser:
def __init__(self, text_content): lab="AHK"
self.text = text_content
self.data = None
def parse(self, lab="AHK"): def parse(self, text):
"""Parse le texte et retourne un dictionnaire structuré""" """Parse le texte et retourne un dictionnaire structuré"""
result = { result = {
"lab": lab, "lab": self.lab,
"report": self._extract_report_info(), "report": self._extract_report_info(text),
"contract": self._extract_contract_info(), "contract": self._extract_contract_info(text),
"parties": self._extract_parties_info(), "parties": self._extract_parties_info(text),
"shipment": self._extract_shipment_info(), "shipment": self._extract_shipment_info(text),
"weights": self._extract_weights_info() "weights": self._extract_weights_info(text)
} }
self.data = result self.data = result
return result return result
def _extract_report_info(self): def _extract_report_info(self, text):
"""Extrait les informations du rapport""" """Extrait les informations du rapport"""
report_info = { report_info = {
"reference": None, "reference": None,
@@ -54,23 +52,23 @@ class AHKParser:
} }
# Recherche de la référence client # Recherche de la référence client
ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', self.text) ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
if ref_match: if ref_match:
report_info["reference"] = ref_match.group(1).strip() report_info["reference"] = ref_match.group(1).strip()
# Recherche du numéro de fichier AHK # Recherche du numéro de fichier AHK
file_no_match = re.search(r'AHK\s*S/([\w/]+)', self.text) file_no_match = re.search(r'AHK\s*S/([\w/]+)', text)
if file_no_match: if file_no_match:
report_info["file_no"] = file_no_match.group(1).strip() report_info["file_no"] = file_no_match.group(1).strip()
# Recherche de la date du rapport # Recherche de la date du rapport
date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text) date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
if date_match: if date_match:
report_info["date"] = date_match.group(1).strip() report_info["date"] = date_match.group(1).strip()
return report_info return report_info
def _extract_contract_info(self): def _extract_contract_info(self, text):
"""Extrait les informations du contrat""" """Extrait les informations du contrat"""
contract_info = { contract_info = {
"contract_no": None, "contract_no": None,
@@ -81,7 +79,7 @@ class AHKParser:
} }
# Extraction de la référence client (peut servir comme numéro de contrat) # Extraction de la référence client (peut servir comme numéro de contrat)
ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', self.text) ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
if ref_match: if ref_match:
ref_parts = ref_match.group(1).split('/') ref_parts = ref_match.group(1).split('/')
if len(ref_parts) >= 2: if len(ref_parts) >= 2:
@@ -89,7 +87,7 @@ class AHKParser:
contract_info["invoice_no"] = ref_parts[1].strip() contract_info["invoice_no"] = ref_parts[1].strip()
# Extraction de l'origine et de la marchandise # Extraction de l'origine et de la marchandise
origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', self.text) origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', text)
if origin_match: if origin_match:
origin_text = origin_match.group(1).strip() origin_text = origin_match.group(1).strip()
if "AUSTRALIAN" in origin_text.upper(): if "AUSTRALIAN" in origin_text.upper():
@@ -99,7 +97,7 @@ class AHKParser:
return contract_info return contract_info
def _extract_parties_info(self): def _extract_parties_info(self, text):
"""Extrait les informations sur les parties""" """Extrait les informations sur les parties"""
parties_info = { parties_info = {
"seller": None, "seller": None,
@@ -108,24 +106,24 @@ class AHKParser:
} }
# Extraction du vendeur (Client) # Extraction du vendeur (Client)
seller_match = re.search(r'Client\s*:\s*([^\n]+)', self.text) seller_match = re.search(r'Client\s*:\s*([^\n]+)', text)
if seller_match: if seller_match:
parties_info["seller"] = seller_match.group(1).strip() parties_info["seller"] = seller_match.group(1).strip()
# Extraction de l'acheteur (Buyer) # Extraction de l'acheteur (Buyer)
buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', self.text) buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', text)
if buyer_match: if buyer_match:
parties_info["buyer"] = buyer_match.group(1).strip() parties_info["buyer"] = buyer_match.group(1).strip()
# Extraction du transporteur (Vessel) # Extraction du transporteur (Vessel)
vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', self.text) vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
if vessel_match: if vessel_match:
# On considère le nom du navire comme transporteur # On considère le nom du navire comme transporteur
parties_info["carrier"] = vessel_match.group(1).strip() parties_info["carrier"] = vessel_match.group(1).strip()
return parties_info return parties_info
def _extract_shipment_info(self): def _extract_shipment_info(self, text):
"""Extrait les informations d'expédition""" """Extrait les informations d'expédition"""
shipment_info = { shipment_info = {
"vessel": None, "vessel": None,
@@ -140,43 +138,43 @@ class AHKParser:
} }
# Extraction du navire # Extraction du navire
vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', self.text) vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
if vessel_match: if vessel_match:
shipment_info["vessel"] = vessel_match.group(1).strip() shipment_info["vessel"] = vessel_match.group(1).strip()
# Extraction du numéro de connaissement # Extraction du numéro de connaissement
bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', self.text) bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', text)
if bl_no_match: if bl_no_match:
shipment_info["bl_no"] = bl_no_match.group(1).strip() shipment_info["bl_no"] = bl_no_match.group(1).strip()
# Extraction de la date du connaissement # Extraction de la date du connaissement
bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text) bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
if bl_date_match: if bl_date_match:
shipment_info["bl_date"] = bl_date_match.group(1).strip() shipment_info["bl_date"] = bl_date_match.group(1).strip()
# Extraction du port de destination # Extraction du port de destination
dest_match = re.search(r'Destination\s*:\s*([^\n]+)', self.text) dest_match = re.search(r'Destination\s*:\s*([^\n]+)', text)
if dest_match: if dest_match:
shipment_info["port_destination"] = dest_match.group(1).strip() shipment_info["port_destination"] = dest_match.group(1).strip()
# Extraction de la date d'arrivée # Extraction de la date d'arrivée
arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text) arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
if arrival_match: if arrival_match:
shipment_info["arrival_date"] = arrival_match.group(1).strip() shipment_info["arrival_date"] = arrival_match.group(1).strip()
# Extraction de la méthode de pesée # Extraction de la méthode de pesée
weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', self.text) weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', text)
if weighing_method_match: if weighing_method_match:
shipment_info["weighing_method"] = weighing_method_match.group(1).strip() shipment_info["weighing_method"] = weighing_method_match.group(1).strip()
# Extraction du nombre de balles # Extraction du nombre de balles
bales_match = re.search(r'Total Bales\s*:\s*(\d+)', self.text) bales_match = re.search(r'Total Bales\s*:\s*(\d+)', text)
if bales_match: if bales_match:
shipment_info["bales"] = int(bales_match.group(1).strip()) shipment_info["bales"] = int(bales_match.group(1).strip())
return shipment_info return shipment_info
def _extract_weights_info(self): def _extract_weights_info(self, text):
"""Extrait les informations de poids""" """Extrait les informations de poids"""
weights_info = { weights_info = {
"gross_landed_kg": None, "gross_landed_kg": None,
@@ -188,32 +186,32 @@ class AHKParser:
} }
# Extraction du poids brut débarqué # Extraction du poids brut débarqué
gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', self.text) gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', text)
if gross_landed_match: if gross_landed_match:
weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).strip()) weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).strip())
# Extraction du poids de tare # Extraction du poids de tare
tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', self.text) tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', text)
if tare_match: if tare_match:
weights_info["tare_kg"] = float(tare_match.group(1).strip()) weights_info["tare_kg"] = float(tare_match.group(1).strip())
# Extraction du poids net débarqué # Extraction du poids net débarqué
net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', self.text) net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
if net_landed_match: if net_landed_match:
weights_info["net_landed_kg"] = float(net_landed_match.group(1).strip()) weights_info["net_landed_kg"] = float(net_landed_match.group(1).strip())
# Extraction du poids net facturé # Extraction du poids net facturé
invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', self.text) invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
if invoice_net_match: if invoice_net_match:
weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).strip()) weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).strip())
# Extraction de la perte en kg # Extraction de la perte en kg
loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', self.text) loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', text)
if loss_match: if loss_match:
weights_info["gain_loss_kg"] = -float(loss_match.group(1).strip()) weights_info["gain_loss_kg"] = -float(loss_match.group(1).strip())
# Extraction du pourcentage de perte # Extraction du pourcentage de perte
percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', self.text) percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', text)
if percent_match: if percent_match:
weights_info["gain_loss_percent"] = -float(percent_match.group(1).strip()) weights_info["gain_loss_percent"] = -float(percent_match.group(1).strip())