11.01.26
This commit is contained in:
70
app.py
70
app.py
@@ -28,24 +28,22 @@ import re
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
class AHKParser:
|
class AHKParser:
|
||||||
def __init__(self, text_content):
|
lab="AHK"
|
||||||
self.text = text_content
|
|
||||||
self.data = None
|
|
||||||
|
|
||||||
def parse(self, lab="AHK"):
|
def parse(self, text):
|
||||||
"""Parse le texte et retourne un dictionnaire structuré"""
|
"""Parse le texte et retourne un dictionnaire structuré"""
|
||||||
result = {
|
result = {
|
||||||
"lab": lab,
|
"lab": self.lab,
|
||||||
"report": self._extract_report_info(),
|
"report": self._extract_report_info(text),
|
||||||
"contract": self._extract_contract_info(),
|
"contract": self._extract_contract_info(text),
|
||||||
"parties": self._extract_parties_info(),
|
"parties": self._extract_parties_info(text),
|
||||||
"shipment": self._extract_shipment_info(),
|
"shipment": self._extract_shipment_info(text),
|
||||||
"weights": self._extract_weights_info()
|
"weights": self._extract_weights_info(text)
|
||||||
}
|
}
|
||||||
self.data = result
|
self.data = result
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _extract_report_info(self):
|
def _extract_report_info(self, text):
|
||||||
"""Extrait les informations du rapport"""
|
"""Extrait les informations du rapport"""
|
||||||
report_info = {
|
report_info = {
|
||||||
"reference": None,
|
"reference": None,
|
||||||
@@ -54,23 +52,23 @@ class AHKParser:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Recherche de la référence client
|
# Recherche de la référence client
|
||||||
ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', self.text)
|
ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
|
||||||
if ref_match:
|
if ref_match:
|
||||||
report_info["reference"] = ref_match.group(1).strip()
|
report_info["reference"] = ref_match.group(1).strip()
|
||||||
|
|
||||||
# Recherche du numéro de fichier AHK
|
# Recherche du numéro de fichier AHK
|
||||||
file_no_match = re.search(r'AHK\s*S/([\w/]+)', self.text)
|
file_no_match = re.search(r'AHK\s*S/([\w/]+)', text)
|
||||||
if file_no_match:
|
if file_no_match:
|
||||||
report_info["file_no"] = file_no_match.group(1).strip()
|
report_info["file_no"] = file_no_match.group(1).strip()
|
||||||
|
|
||||||
# Recherche de la date du rapport
|
# Recherche de la date du rapport
|
||||||
date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text)
|
date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
|
||||||
if date_match:
|
if date_match:
|
||||||
report_info["date"] = date_match.group(1).strip()
|
report_info["date"] = date_match.group(1).strip()
|
||||||
|
|
||||||
return report_info
|
return report_info
|
||||||
|
|
||||||
def _extract_contract_info(self):
|
def _extract_contract_info(self, text):
|
||||||
"""Extrait les informations du contrat"""
|
"""Extrait les informations du contrat"""
|
||||||
contract_info = {
|
contract_info = {
|
||||||
"contract_no": None,
|
"contract_no": None,
|
||||||
@@ -81,7 +79,7 @@ class AHKParser:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Extraction de la référence client (peut servir comme numéro de contrat)
|
# Extraction de la référence client (peut servir comme numéro de contrat)
|
||||||
ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', self.text)
|
ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
|
||||||
if ref_match:
|
if ref_match:
|
||||||
ref_parts = ref_match.group(1).split('/')
|
ref_parts = ref_match.group(1).split('/')
|
||||||
if len(ref_parts) >= 2:
|
if len(ref_parts) >= 2:
|
||||||
@@ -89,7 +87,7 @@ class AHKParser:
|
|||||||
contract_info["invoice_no"] = ref_parts[1].strip()
|
contract_info["invoice_no"] = ref_parts[1].strip()
|
||||||
|
|
||||||
# Extraction de l'origine et de la marchandise
|
# Extraction de l'origine et de la marchandise
|
||||||
origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', self.text)
|
origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', text)
|
||||||
if origin_match:
|
if origin_match:
|
||||||
origin_text = origin_match.group(1).strip()
|
origin_text = origin_match.group(1).strip()
|
||||||
if "AUSTRALIAN" in origin_text.upper():
|
if "AUSTRALIAN" in origin_text.upper():
|
||||||
@@ -99,7 +97,7 @@ class AHKParser:
|
|||||||
|
|
||||||
return contract_info
|
return contract_info
|
||||||
|
|
||||||
def _extract_parties_info(self):
|
def _extract_parties_info(self, text):
|
||||||
"""Extrait les informations sur les parties"""
|
"""Extrait les informations sur les parties"""
|
||||||
parties_info = {
|
parties_info = {
|
||||||
"seller": None,
|
"seller": None,
|
||||||
@@ -108,24 +106,24 @@ class AHKParser:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Extraction du vendeur (Client)
|
# Extraction du vendeur (Client)
|
||||||
seller_match = re.search(r'Client\s*:\s*([^\n]+)', self.text)
|
seller_match = re.search(r'Client\s*:\s*([^\n]+)', text)
|
||||||
if seller_match:
|
if seller_match:
|
||||||
parties_info["seller"] = seller_match.group(1).strip()
|
parties_info["seller"] = seller_match.group(1).strip()
|
||||||
|
|
||||||
# Extraction de l'acheteur (Buyer)
|
# Extraction de l'acheteur (Buyer)
|
||||||
buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', self.text)
|
buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', text)
|
||||||
if buyer_match:
|
if buyer_match:
|
||||||
parties_info["buyer"] = buyer_match.group(1).strip()
|
parties_info["buyer"] = buyer_match.group(1).strip()
|
||||||
|
|
||||||
# Extraction du transporteur (Vessel)
|
# Extraction du transporteur (Vessel)
|
||||||
vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', self.text)
|
vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
|
||||||
if vessel_match:
|
if vessel_match:
|
||||||
# On considère le nom du navire comme transporteur
|
# On considère le nom du navire comme transporteur
|
||||||
parties_info["carrier"] = vessel_match.group(1).strip()
|
parties_info["carrier"] = vessel_match.group(1).strip()
|
||||||
|
|
||||||
return parties_info
|
return parties_info
|
||||||
|
|
||||||
def _extract_shipment_info(self):
|
def _extract_shipment_info(self, text):
|
||||||
"""Extrait les informations d'expédition"""
|
"""Extrait les informations d'expédition"""
|
||||||
shipment_info = {
|
shipment_info = {
|
||||||
"vessel": None,
|
"vessel": None,
|
||||||
@@ -140,43 +138,43 @@ class AHKParser:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Extraction du navire
|
# Extraction du navire
|
||||||
vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', self.text)
|
vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
|
||||||
if vessel_match:
|
if vessel_match:
|
||||||
shipment_info["vessel"] = vessel_match.group(1).strip()
|
shipment_info["vessel"] = vessel_match.group(1).strip()
|
||||||
|
|
||||||
# Extraction du numéro de connaissement
|
# Extraction du numéro de connaissement
|
||||||
bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', self.text)
|
bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', text)
|
||||||
if bl_no_match:
|
if bl_no_match:
|
||||||
shipment_info["bl_no"] = bl_no_match.group(1).strip()
|
shipment_info["bl_no"] = bl_no_match.group(1).strip()
|
||||||
|
|
||||||
# Extraction de la date du connaissement
|
# Extraction de la date du connaissement
|
||||||
bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text)
|
bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
|
||||||
if bl_date_match:
|
if bl_date_match:
|
||||||
shipment_info["bl_date"] = bl_date_match.group(1).strip()
|
shipment_info["bl_date"] = bl_date_match.group(1).strip()
|
||||||
|
|
||||||
# Extraction du port de destination
|
# Extraction du port de destination
|
||||||
dest_match = re.search(r'Destination\s*:\s*([^\n]+)', self.text)
|
dest_match = re.search(r'Destination\s*:\s*([^\n]+)', text)
|
||||||
if dest_match:
|
if dest_match:
|
||||||
shipment_info["port_destination"] = dest_match.group(1).strip()
|
shipment_info["port_destination"] = dest_match.group(1).strip()
|
||||||
|
|
||||||
# Extraction de la date d'arrivée
|
# Extraction de la date d'arrivée
|
||||||
arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', self.text)
|
arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
|
||||||
if arrival_match:
|
if arrival_match:
|
||||||
shipment_info["arrival_date"] = arrival_match.group(1).strip()
|
shipment_info["arrival_date"] = arrival_match.group(1).strip()
|
||||||
|
|
||||||
# Extraction de la méthode de pesée
|
# Extraction de la méthode de pesée
|
||||||
weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', self.text)
|
weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', text)
|
||||||
if weighing_method_match:
|
if weighing_method_match:
|
||||||
shipment_info["weighing_method"] = weighing_method_match.group(1).strip()
|
shipment_info["weighing_method"] = weighing_method_match.group(1).strip()
|
||||||
|
|
||||||
# Extraction du nombre de balles
|
# Extraction du nombre de balles
|
||||||
bales_match = re.search(r'Total Bales\s*:\s*(\d+)', self.text)
|
bales_match = re.search(r'Total Bales\s*:\s*(\d+)', text)
|
||||||
if bales_match:
|
if bales_match:
|
||||||
shipment_info["bales"] = int(bales_match.group(1).strip())
|
shipment_info["bales"] = int(bales_match.group(1).strip())
|
||||||
|
|
||||||
return shipment_info
|
return shipment_info
|
||||||
|
|
||||||
def _extract_weights_info(self):
|
def _extract_weights_info(self, text):
|
||||||
"""Extrait les informations de poids"""
|
"""Extrait les informations de poids"""
|
||||||
weights_info = {
|
weights_info = {
|
||||||
"gross_landed_kg": None,
|
"gross_landed_kg": None,
|
||||||
@@ -188,32 +186,32 @@ class AHKParser:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Extraction du poids brut débarqué
|
# Extraction du poids brut débarqué
|
||||||
gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', self.text)
|
gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', text)
|
||||||
if gross_landed_match:
|
if gross_landed_match:
|
||||||
weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).strip())
|
weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).strip())
|
||||||
|
|
||||||
# Extraction du poids de tare
|
# Extraction du poids de tare
|
||||||
tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', self.text)
|
tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', text)
|
||||||
if tare_match:
|
if tare_match:
|
||||||
weights_info["tare_kg"] = float(tare_match.group(1).strip())
|
weights_info["tare_kg"] = float(tare_match.group(1).strip())
|
||||||
|
|
||||||
# Extraction du poids net débarqué
|
# Extraction du poids net débarqué
|
||||||
net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', self.text)
|
net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
|
||||||
if net_landed_match:
|
if net_landed_match:
|
||||||
weights_info["net_landed_kg"] = float(net_landed_match.group(1).strip())
|
weights_info["net_landed_kg"] = float(net_landed_match.group(1).strip())
|
||||||
|
|
||||||
# Extraction du poids net facturé
|
# Extraction du poids net facturé
|
||||||
invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', self.text)
|
invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
|
||||||
if invoice_net_match:
|
if invoice_net_match:
|
||||||
weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).strip())
|
weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).strip())
|
||||||
|
|
||||||
# Extraction de la perte en kg
|
# Extraction de la perte en kg
|
||||||
loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', self.text)
|
loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', text)
|
||||||
if loss_match:
|
if loss_match:
|
||||||
weights_info["gain_loss_kg"] = -float(loss_match.group(1).strip())
|
weights_info["gain_loss_kg"] = -float(loss_match.group(1).strip())
|
||||||
|
|
||||||
# Extraction du pourcentage de perte
|
# Extraction du pourcentage de perte
|
||||||
percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', self.text)
|
percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', text)
|
||||||
if percent_match:
|
if percent_match:
|
||||||
weights_info["gain_loss_percent"] = -float(percent_match.group(1).strip())
|
weights_info["gain_loss_percent"] = -float(percent_match.group(1).strip())
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user