This commit is contained in:
2026-01-11 20:25:13 +01:00
parent e6824fea9d
commit 9726bb57bc

315
app.py
View File

@@ -24,80 +24,275 @@ file_handler.setFormatter(logging.Formatter(
"%(asctime)s - %(levelname)s - %(name)s - %(message)s" "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
)) ))
import re
from datetime import datetime
class AHKParser:
    """Extract structured data from the plain text of an AHK weight report.

    The parser is purely regex-based: every field is searched for
    independently in the report text and is left as ``None`` when its
    pattern does not match.
    """

    # Laboratory identifier, kept as a class attribute so this parser exposes
    # the same ``lab`` attribute as the other parsers (e.g. IntertekParser).
    lab = "AHK"

    # Client reference, e.g. "S-1234/ INV 5678" (contract part / invoice part).
    _CLIENT_REF = r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)'
    # Date as printed in the report, e.g. "5-Jan-2026".
    _DATE = r'(\d{1,2}-[A-Za-z]{3}-\d{4})'
    # Decimal number that float() is guaranteed to accept ("12", "12.5",
    # "12.", ".5"); unlike "[\d.]+" it cannot match "1.2.3" or a lone ".".
    _NUMBER = r'(\d+\.?\d*|\.\d+)'

    def __init__(self, text_content):
        """Store the report text to parse.

        Args:
            text_content: Full text of the report, as a string.
        """
        self.text = text_content
        self.data = None

    def parse(self, lab="AHK"):
        """Parse the text and return a structured dictionary.

        Args:
            lab: Laboratory label stored under the ``"lab"`` key.

        Returns:
            A dict with the keys ``lab``, ``report``, ``contract``,
            ``parties``, ``shipment`` and ``weights``. The same dict is also
            kept in ``self.data``.
        """
        result = {
            "lab": lab,
            "report": self._extract_report_info(),
            "contract": self._extract_contract_info(),
            "parties": self._extract_parties_info(),
            "shipment": self._extract_shipment_info(),
            "weights": self._extract_weights_info()
        }
        self.data = result
        return result

    def _search(self, pattern):
        """Return the stripped first group of *pattern* in the text, or None."""
        match = re.search(pattern, self.text)
        return match.group(1).strip() if match else None

    def _search_float(self, pattern):
        """Return the first group of *pattern* as a float, or None if absent."""
        value = self._search(pattern)
        return float(value) if value is not None else None

    def _search_signed(self, pattern):
        """Return a signed float from a (sign, number) pattern, or None.

        Group 1 of *pattern* is the optional printed sign and group 2 the
        magnitude; the value is negative only when a "-" is printed.
        """
        match = re.search(pattern, self.text)
        if match is None:
            return None
        magnitude = float(match.group(2))
        return -magnitude if match.group(1) == "-" else magnitude

    def _extract_report_info(self):
        """Extract the report-level information (reference, file no, date)."""
        return {
            # Client reference as printed in the report.
            "reference": self._search(self._CLIENT_REF),
            # AHK file number: whatever follows "AHK S/".
            "file_no": self._search(r'AHK\s*S/([\w/]+)'),
            # Date on which the report was signed.
            "date": self._search(r'Signed on\s*' + self._DATE)
        }

    def _extract_contract_info(self):
        """Extract the contract information."""
        contract_info = {
            "contract_no": None,
            "invoice_no": None,
            "lc_no": None,
            "origin": None,
            "commodity": None
        }

        # The client reference doubles as contract number / invoice number:
        # the pattern guarantees exactly one "/" separating the two parts.
        reference = self._search(self._CLIENT_REF)
        if reference is not None:
            contract_part, _, invoice_part = reference.partition('/')
            contract_info["contract_no"] = contract_part.strip()
            contract_info["invoice_no"] = invoice_part.strip()

        # Origin comes from the "Growth" field. Only the value line itself is
        # inspected so that text on following lines cannot trigger a false
        # "AUSTRALIAN" hit.
        growth = self._search(r'Growth\s*:\s*([^\n]+)')
        if growth:
            if "AUSTRALIAN" in growth.upper():
                contract_info["origin"] = "AUSTRALIA"
            else:
                # Unknown growth: keep the value as printed rather than
                # dropping it.
                contract_info["origin"] = growth

        # The commodity is not read from the text; it is always "RAW COTTON".
        contract_info["commodity"] = "RAW COTTON"
        return contract_info

    def _extract_parties_info(self):
        """Extract the parties (seller, buyer, carrier)."""
        return {
            # The "Client" field is reported as the seller.
            "seller": self._search(r'Client\s*:\s*([^\n]+)'),
            "buyer": self._search(r'Buyer\s*:\s*([^\n]+)'),
            # The vessel name is used as the carrier.
            "carrier": self._search(r'Vessel\s*:\s*([^\n]+)')
        }

    def _extract_shipment_info(self):
        """Extract the shipment information."""
        bales = self._search(r'Total Bales\s*:\s*(\d+)')
        return {
            "vessel": self._search(r'Vessel\s*:\s*([^\n]+)'),
            "bl_no": self._search(r'B/L No\.\s*:\s*([^\n]+)'),
            "bl_date": self._search(r'B/L Date\s*:\s*' + self._DATE),
            "port_loading": None,  # not extracted from the text
            "port_destination": self._search(r'Destination\s*:\s*([^\n]+)'),
            "arrival_date": self._search(r'Arrival Date\s*:\s*' + self._DATE),
            "weighing_place": None,  # not extracted from the text
            "weighing_method": self._search(r'Weighing method\s*:\s*([^\n]+)'),
            "bales": int(bales) if bales is not None else None
        }

    def _extract_weights_info(self):
        """Extract the weight figures (kg) and the gain/loss.

        Gain/loss values keep the sign printed in the report: a printed "-"
        yields a negative value, anything else a positive one.
        """
        number = self._NUMBER
        kg = r'\s*kg'
        # Lazy prefix: the first matching label after the section header.
        landed = r'LANDED WEIGHTS[\s\S]*?'
        invoice = r'INVOICE WEIGHTS[\s\S]*?'

        # Tare is a landed weight like gross and net, so look inside the
        # landed section first; fall back to the first "Tare" anywhere for
        # reports without that section header.
        tare = self._search_float(landed + r'Tare\s*:\s*' + number + kg)
        if tare is None:
            tare = self._search_float(r'Tare\s*:\s*' + number + kg)

        return {
            "gross_landed_kg": self._search_float(
                landed + r'Gross\s*:\s*' + number + kg),
            "tare_kg": tare,
            "net_landed_kg": self._search_float(
                landed + r'Net\s*:\s*' + number + kg),
            "invoice_net_kg": self._search_float(
                invoice + r'Net\s*:\s*' + number + kg),
            # Accept both "LOSS" and "GAIN" labels, with an optional sign.
            "gain_loss_kg": self._search_signed(
                r'(?:\bGAIN|LOSS)\s*:\s*([+-]?)\s*' + number + kg),
            "gain_loss_percent": self._search_signed(
                r'Percentage\s*:\s*([+-]?)\s*' + number + r'\s*%')
        }
# class AHKParser:
# lab = "AHK"
def _lines(self, text): # def _lines(self, text):
return [l.strip() for l in text.splitlines() if l.strip()] # return [l.strip() for l in text.splitlines() if l.strip()]
def _col_block(self, lines, labels, max_scan=30): # def _col_block(self, lines, labels, max_scan=30):
idx = [i for i,l in enumerate(lines) if l in labels] # idx = [i for i,l in enumerate(lines) if l in labels]
if not idx: # if not idx:
return {} # << empêche le crash # return {} # << empêche le crash
start = max(idx) + 1 # start = max(idx) + 1
vals = [] # vals = []
for l in lines[start:start+max_scan]: # for l in lines[start:start+max_scan]:
if l.startswith(":"): # if l.startswith(":"):
v = l[1:].replace("kg","").strip() # v = l[1:].replace("kg","").strip()
vals.append(v) # vals.append(v)
if len(vals) == len(labels): # if len(vals) == len(labels):
break # break
return dict(zip(labels, vals)) # return dict(zip(labels, vals))
def parse(self, text):
    """Parse a column-layout AHK report into a weight-report dict.

    Relies on helpers defined elsewhere in this module
    (``empty_weight_report``, ``safe_search``, ``to_float``, ``section``).

    Args:
        text: Full text of the report.

    Returns:
        The dict created by ``empty_weight_report("AHK")`` with the
        report, contract, parties, shipment and weights entries filled in.
    """
    all_lines = self._lines(text)
    report = empty_weight_report("AHK")
    header = report["report"]
    contract = report["contract"]
    shipment = report["shipment"]
    weights = report["weights"]

    # Report header
    header["reference"] = safe_search(r"(AHK\s*/[A-Z0-9/]+)", text)
    header["date"] = safe_search(r"Produced On\s*([0-9A-Za-z ]+)", text)

    # Contract
    contract["invoice_no"] = safe_search(r"Client Reference:\s*([A-Z0-9\- /]+)", text)
    contract["commodity"] = "Raw Cotton"

    # Buyer
    report["parties"]["buyer"] = safe_search(r"Buyer:\s*([A-Z0-9 ().,-]+)", text)

    # Shipment details are read from two label/value column blocks.
    first_block = self._col_block(
        all_lines,
        ["Total Bales", "Vessel", "Voy. No.", "B/L No.", "B/L Date", "Destination"],
    )
    second_block = self._col_block(
        all_lines,
        ["Growth", "Arrival Date", "First date of weighing",
         "Last Date of Weighing", "Weighing method", "Tare"],
    )

    shipment["bales"] = to_float(first_block.get("Total Bales"))
    shipment["vessel"] = first_block.get("Vessel")
    shipment["bl_no"] = first_block.get("B/L No.")
    shipment["port_destination"] = first_block.get("Destination")
    shipment["arrival_date"] = second_block.get("Arrival Date")
    shipment["weighing_method"] = second_block.get("Weighing method")
    contract["origin"] = second_block.get("Growth")

    # Invoice weights: column block looked up over the whole document.
    weight_labels = ["Bales", "Gross", "Tare", "Net"]
    invoice_block = self._col_block(all_lines, weight_labels)
    weights["invoice_net_kg"] = to_float(invoice_block.get("Net"))

    # Landed weights: same labels, restricted to the text between
    # "Bales Weighed" and "Outturn".
    landed_lines = self._lines(section(text, "Bales Weighed", "Outturn"))
    landed_block = self._col_block(landed_lines, ["Bales", "Gross", "Tare", "Net"])
    weights["gross_landed_kg"] = to_float(landed_block.get("Gross"))
    weights["tare_kg"] = to_float(landed_block.get("Tare"))
    weights["net_landed_kg"] = to_float(landed_block.get("Net"))

    # Gain/loss figures come from the text between "LOSS" and
    # "Invoice average".
    loss_text = section(text, "LOSS", "Invoice average")
    weights["gain_loss_kg"] = to_float(safe_search(r"(-?\d+\.?\d*)\s*kg", loss_text))
    weights["gain_loss_percent"] = to_float(
        safe_search(r"Percentage\s*:\s*(-?\d+\.?\d*)", loss_text)
    )

    return report
class IntertekParser: class IntertekParser:
lab="INTERTEK" lab="INTERTEK"
@@ -616,7 +811,7 @@ def empty_weight_report(lab):
"contract": {"contract_no": None, "invoice_no": None, "lc_no": None, "origin": None, "commodity": None}, "contract": {"contract_no": None, "invoice_no": None, "lc_no": None, "origin": None, "commodity": None},
"parties": {"seller": None, "buyer": None, "carrier": None}, "parties": {"seller": None, "buyer": None, "carrier": None},
"shipment": { "shipment": {
"vessel": None, "bl_no": None, "port_loading": None, "vessel": None, "bl_no": None, "bl_date": None, "port_loading": None,
"port_destination": None, "arrival_date": None, "port_destination": None, "arrival_date": None,
"weighing_place": None, "weighing_method": None, "weighing_place": None, "weighing_method": None,
"bales": None "bales": None