This commit is contained in:
2026-01-11 21:04:10 +01:00
parent 79bdc8beda
commit 1a1759871d

389
app.py
View File

@@ -11,6 +11,7 @@ import logging
import io
from logging.handlers import RotatingFileHandler
import re
from datetime import datetime
LOG_PATH = "/var/log/automation-service.log"
@@ -24,12 +25,15 @@ file_handler.setFormatter(logging.Formatter(
"%(asctime)s - %(levelname)s - %(name)s - %(message)s"
))
import re
from datetime import datetime
class AHKParser:
lab = "AHK"
def _clean_value(self, value):
"""Nettoie la valeur en supprimant les espaces inutiles"""
if value:
return value.strip()
return value
def parse(self, text):
"""Parse le texte et retourne un dictionnaire structuré"""
result = {
@@ -51,20 +55,20 @@ class AHKParser:
"date": None
}
# Recherche de la référence client
ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
# Recherche de la référence client - plus précise
ref_match = re.search(r'Client\s+Reference:\s*(S-\d+\s*/\s*INV\s*\d+)', text)
if ref_match:
report_info["reference"] = ref_match.group(1).strip()
report_info["reference"] = self._clean_value(ref_match.group(1))
# Recherche du numéro de fichier AHK
file_no_match = re.search(r'AHK\s*S/([\w/]+)', text)
file_no_match = re.search(r'AHK\s+S/([\w/]+)', text)
if file_no_match:
report_info["file_no"] = file_no_match.group(1).strip()
report_info["file_no"] = self._clean_value(file_no_match.group(1))
# Recherche de la date du rapport
date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
date_match = re.search(r'Signed\s+on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
if date_match:
report_info["date"] = date_match.group(1).strip()
report_info["date"] = self._clean_value(date_match.group(1))
return report_info
@@ -78,21 +82,24 @@ class AHKParser:
"commodity": None
}
# Extraction de la référence client (peut servir comme numéro de contrat)
ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
# Extraction de la référence client
ref_match = re.search(r'Client\s+Ref\s+No\.\s*:\s*([^\n]+)', text)
if ref_match:
ref_parts = ref_match.group(1).split('/')
if len(ref_parts) >= 2:
contract_info["contract_no"] = ref_parts[0].strip()
contract_info["invoice_no"] = ref_parts[1].strip()
ref_text = ref_match.group(1).strip()
# Sépare S-3488 et INV 4013
parts = re.split(r'[/\s]+', ref_text)
for part in parts:
if part.startswith('S-'):
contract_info["contract_no"] = part.strip()
elif part.startswith('INV'):
contract_info["invoice_no"] = part.strip()
# Extraction de l'origine et de la marchandise
origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', text)
if origin_match:
origin_text = origin_match.group(1).strip()
# Extraction de l'origine et de la marchandise - regex plus précise
growth_match = re.search(r'Growth\s*:\s*([A-Z\s]+?)(?=\s*(?:Vessel|$))', text)
if growth_match:
origin_text = growth_match.group(1).strip()
if "AUSTRALIAN" in origin_text.upper():
contract_info["origin"] = "AUSTRALIA"
# La marchandise est généralement "RAW COTTON"
contract_info["commodity"] = "RAW COTTON"
return contract_info
@@ -105,21 +112,20 @@ class AHKParser:
"carrier": None
}
# Extraction du vendeur (Client)
seller_match = re.search(r'Client\s*:\s*([^\n]+)', text)
# Extraction du vendeur (Client) - regex plus précise
seller_match = re.search(r'Client\s*:\s*([^\n:]+?)(?=\s*(?:Client Ref|Buyer|$))', text)
if seller_match:
parties_info["seller"] = seller_match.group(1).strip()
parties_info["seller"] = self._clean_value(seller_match.group(1))
# Extraction de l'acheteur (Buyer)
buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', text)
# Extraction de l'acheteur (Buyer) - regex plus précise
buyer_match = re.search(r'Buyer\s*:\s*([^\n:]+?)(?=\s*(?:Total Bales|$))', text)
if buyer_match:
parties_info["buyer"] = buyer_match.group(1).strip()
parties_info["buyer"] = self._clean_value(buyer_match.group(1))
# Extraction du transporteur (Vessel)
vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
# Extraction du transporteur (nom du navire seulement)
vessel_match = re.search(r'Vessel\s*:\s*([A-Z\s]+?)(?=\s*(?:Arrival|Voy|$))', text)
if vessel_match:
# On considère le nom du navire comme transporteur
parties_info["carrier"] = vessel_match.group(1).strip()
parties_info["carrier"] = self._clean_value(vessel_match.group(1))
return parties_info
@@ -129,48 +135,51 @@ class AHKParser:
"vessel": None,
"bl_no": None,
"bl_date": None,
"port_loading": None, # Non spécifié dans le texte
"port_loading": None,
"port_destination": None,
"arrival_date": None,
"weighing_place": None, # Non spécifié dans le texte
"weighing_place": None,
"weighing_method": None,
"bales": None
}
# Extraction du navire
vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
# Extraction du navire (nom seulement)
vessel_match = re.search(r'Vessel\s*:\s*([A-Z\s]+?)(?=\s*(?:Arrival|Voy|$))', text)
if vessel_match:
shipment_info["vessel"] = vessel_match.group(1).strip()
shipment_info["vessel"] = self._clean_value(vessel_match.group(1))
# Extraction du numéro de connaissement
bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', text)
# Extraction du numéro de connaissement (seulement le numéro)
bl_no_match = re.search(r'B/L\s+No\.\s*:\s*([A-Z0-9]+)(?=\s|$)', text)
if bl_no_match:
shipment_info["bl_no"] = bl_no_match.group(1).strip()
shipment_info["bl_no"] = self._clean_value(bl_no_match.group(1))
# Extraction de la date du connaissement
bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
bl_date_match = re.search(r'B/L\s+Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})(?=\s|$)', text)
if bl_date_match:
shipment_info["bl_date"] = bl_date_match.group(1).strip()
shipment_info["bl_date"] = self._clean_value(bl_date_match.group(1))
# Extraction du port de destination
dest_match = re.search(r'Destination\s*:\s*([^\n]+)', text)
# Extraction du port de destination (sans le "Tare")
dest_match = re.search(r'Destination\s*:\s*([A-Z,\s]+?)(?=\s*(?:Tare|$))', text)
if dest_match:
shipment_info["port_destination"] = dest_match.group(1).strip()
shipment_info["port_destination"] = self._clean_value(dest_match.group(1))
# Extraction de la date d'arrivée
arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
arrival_match = re.search(r'Arrival\s+Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})(?=\s|$)', text)
if arrival_match:
shipment_info["arrival_date"] = arrival_match.group(1).strip()
shipment_info["arrival_date"] = self._clean_value(arrival_match.group(1))
# Extraction de la méthode de pesée
weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', text)
weighing_method_match = re.search(r'Weighing\s+method\s*:\s*([^\n]+?)(?=\s*(?:Tare|$))', text)
if weighing_method_match:
shipment_info["weighing_method"] = weighing_method_match.group(1).strip()
shipment_info["weighing_method"] = self._clean_value(weighing_method_match.group(1))
# Extraction du nombre de balles
bales_match = re.search(r'Total Bales\s*:\s*(\d+)', text)
bales_match = re.search(r'Total\s+Bales\s*:\s*(\d+)(?=\s|$)', text)
if bales_match:
try:
shipment_info["bales"] = int(bales_match.group(1).strip())
except ValueError:
shipment_info["bales"] = None
return shipment_info
@@ -185,112 +194,244 @@ class AHKParser:
"gain_loss_percent": None
}
# Extraction du poids brut débarqué
gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', text)
# Extraction du poids brut débarqué (corrigé - doit être 100580 kg)
gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.,]+)\s*kg', text)
if gross_landed_match:
weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).strip())
try:
weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).replace(',', '').strip())
except ValueError:
pass
# Extraction du poids de tare
tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', text)
tare_match = re.search(r'Tare\s*:\s*([\d.,]+)\s*kg', text)
if tare_match:
weights_info["tare_kg"] = float(tare_match.group(1).strip())
try:
weights_info["tare_kg"] = float(tare_match.group(1).replace(',', '').strip())
except ValueError:
pass
# Extraction du poids net débarqué
net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
# Extraction du poids net débarqué (corrigé - doit être 100078.40 kg)
net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.,]+)\s*kg', text)
if net_landed_match:
weights_info["net_landed_kg"] = float(net_landed_match.group(1).strip())
try:
weights_info["net_landed_kg"] = float(net_landed_match.group(1).replace(',', '').strip())
except ValueError:
pass
# Extraction du poids net facturé
invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
# Extraction du poids net facturé (101299 kg)
invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.,]+)\s*kg', text)
if invoice_net_match:
weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).strip())
try:
weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).replace(',', '').strip())
except ValueError:
pass
# Extraction de la perte en kg
loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', text)
loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.,]+)\s*kg', text)
if loss_match:
weights_info["gain_loss_kg"] = -float(loss_match.group(1).strip())
try:
weights_info["gain_loss_kg"] = -float(loss_match.group(1).replace(',', '').strip())
except ValueError:
pass
# Extraction du pourcentage de perte
percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', text)
percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.,]+)%', text)
if percent_match:
weights_info["gain_loss_percent"] = -float(percent_match.group(1).strip())
try:
weights_info["gain_loss_percent"] = -float(percent_match.group(1).replace(',', '').strip())
except ValueError:
pass
return weights_info
# class AHKParser:
# lab="AHK"
# def _lines(self, text):
# return [l.strip() for l in text.splitlines() if l.strip()]
# def _col_block(self, lines, labels, max_scan=30):
# idx = [i for i,l in enumerate(lines) if l in labels]
# if not idx:
# return {} # << empêche le crash
# start = max(idx) + 1
# vals = []
# for l in lines[start:start+max_scan]:
# if l.startswith(":"):
# v = l[1:].replace("kg","").strip()
# vals.append(v)
# if len(vals) == len(labels):
# break
# return dict(zip(labels, vals))
# def parse(self, text):
# L = self._lines(text)
# r = empty_weight_report("AHK")
# """Parse le texte et retourne un dictionnaire structuré"""
# result = {
# "lab": self.lab,
# "report": self._extract_report_info(text),
# "contract": self._extract_contract_info(text),
# "parties": self._extract_parties_info(text),
# "shipment": self._extract_shipment_info(text),
# "weights": self._extract_weights_info(text)
# }
# self.data = result
# return result
# # report
# r["report"]["reference"] = safe_search(r"(AHK\s*/[A-Z0-9/]+)", text)
# r["report"]["date"] = safe_search(r"Produced On\s*([0-9A-Za-z ]+)", text)
# def _extract_report_info(self, text):
# """Extrait les informations du rapport"""
# report_info = {
# "reference": None,
# "file_no": None,
# "date": None
# }
# # contract
# r["contract"]["invoice_no"] = safe_search(r"Client Reference:\s*([A-Z0-9\- /]+)", text)
# r["contract"]["commodity"] = "Raw Cotton"
# # Recherche de la référence client
# ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
# if ref_match:
# report_info["reference"] = ref_match.group(1).strip()
# # buyer
# r["parties"]["buyer"] = safe_search(r"Buyer:\s*([A-Z0-9 ().,-]+)", text)
# # Recherche du numéro de fichier AHK
# file_no_match = re.search(r'AHK\s*S/([\w/]+)', text)
# if file_no_match:
# report_info["file_no"] = file_no_match.group(1).strip()
# # shipment block 1
# ship1 = self._col_block(L, [
# "Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination"
# ])
# # Recherche de la date du rapport
# date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
# if date_match:
# report_info["date"] = date_match.group(1).strip()
# # shipment block 2
# ship2 = self._col_block(L, [
# "Growth","Arrival Date","First date of weighing",
# "Last Date of Weighing","Weighing method","Tare"
# ])
# return report_info
# r["shipment"]["bales"] = to_float(ship1.get("Total Bales"))
# r["shipment"]["vessel"] = ship1.get("Vessel")
# r["shipment"]["bl_no"] = ship1.get("B/L No.")
# r["shipment"]["port_destination"] = ship1.get("Destination")
# r["shipment"]["arrival_date"] = ship2.get("Arrival Date")
# r["shipment"]["weighing_method"] = ship2.get("Weighing method")
# r["contract"]["origin"] = ship2.get("Growth")
# def _extract_contract_info(self, text):
# """Extrait les informations du contrat"""
# contract_info = {
# "contract_no": None,
# "invoice_no": None,
# "lc_no": None,
# "origin": None,
# "commodity": None
# }
# # invoice weights
# inv = self._col_block(L, ["Bales","Gross","Tare","Net"])
# r["weights"]["invoice_net_kg"] = to_float(inv.get("Net"))
# # Extraction de la référence client (peut servir comme numéro de contrat)
# ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
# if ref_match:
# ref_parts = ref_match.group(1).split('/')
# if len(ref_parts) >= 2:
# contract_info["contract_no"] = ref_parts[0].strip()
# contract_info["invoice_no"] = ref_parts[1].strip()
# # landed weights
# land = self._col_block(
# self._lines(section(text,"Bales Weighed","Outturn")),
# ["Bales","Gross","Tare","Net"]
# )
# # Extraction de l'origine et de la marchandise
# origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', text)
# if origin_match:
# origin_text = origin_match.group(1).strip()
# if "AUSTRALIAN" in origin_text.upper():
# contract_info["origin"] = "AUSTRALIA"
# # La marchandise est généralement "RAW COTTON"
# contract_info["commodity"] = "RAW COTTON"
# r["weights"]["gross_landed_kg"] = to_float(land.get("Gross"))
# r["weights"]["tare_kg"] = to_float(land.get("Tare"))
# r["weights"]["net_landed_kg"] = to_float(land.get("Net"))
# return contract_info
# # loss
# loss = section(text,"LOSS","Invoice average")
# r["weights"]["gain_loss_kg"] = to_float(safe_search(r"(-?\d+\.?\d*)\s*kg", loss))
# r["weights"]["gain_loss_percent"] = to_float(safe_search(r"Percentage\s*:\s*(-?\d+\.?\d*)", loss))
# def _extract_parties_info(self, text):
# """Extrait les informations sur les parties"""
# parties_info = {
# "seller": None,
# "buyer": None,
# "carrier": None
# }
# return r
# # Extraction du vendeur (Client)
# seller_match = re.search(r'Client\s*:\s*([^\n]+)', text)
# if seller_match:
# parties_info["seller"] = seller_match.group(1).strip()
# # Extraction de l'acheteur (Buyer)
# buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', text)
# if buyer_match:
# parties_info["buyer"] = buyer_match.group(1).strip()
# # Extraction du transporteur (Vessel)
# vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
# if vessel_match:
# # On considère le nom du navire comme transporteur
# parties_info["carrier"] = vessel_match.group(1).strip()
# return parties_info
# def _extract_shipment_info(self, text):
# """Extrait les informations d'expédition"""
# shipment_info = {
# "vessel": None,
# "bl_no": None,
# "bl_date": None,
# "port_loading": None, # Non spécifié dans le texte
# "port_destination": None,
# "arrival_date": None,
# "weighing_place": None, # Non spécifié dans le texte
# "weighing_method": None,
# "bales": None
# }
# # Extraction du navire
# vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
# if vessel_match:
# shipment_info["vessel"] = vessel_match.group(1).strip()
# # Extraction du numéro de connaissement
# bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', text)
# if bl_no_match:
# shipment_info["bl_no"] = bl_no_match.group(1).strip()
# # Extraction de la date du connaissement
# bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
# if bl_date_match:
# shipment_info["bl_date"] = bl_date_match.group(1).strip()
# # Extraction du port de destination
# dest_match = re.search(r'Destination\s*:\s*([^\n]+)', text)
# if dest_match:
# shipment_info["port_destination"] = dest_match.group(1).strip()
# # Extraction de la date d'arrivée
# arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
# if arrival_match:
# shipment_info["arrival_date"] = arrival_match.group(1).strip()
# # Extraction de la méthode de pesée
# weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', text)
# if weighing_method_match:
# shipment_info["weighing_method"] = weighing_method_match.group(1).strip()
# # Extraction du nombre de balles
# bales_match = re.search(r'Total Bales\s*:\s*(\d+)', text)
# if bales_match:
# shipment_info["bales"] = int(bales_match.group(1).strip())
# return shipment_info
# def _extract_weights_info(self, text):
# """Extrait les informations de poids"""
# weights_info = {
# "gross_landed_kg": None,
# "tare_kg": None,
# "net_landed_kg": None,
# "invoice_net_kg": None,
# "gain_loss_kg": None,
# "gain_loss_percent": None
# }
# # Extraction du poids brut débarqué
# gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', text)
# if gross_landed_match:
# weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).strip())
# # Extraction du poids de tare
# tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', text)
# if tare_match:
# weights_info["tare_kg"] = float(tare_match.group(1).strip())
# # Extraction du poids net débarqué
# net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
# if net_landed_match:
# weights_info["net_landed_kg"] = float(net_landed_match.group(1).strip())
# # Extraction du poids net facturé
# invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
# if invoice_net_match:
# weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).strip())
# # Extraction de la perte en kg
# loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', text)
# if loss_match:
# weights_info["gain_loss_kg"] = -float(loss_match.group(1).strip())
# # Extraction du pourcentage de perte
# percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', text)
# if percent_match:
# weights_info["gain_loss_percent"] = -float(percent_match.group(1).strip())
# return weights_info
class IntertekParser:
lab="INTERTEK"