11.01.26
This commit is contained in:
494
app.py
494
app.py
@@ -243,326 +243,227 @@ class AHKParser:
|
||||
pass
|
||||
|
||||
return weights_info
|
||||
# class AHKParser:
|
||||
# lab="AHK"
|
||||
|
||||
# def parse(self, text):
|
||||
# """Parse le texte et retourne un dictionnaire structuré"""
|
||||
# result = {
|
||||
# "lab": self.lab,
|
||||
# "report": self._extract_report_info(text),
|
||||
# "contract": self._extract_contract_info(text),
|
||||
# "parties": self._extract_parties_info(text),
|
||||
# "shipment": self._extract_shipment_info(text),
|
||||
# "weights": self._extract_weights_info(text)
|
||||
# }
|
||||
# self.data = result
|
||||
# return result
|
||||
|
||||
# def _extract_report_info(self, text):
|
||||
# """Extrait les informations du rapport"""
|
||||
# report_info = {
|
||||
# "reference": None,
|
||||
# "file_no": None,
|
||||
# "date": None
|
||||
# }
|
||||
|
||||
# # Recherche de la référence client
|
||||
# ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
|
||||
# if ref_match:
|
||||
# report_info["reference"] = ref_match.group(1).strip()
|
||||
|
||||
# # Recherche du numéro de fichier AHK
|
||||
# file_no_match = re.search(r'AHK\s*S/([\w/]+)', text)
|
||||
# if file_no_match:
|
||||
# report_info["file_no"] = file_no_match.group(1).strip()
|
||||
|
||||
# # Recherche de la date du rapport
|
||||
# date_match = re.search(r'Signed on\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
|
||||
# if date_match:
|
||||
# report_info["date"] = date_match.group(1).strip()
|
||||
|
||||
# return report_info
|
||||
|
||||
# def _extract_contract_info(self, text):
|
||||
# """Extrait les informations du contrat"""
|
||||
# contract_info = {
|
||||
# "contract_no": None,
|
||||
# "invoice_no": None,
|
||||
# "lc_no": None,
|
||||
# "origin": None,
|
||||
# "commodity": None
|
||||
# }
|
||||
|
||||
# # Extraction de la référence client (peut servir comme numéro de contrat)
|
||||
# ref_match = re.search(r'Client Reference:\s*(S-\d+/\s*INV\s*\d+)', text)
|
||||
# if ref_match:
|
||||
# ref_parts = ref_match.group(1).split('/')
|
||||
# if len(ref_parts) >= 2:
|
||||
# contract_info["contract_no"] = ref_parts[0].strip()
|
||||
# contract_info["invoice_no"] = ref_parts[1].strip()
|
||||
|
||||
# # Extraction de l'origine et de la marchandise
|
||||
# origin_match = re.search(r'Growth\s*:\s*([\w\s]+)', text)
|
||||
# if origin_match:
|
||||
# origin_text = origin_match.group(1).strip()
|
||||
# if "AUSTRALIAN" in origin_text.upper():
|
||||
# contract_info["origin"] = "AUSTRALIA"
|
||||
# # La marchandise est généralement "RAW COTTON"
|
||||
# contract_info["commodity"] = "RAW COTTON"
|
||||
|
||||
# return contract_info
|
||||
|
||||
# def _extract_parties_info(self, text):
|
||||
# """Extrait les informations sur les parties"""
|
||||
# parties_info = {
|
||||
# "seller": None,
|
||||
# "buyer": None,
|
||||
# "carrier": None
|
||||
# }
|
||||
|
||||
# # Extraction du vendeur (Client)
|
||||
# seller_match = re.search(r'Client\s*:\s*([^\n]+)', text)
|
||||
# if seller_match:
|
||||
# parties_info["seller"] = seller_match.group(1).strip()
|
||||
|
||||
# # Extraction de l'acheteur (Buyer)
|
||||
# buyer_match = re.search(r'Buyer\s*:\s*([^\n]+)', text)
|
||||
# if buyer_match:
|
||||
# parties_info["buyer"] = buyer_match.group(1).strip()
|
||||
|
||||
# # Extraction du transporteur (Vessel)
|
||||
# vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
|
||||
# if vessel_match:
|
||||
# # On considère le nom du navire comme transporteur
|
||||
# parties_info["carrier"] = vessel_match.group(1).strip()
|
||||
|
||||
# return parties_info
|
||||
|
||||
# def _extract_shipment_info(self, text):
|
||||
# """Extrait les informations d'expédition"""
|
||||
# shipment_info = {
|
||||
# "vessel": None,
|
||||
# "bl_no": None,
|
||||
# "bl_date": None,
|
||||
# "port_loading": None, # Non spécifié dans le texte
|
||||
# "port_destination": None,
|
||||
# "arrival_date": None,
|
||||
# "weighing_place": None, # Non spécifié dans le texte
|
||||
# "weighing_method": None,
|
||||
# "bales": None
|
||||
# }
|
||||
|
||||
# # Extraction du navire
|
||||
# vessel_match = re.search(r'Vessel\s*:\s*([^\n]+)', text)
|
||||
# if vessel_match:
|
||||
# shipment_info["vessel"] = vessel_match.group(1).strip()
|
||||
|
||||
# # Extraction du numéro de connaissement
|
||||
# bl_no_match = re.search(r'B/L No\.\s*:\s*([^\n]+)', text)
|
||||
# if bl_no_match:
|
||||
# shipment_info["bl_no"] = bl_no_match.group(1).strip()
|
||||
|
||||
# # Extraction de la date du connaissement
|
||||
# bl_date_match = re.search(r'B/L Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
|
||||
# if bl_date_match:
|
||||
# shipment_info["bl_date"] = bl_date_match.group(1).strip()
|
||||
|
||||
# # Extraction du port de destination
|
||||
# dest_match = re.search(r'Destination\s*:\s*([^\n]+)', text)
|
||||
# if dest_match:
|
||||
# shipment_info["port_destination"] = dest_match.group(1).strip()
|
||||
|
||||
# # Extraction de la date d'arrivée
|
||||
# arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}-[A-Za-z]{3}-\d{4})', text)
|
||||
# if arrival_match:
|
||||
# shipment_info["arrival_date"] = arrival_match.group(1).strip()
|
||||
|
||||
# # Extraction de la méthode de pesée
|
||||
# weighing_method_match = re.search(r'Weighing method\s*:\s*([^\n]+)', text)
|
||||
# if weighing_method_match:
|
||||
# shipment_info["weighing_method"] = weighing_method_match.group(1).strip()
|
||||
|
||||
# # Extraction du nombre de balles
|
||||
# bales_match = re.search(r'Total Bales\s*:\s*(\d+)', text)
|
||||
# if bales_match:
|
||||
# shipment_info["bales"] = int(bales_match.group(1).strip())
|
||||
|
||||
# return shipment_info
|
||||
|
||||
# def _extract_weights_info(self, text):
|
||||
# """Extrait les informations de poids"""
|
||||
# weights_info = {
|
||||
# "gross_landed_kg": None,
|
||||
# "tare_kg": None,
|
||||
# "net_landed_kg": None,
|
||||
# "invoice_net_kg": None,
|
||||
# "gain_loss_kg": None,
|
||||
# "gain_loss_percent": None
|
||||
# }
|
||||
|
||||
# # Extraction du poids brut débarqué
|
||||
# gross_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Gross\s*:\s*([\d.]+)\s*kg', text)
|
||||
# if gross_landed_match:
|
||||
# weights_info["gross_landed_kg"] = float(gross_landed_match.group(1).strip())
|
||||
|
||||
# # Extraction du poids de tare
|
||||
# tare_match = re.search(r'Tare\s*:\s*([\d.]+)\s*kg', text)
|
||||
# if tare_match:
|
||||
# weights_info["tare_kg"] = float(tare_match.group(1).strip())
|
||||
|
||||
# # Extraction du poids net débarqué
|
||||
# net_landed_match = re.search(r'LANDED WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
|
||||
# if net_landed_match:
|
||||
# weights_info["net_landed_kg"] = float(net_landed_match.group(1).strip())
|
||||
|
||||
# # Extraction du poids net facturé
|
||||
# invoice_net_match = re.search(r'INVOICE WEIGHTS[\s\S]*?Net\s*:\s*([\d.]+)\s*kg', text)
|
||||
# if invoice_net_match:
|
||||
# weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).strip())
|
||||
|
||||
# # Extraction de la perte en kg
|
||||
# loss_match = re.search(r'LOSS\s*:\s*-\s*([\d.]+)\s*kg', text)
|
||||
# if loss_match:
|
||||
# weights_info["gain_loss_kg"] = -float(loss_match.group(1).strip())
|
||||
|
||||
# # Extraction du pourcentage de perte
|
||||
# percent_match = re.search(r'Percentage\s*:\s*-\s*([\d.]+)%', text)
|
||||
# if percent_match:
|
||||
# weights_info["gain_loss_percent"] = -float(percent_match.group(1).strip())
|
||||
|
||||
# return weights_info
|
||||
import re
|
||||
|
||||
class IntertekParser:
|
||||
lab="INTERTEK"
|
||||
lab = "Intertek"
|
||||
|
||||
def _clean_value(self, value):
|
||||
"""Nettoie la valeur en supprimant les espaces inutiles"""
|
||||
if value:
|
||||
return value.strip()
|
||||
return value
|
||||
|
||||
def _extract_number(self, text, pattern, is_int=False):
|
||||
"""Extrait un nombre (int ou float) du texte selon un pattern regex"""
|
||||
match = re.search(pattern, text)
|
||||
if match:
|
||||
try:
|
||||
# Nettoie la chaîne numérique
|
||||
num_str = match.group(1).replace(',', '').replace(' ', '').strip()
|
||||
if is_int:
|
||||
return int(num_str)
|
||||
else:
|
||||
return float(num_str)
|
||||
except (ValueError, AttributeError):
|
||||
return None
|
||||
return None
|
||||
|
||||
def parse(self, text):
|
||||
r=empty_weight_report("INTERTEK")
|
||||
pct=safe_search(r"([0-9.]+)\s*%",text)
|
||||
"""Parse le texte et retourne un dictionnaire structuré"""
|
||||
result = {
|
||||
"lab": self.lab,
|
||||
"report": self._extract_report_info(text),
|
||||
"contract": self._extract_contract_info(text),
|
||||
"parties": self._extract_parties_info(text),
|
||||
"shipment": self._extract_shipment_info(text),
|
||||
"weights": self._extract_weights_info(text)
|
||||
}
|
||||
return result
|
||||
|
||||
r["report"]["reference"]=extract("Global Ref",text)
|
||||
r["report"]["file_no"]=extract("Report / File No",text)
|
||||
r["report"]["date"]=extract("Dated",text)
|
||||
def _extract_report_info(self, text):
|
||||
"""Extrait les informations du rapport"""
|
||||
report_info = {
|
||||
"reference": None,
|
||||
"file_no": None,
|
||||
"date": None
|
||||
}
|
||||
|
||||
r["contract"]["contract_no"]=extract("Contract No",text)
|
||||
r["contract"]["invoice_no"]=extract("Invoice No",text)
|
||||
r["contract"]["origin"]=extract("Growth",text)
|
||||
r["contract"]["commodity"]="Raw Cotton"
|
||||
# Recherche de la référence globale
|
||||
ref_match = re.search(r'Global Ref\s*:\s*(GLO-\d+-[A-Z]+)', text)
|
||||
if ref_match:
|
||||
report_info["reference"] = self._clean_value(ref_match.group(1))
|
||||
|
||||
r["parties"]["buyer"]=extract("Buyer",text)
|
||||
# Recherche du numéro de fichier
|
||||
file_no_match = re.search(r'Report\s*/\s*File No\s*:\s*([A-Z]+-AGR\d+-?)', text)
|
||||
if file_no_match:
|
||||
report_info["file_no"] = self._clean_value(file_no_match.group(1))
|
||||
|
||||
r["shipment"]["vessel"]=extract("Vessel",text)
|
||||
r["shipment"]["bl_no"]=extract("B/L No",text)
|
||||
r["shipment"]["arrival_date"]=extract("Arrival Date",text)
|
||||
r["shipment"]["weighing_place"]=extract("Weighed at",text)
|
||||
r["shipment"]["bales"]=to_float(extract("Invoice Quantity",text))
|
||||
# Recherche de la date du rapport
|
||||
date_match = re.search(r'Dated\s*:\s*(\d{1,2}\.\d{1,2}\.\d{4})', text)
|
||||
if date_match:
|
||||
report_info["date"] = self._clean_value(date_match.group(1))
|
||||
|
||||
r["weights"]["gross_landed_kg"]=to_float(extract("Gross",text))
|
||||
r["weights"]["tare_kg"]=to_float(extract("Invoice Tare",text))
|
||||
r["weights"]["net_landed_kg"]=to_float(extract("Landed Weight",text))
|
||||
r["weights"]["invoice_net_kg"]=to_float(extract("Invoice Weight",text))
|
||||
r["weights"]["gain_loss_kg"]=to_float(extract("Gain",text))
|
||||
r["weights"]["gain_loss_percent"]=to_float(pct)
|
||||
return r
|
||||
return report_info
|
||||
|
||||
class RobertsonParser:
|
||||
lab="ROBERTSON"
|
||||
def parse(self,text):
|
||||
r=empty_weight_report("ROBERTSON")
|
||||
pct=safe_search(r"([0-9.]+)\s*%",text)
|
||||
def _extract_contract_info(self, text):
|
||||
"""Extrait les informations du contrat"""
|
||||
contract_info = {
|
||||
"contract_no": None,
|
||||
"invoice_no": None,
|
||||
"lc_no": None, # Non présent dans ce rapport
|
||||
"origin": None,
|
||||
"commodity": None
|
||||
}
|
||||
|
||||
r["report"]["reference"]=extract("OUR REF",text)
|
||||
r["report"]["date"]=extract("DATE",text)
|
||||
# Extraction du numéro de contrat
|
||||
contract_match = re.search(r'Contract No\s*:\s*([A-Z]?-\d+)', text)
|
||||
if contract_match:
|
||||
contract_info["contract_no"] = self._clean_value(contract_match.group(1))
|
||||
|
||||
r["contract"]["contract_no"]=extract("CONTRACT NO",text)
|
||||
r["contract"]["invoice_no"]=extract("INVOICE NO",text)
|
||||
r["contract"]["lc_no"]=extract("LIC NO",text)
|
||||
r["contract"]["commodity"]="Raw Cotton"
|
||||
# Extraction du numéro de facture
|
||||
invoice_match = re.search(r'Invoice No\s*:\s*(\d+)', text)
|
||||
if invoice_match:
|
||||
contract_info["invoice_no"] = self._clean_value(invoice_match.group(1))
|
||||
|
||||
r["parties"]["seller"]=extract("SELLER",text)
|
||||
r["parties"]["buyer"]=extract("BUYER",text)
|
||||
# Extraction de l'origine et de la marchandise
|
||||
growth_match = re.search(r'Growth\s*:\s*([A-Z\s]+)(?=\s*Shipper|\n|$)', text)
|
||||
if growth_match:
|
||||
origin_text = growth_match.group(1).strip()
|
||||
if "GREECE" in origin_text.upper():
|
||||
contract_info["origin"] = "GREECE"
|
||||
contract_info["commodity"] = "RAW COTTON"
|
||||
|
||||
r["shipment"]["vessel"]=extract("NAME OF VESSEL",text)
|
||||
r["shipment"]["port_loading"]=extract("SAILED FROM",text)
|
||||
r["shipment"]["port_destination"]=extract("ARRIVED AT",text)
|
||||
r["shipment"]["arrival_date"]=extract("DATE OF ARRIVAL",text)
|
||||
r["shipment"]["weighing_place"]=extract("PLACE OF CONTROL",text)
|
||||
r["shipment"]["bales"]=to_float(extract("CONSIGNMENT",text))
|
||||
return contract_info
|
||||
|
||||
r["weights"]["gross_landed_kg"]=to_float(extract("GROSS",text))
|
||||
r["weights"]["tare_kg"]=to_float(extract("TARE",text))
|
||||
r["weights"]["net_landed_kg"]=to_float(extract("LANDED NET",text))
|
||||
r["weights"]["invoice_net_kg"]=to_float(extract("INVOICE NET",text))
|
||||
r["weights"]["gain_loss_kg"]=to_float(extract("GAIN",text))
|
||||
r["weights"]["gain_loss_percent"]=to_float(pct)
|
||||
return r
|
||||
def _extract_parties_info(self, text):
|
||||
"""Extrait les informations sur les parties"""
|
||||
parties_info = {
|
||||
"seller": None,
|
||||
"buyer": None,
|
||||
"carrier": None
|
||||
}
|
||||
|
||||
class SGSParser:
|
||||
lab="SGS"
|
||||
def parse(self,text):
|
||||
r=empty_weight_report("SGS")
|
||||
r["report"]["reference"]=extract("LANDING REPORT No",text)
|
||||
r["report"]["file_no"]=extract("FILE NO.",text)
|
||||
r["report"]["date"]=extract("DATE",text)
|
||||
# Extraction du vendeur (Shipper)
|
||||
seller_match = re.search(r'Shipper\s*:\s*([^\n]+?)(?=\s*(?:Buyer|$))', text)
|
||||
if seller_match:
|
||||
parties_info["seller"] = self._clean_value(seller_match.group(1))
|
||||
|
||||
r["contract"]["contract_no"]=extract("CONTRACT NO.",text)
|
||||
r["contract"]["invoice_no"]=extract("INVOICE NO.",text)
|
||||
r["contract"]["origin"]=extract("ORIGIN",text)
|
||||
r["contract"]["commodity"]=extract("PRODUCT",text)
|
||||
# Extraction de l'acheteur (Buyer)
|
||||
buyer_match = re.search(r'Buyer\s*:\s*([^\n]+?)(?=\s*(?:CONTAINER|TOTAL|$))', text)
|
||||
if buyer_match:
|
||||
parties_info["buyer"] = self._clean_value(buyer_match.group(1))
|
||||
|
||||
r["parties"]["seller"]=extract("Seller",text)
|
||||
r["parties"]["buyer"]=extract("Buyer",text)
|
||||
r["parties"]["carrier"]=extract("Carrier",text)
|
||||
# Extraction du transporteur (nom du navire seulement)
|
||||
vessel_match = re.search(r'Vessel\s*:\s*([A-Z\s]+?)(?=\s*(?:Arrival|$))', text)
|
||||
if vessel_match:
|
||||
parties_info["carrier"] = self._clean_value(vessel_match.group(1))
|
||||
|
||||
r["shipment"]["bl_no"]=extract("B/L no.",text)
|
||||
r["shipment"]["port_loading"]=extract("Port of loading",text)
|
||||
r["shipment"]["port_destination"]=extract("Port of destination",text)
|
||||
r["shipment"]["arrival_date"]=extract("Vessel arrival date",text)
|
||||
r["shipment"]["weighing_place"]=extract("Place of weighing",text)
|
||||
r["shipment"]["weighing_method"]=extract("Weighing mode",text)
|
||||
r["shipment"]["bales"]=to_float(extract("Quantity arrived",text))
|
||||
return parties_info
|
||||
|
||||
r["weights"]["gross_landed_kg"]=to_float(extract("Gross landed",text))
|
||||
r["weights"]["tare_kg"]=to_float(extract("Tare",text))
|
||||
r["weights"]["net_landed_kg"]=to_float(extract("Net landed",text))
|
||||
r["weights"]["invoice_net_kg"]=to_float(extract("Net invoiced",text))
|
||||
r["weights"]["gain_loss_kg"]=to_float(safe_search(r"Gain.*?([0-9.,]+)\s*kgs",text))
|
||||
r["weights"]["gain_loss_percent"]=to_float(safe_search(r"Gain\s*\+?\s*([0-9.,]+)\s*%",text))
|
||||
return r
|
||||
def _extract_shipment_info(self, text):
|
||||
"""Extrait les informations d'expédition"""
|
||||
shipment_info = {
|
||||
"vessel": None,
|
||||
"bl_no": None,
|
||||
"bl_date": None, # Non présent dans ce rapport
|
||||
"port_loading": None, # Non présent dans ce rapport
|
||||
"port_destination": None, # Non présent dans ce rapport
|
||||
"arrival_date": None,
|
||||
"weighing_place": None,
|
||||
"weighing_method": None,
|
||||
"bales": None
|
||||
}
|
||||
|
||||
class PICLParser:
|
||||
lab="PICL"
|
||||
def parse(self,text):
|
||||
r=empty_weight_report("PICL")
|
||||
# Extraction du navire
|
||||
vessel_match = re.search(r'Vessel\s*:\s*([A-Z\s]+?)(?=\s*(?:Arrival|$))', text)
|
||||
if vessel_match:
|
||||
shipment_info["vessel"] = self._clean_value(vessel_match.group(1))
|
||||
|
||||
r["report"]["reference"]=safe_search(r"No[:\s]+([A-Z0-9\-]+)",text)
|
||||
r["report"]["date"]=safe_search(r"(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),?\s*([A-Za-z]+\s+[0-9]{1,2},\s*[0-9]{4})",text,group_index=2)
|
||||
# Extraction du numéro de connaissement
|
||||
bl_no_match = re.search(r'B/L\s+No\.\s*:\s*([A-Z0-9]+)', text)
|
||||
if bl_no_match:
|
||||
shipment_info["bl_no"] = self._clean_value(bl_no_match.group(1))
|
||||
|
||||
r["contract"]["contract_no"]=extract("Contract/Pl No & Date",text)
|
||||
r["contract"]["invoice_no"]=extract("Invoice ilo & Date",text)
|
||||
r["contract"]["lc_no"]=extract("L/C No & Date",text)
|
||||
r["contract"]["origin"]=extract("Country of Origin",text)
|
||||
r["contract"]["commodity"]=extract("Commodity",text)
|
||||
# Extraction de la date d'arrivée
|
||||
arrival_match = re.search(r'Arrival Date\s*:\s*(\d{1,2}\.\d{1,2}\.\d{4})', text)
|
||||
if arrival_match:
|
||||
shipment_info["arrival_date"] = self._clean_value(arrival_match.group(1))
|
||||
|
||||
r["parties"]["seller"]=extract("FAIRCOT SA",text)
|
||||
r["parties"]["buyer"]=extract("M/S.",text)
|
||||
r["parties"]["carrier"]=extract("Shipping Agent",text)
|
||||
# Extraction du lieu de pesée
|
||||
weighing_place_match = re.search(r'Weighed at\s*:\s*([^\n]+?)(?=\s*(?:Vessel|$))', text)
|
||||
if weighing_place_match:
|
||||
shipment_info["weighing_place"] = self._clean_value(weighing_place_match.group(1))
|
||||
|
||||
r["shipment"]["vessel"]=extract("Shipped Per Vessel",text)
|
||||
r["shipment"]["bl_no"]=extract("B/L No & Date",text)
|
||||
r["shipment"]["port_loading"]=extract("Port of Loading",text)
|
||||
r["shipment"]["port_destination"]=extract("Port of Discharge",text)
|
||||
r["shipment"]["arrival_date"]=extract("Date of Anival & LDL",text)
|
||||
r["shipment"]["weighing_place"]=extract("Place & Date of Weighment",text)
|
||||
r["shipment"]["weighing_method"]=extract("Method of Weighment",text)
|
||||
r["shipment"]["bales"]=to_float(extract("Grand Total",text))
|
||||
# Extraction de la méthode de pesée
|
||||
# Recherche dans les remarques
|
||||
remarks_section = re.search(r'REMARKS\s*(.+?)(?=ISSUED BY|$)', text, re.DOTALL | re.IGNORECASE)
|
||||
if remarks_section:
|
||||
remarks_text = remarks_section.group(1)
|
||||
if "weighbridge" in remarks_text.lower():
|
||||
shipment_info["weighing_method"] = "Weighbridge weighing by empty/full truck"
|
||||
|
||||
r["weights"]["gross_landed_kg"]=to_float(extract("Total;",text))
|
||||
r["weights"]["tare_kg"]=to_float(extract("Tare Weight",text))
|
||||
r["weights"]["net_landed_kg"]=to_float(extract("Grand Total",text))
|
||||
r["weights"]["invoice_net_kg"]=to_float(extract("Invoice weight",text))
|
||||
r["weights"]["gain_loss_kg"]=to_float(safe_search(r"(-[0-9.,]+)\s*KGS",text))
|
||||
r["weights"]["gain_loss_percent"]=to_float(safe_search(r"\(\s*([0-9.,]+)\s*o/o\s*\)",text))
|
||||
return r
|
||||
# Extraction du nombre de balles (à partir du total)
|
||||
bales_match = re.search(r'TOTAL\s+(\d{1,4}(?:,\d{3})?)\s+[\d,]+\.\d{2}', text)
|
||||
if not bales_match:
|
||||
# Essayons une autre approche
|
||||
bales_match = re.search(r'Invoice Quantity\s*:\s*(\d+)\s+Bales', text)
|
||||
|
||||
if bales_match:
|
||||
try:
|
||||
bales_str = bales_match.group(1).replace(',', '').strip()
|
||||
shipment_info["bales"] = int(bales_str)
|
||||
except ValueError:
|
||||
shipment_info["bales"] = None
|
||||
|
||||
return shipment_info
|
||||
|
||||
def _extract_weights_info(self, text):
|
||||
"""Extrait les informations de poids"""
|
||||
weights_info = {
|
||||
"gross_landed_kg": None,
|
||||
"tare_kg": None,
|
||||
"net_landed_kg": None,
|
||||
"invoice_net_kg": None,
|
||||
"gain_loss_kg": None,
|
||||
"gain_loss_percent": None
|
||||
}
|
||||
|
||||
# Extraction du poids brut débarqué
|
||||
gross_match = re.search(r'Gross Landed Weight\s*:\s*([\d,]+\.\d{2})\s*kgs', text)
|
||||
if gross_match:
|
||||
weights_info["gross_landed_kg"] = float(gross_match.group(1).replace(',', ''))
|
||||
|
||||
# Extraction du poids de tare
|
||||
tare_match = re.search(r'Invoice Tare\s*:\s*([\d,]+\.\d{2})\s*Kgs', text)
|
||||
if tare_match:
|
||||
weights_info["tare_kg"] = float(tare_match.group(1).replace(',', ''))
|
||||
|
||||
# Extraction du poids net débarqué
|
||||
net_landed_match = re.search(r'Net Landed Weight\s*:\s*([\d,]+\.\d{2})\s*Kgs', text)
|
||||
if net_landed_match:
|
||||
weights_info["net_landed_kg"] = float(net_landed_match.group(1).replace(',', ''))
|
||||
|
||||
# Extraction du poids net facturé
|
||||
invoice_net_match = re.search(r'Net Invoice Weight\s*:\s*([\d,]+\.\d{2})\s*Kgs', text)
|
||||
if invoice_net_match:
|
||||
weights_info["invoice_net_kg"] = float(invoice_net_match.group(1).replace(',', ''))
|
||||
|
||||
# Extraction du gain en kg
|
||||
gain_match = re.search(r'Gain\s+([\d,]+\.\d{2})\s*Kgs', text)
|
||||
if gain_match:
|
||||
weights_info["gain_loss_kg"] = float(gain_match.group(1).replace(',', ''))
|
||||
|
||||
# Extraction du pourcentage de gain (0.4% dans le tableau)
|
||||
percent_match = re.search(r'TOTAL\s+\d+\s+[\d,]+\.\d{2}\s+([\d.]+)%', text)
|
||||
if percent_match:
|
||||
try:
|
||||
weights_info["gain_loss_percent"] = float(percent_match.group(1))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
return weights_info
|
||||
|
||||
# Configure root logger explicitly
|
||||
root = logging.getLogger()
|
||||
@@ -937,10 +838,7 @@ async def parse_endpoint(text: str = Body(..., embed=True)):
|
||||
|
||||
PARSERS = {
|
||||
"AHK": AHKParser(),
|
||||
"INTERTEK": IntertekParser(),
|
||||
"ROBERTSON": RobertsonParser(),
|
||||
"SGS": SGSParser(),
|
||||
"PICL": PICLParser()
|
||||
"INTERTEK": IntertekParser()
|
||||
}
|
||||
|
||||
def empty_weight_report(lab):
|
||||
|
||||
Reference in New Issue
Block a user