This commit is contained in:
2026-01-10 18:53:09 +01:00
parent e6e0d98593
commit 13c599ac0b

323
app.py
View File

@@ -24,174 +24,169 @@ file_handler.setFormatter(logging.Formatter(
)) ))
class AHKParser:
    """Parser for weight reports issued by the AHK laboratory."""

    lab = "AHK"

    def parse(self, text):
        """Fill the common weight-report structure from AHK report text.

        Args:
            text: Plain text of the report.

        Returns:
            The dict produced by ``empty_weight_report("AHK")`` with the
            fields this layout provides filled in; every other field keeps
            its initial value.
        """
        result = empty_weight_report("AHK")

        # Weight figures are read from sub-sections of the report because the
        # same labels ("Net", "kg") are looked up in more than one of them.
        invoice_block = section(text, "INVOICE WEIGHTS", "Bales Weighed")
        landed_block = section(text, "Bales Weighed", "Outturn")
        loss_block = section(text, "LOSS", "Invoice average")

        report = result["report"]
        report["reference"] = safe_search(r"(AHK\s*/\S+)", text)
        report["date"] = extract("Produced On", text)

        contract = result["contract"]
        contract["invoice_no"] = extract("Client Reference", text)
        contract["origin"] = extract("Growth", text)
        # Commodity is a fixed value for this lab, not read from the text.
        contract["commodity"] = "Raw Cotton"

        parties = result["parties"]
        parties["seller"] = extract("Client", text)
        parties["buyer"] = extract("Buyer", text)

        shipment = result["shipment"]
        shipment["vessel"] = extract("Vessel", text)
        shipment["bl_no"] = extract("B/L No", text)
        shipment["port_destination"] = extract("Destination", text)
        shipment["arrival_date"] = extract("Arrival Date", text)
        shipment["weighing_method"] = extract("Weighing method", text)
        shipment["bales"] = to_float(extract("Total Bales", text))

        weights = result["weights"]
        weights["gross_landed_kg"] = to_float(extract("Gross", landed_block))
        weights["tare_kg"] = to_float(extract("Tare", landed_block))
        weights["net_landed_kg"] = to_float(extract("Net", landed_block))
        weights["invoice_net_kg"] = to_float(extract("Net", invoice_block))
        weights["gain_loss_kg"] = to_float(extract("kg", loss_block))
        weights["gain_loss_percent"] = to_float(
            extract("Percentage", loss_block)
        )
        return result
class IntertekParser:
    """Parser for weight reports issued by Intertek."""

    lab = "INTERTEK"

    def parse(self, text):
        """Fill the common weight-report structure from Intertek report text.

        Args:
            text: Plain text of the report.

        Returns:
            The dict produced by ``empty_weight_report("INTERTEK")`` with the
            fields this layout provides filled in; every other field keeps
            its initial value.
        """
        result = empty_weight_report("INTERTEK")

        # The percentage is searched over the whole text, before any label
        # lookup, exactly as in the committed version.
        percent = safe_search(r"([0-9.]+)\s*%", text)

        report = result["report"]
        report["reference"] = extract("Global Ref", text)
        report["file_no"] = extract("Report / File No", text)
        report["date"] = extract("Dated", text)

        contract = result["contract"]
        contract["contract_no"] = extract("Contract No", text)
        contract["invoice_no"] = extract("Invoice No", text)
        contract["origin"] = extract("Growth", text)
        # Commodity is a fixed value for this lab, not read from the text.
        contract["commodity"] = "Raw Cotton"

        result["parties"]["buyer"] = extract("Buyer", text)

        shipment = result["shipment"]
        shipment["vessel"] = extract("Vessel", text)
        shipment["bl_no"] = extract("B/L No", text)
        shipment["arrival_date"] = extract("Arrival Date", text)
        shipment["weighing_place"] = extract("Weighed at", text)
        shipment["bales"] = to_float(extract("Invoice Quantity", text))

        weights = result["weights"]
        weights["gross_landed_kg"] = to_float(extract("Gross", text))
        weights["tare_kg"] = to_float(extract("Invoice Tare", text))
        weights["net_landed_kg"] = to_float(extract("Landed Weight", text))
        weights["invoice_net_kg"] = to_float(extract("Invoice Weight", text))
        weights["gain_loss_kg"] = to_float(extract("Gain", text))
        weights["gain_loss_percent"] = to_float(percent)
        return result
class RobertsonParser:
    """Parser for weight reports issued by Robertson."""

    lab = "ROBERTSON"

    def parse(self, text):
        """Fill the common weight-report structure from Robertson report text.

        Args:
            text: Plain text of the report.

        Returns:
            The dict produced by ``empty_weight_report("ROBERTSON")`` with the
            fields this layout provides filled in; every other field keeps
            its initial value.
        """
        result = empty_weight_report("ROBERTSON")

        # The percentage is searched over the whole text, before any label
        # lookup, exactly as in the committed version.
        percent = safe_search(r"([0-9.]+)\s*%", text)

        report = result["report"]
        report["reference"] = extract("OUR REF", text)
        report["date"] = extract("DATE", text)

        contract = result["contract"]
        contract["contract_no"] = extract("CONTRACT NO", text)
        contract["invoice_no"] = extract("INVOICE NO", text)
        contract["lc_no"] = extract("LIC NO", text)
        # Commodity is a fixed value for this lab, not read from the text.
        contract["commodity"] = "Raw Cotton"

        parties = result["parties"]
        parties["seller"] = extract("SELLER", text)
        parties["buyer"] = extract("BUYER", text)

        shipment = result["shipment"]
        shipment["vessel"] = extract("NAME OF VESSEL", text)
        shipment["port_loading"] = extract("SAILED FROM", text)
        shipment["port_destination"] = extract("ARRIVED AT", text)
        shipment["arrival_date"] = extract("DATE OF ARRIVAL", text)
        shipment["weighing_place"] = extract("PLACE OF CONTROL", text)
        shipment["bales"] = to_float(extract("CONSIGNMENT", text))

        weights = result["weights"]
        weights["gross_landed_kg"] = to_float(extract("GROSS", text))
        weights["tare_kg"] = to_float(extract("TARE", text))
        weights["net_landed_kg"] = to_float(extract("LANDED NET", text))
        weights["invoice_net_kg"] = to_float(extract("INVOICE NET", text))
        weights["gain_loss_kg"] = to_float(extract("GAIN", text))
        weights["gain_loss_percent"] = to_float(percent)
        return result
class SGSParser:
    """Parser for landing reports issued by SGS."""

    lab = "SGS"

    def parse(self, text):
        """Fill the common weight-report structure from SGS report text.

        Args:
            text: Plain text of the report.

        Returns:
            The dict produced by ``empty_weight_report("SGS")`` with the
            fields this layout provides filled in; every other field keeps
            its initial value.
        """
        result = empty_weight_report("SGS")

        report = result["report"]
        report["reference"] = extract("LANDING REPORT No", text)
        report["file_no"] = extract("FILE NO.", text)
        report["date"] = extract("DATE", text)

        contract = result["contract"]
        contract["contract_no"] = extract("CONTRACT NO.", text)
        contract["invoice_no"] = extract("INVOICE NO.", text)
        contract["origin"] = extract("ORIGIN", text)
        # Unlike the other parsers, the commodity is read from the report.
        contract["commodity"] = extract("PRODUCT", text)

        parties = result["parties"]
        parties["seller"] = extract("Seller", text)
        parties["buyer"] = extract("Buyer", text)
        parties["carrier"] = extract("Carrier", text)

        shipment = result["shipment"]
        shipment["bl_no"] = extract("B/L no.", text)
        shipment["port_loading"] = extract("Port of loading", text)
        shipment["port_destination"] = extract("Port of destination", text)
        shipment["arrival_date"] = extract("Vessel arrival date", text)
        shipment["weighing_place"] = extract("Place of weighing", text)
        shipment["weighing_method"] = extract("Weighing mode", text)
        shipment["bales"] = to_float(extract("Quantity arrived", text))

        weights = result["weights"]
        weights["gross_landed_kg"] = to_float(extract("Gross landed", text))
        weights["tare_kg"] = to_float(extract("Tare", text))
        weights["net_landed_kg"] = to_float(extract("Net landed", text))
        weights["invoice_net_kg"] = to_float(extract("Net invoiced", text))
        # Gain figures are taken with regexes rather than label lookups.
        weights["gain_loss_kg"] = to_float(
            safe_search(r"Gain.*?([0-9.,]+)\s*kgs", text)
        )
        weights["gain_loss_percent"] = to_float(
            safe_search(r"Gain\s*\+?\s*([0-9.,]+)\s*%", text)
        )
        return result
class PICLParser:
    """Parser for weight reports issued by PICL."""

    lab = "PICL"

    def parse(self, text):
        """Fill the common weight-report structure from PICL report text.

        Args:
            text: Plain text of the report.

        Returns:
            The dict produced by ``empty_weight_report("PICL")`` with the
            fields this layout provides filled in.
        """
        result = empty_weight_report("PICL")

        report = result["report"]
        report["reference"] = safe_search(r"No[:\s]+([A-Z0-9\-]+)", text)
        # Group 1 is the weekday name; the date itself is capture group 2.
        report["date"] = safe_search(
            r"(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),?\s*"
            r"([A-Za-z]+\s+[0-9]{1,2},\s*[0-9]{4})",
            text,
            group_index=2,
        )

        # NOTE(review): labels such as "Invoice ilo & Date", "Date of Anival
        # & LDL" and "Total;" look like OCR misreads; presumably they are
        # intentional so they match the scanned text. Confirm before
        # "correcting" them.
        contract = result["contract"]
        contract["contract_no"] = extract("Contract/Pl No & Date", text)
        contract["invoice_no"] = extract("Invoice ilo & Date", text)
        contract["lc_no"] = extract("L/C No & Date", text)
        contract["origin"] = extract("Country of Origin", text)
        contract["commodity"] = extract("Commodity", text)

        parties = result["parties"]
        parties["seller"] = extract("FAIRCOT SA", text)
        parties["buyer"] = extract("M/S.", text)
        parties["carrier"] = extract("Shipping Agent", text)

        shipment = result["shipment"]
        shipment["vessel"] = extract("Shipped Per Vessel", text)
        shipment["bl_no"] = extract("B/L No & Date", text)
        shipment["port_loading"] = extract("Port of Loading", text)
        shipment["port_destination"] = extract("Port of Discharge", text)
        shipment["arrival_date"] = extract("Date of Anival & LDL", text)
        shipment["weighing_place"] = extract("Place & Date of Weighment", text)
        shipment["weighing_method"] = extract("Method of Weighment", text)
        # NOTE(review): "Grand Total" feeds both the bale count here and
        # net_landed_kg below; verify against a sample report.
        shipment["bales"] = to_float(extract("Grand Total", text))

        weights = result["weights"]
        weights["gross_landed_kg"] = to_float(extract("Total;", text))
        weights["tare_kg"] = to_float(extract("Tare Weight", text))
        weights["net_landed_kg"] = to_float(extract("Grand Total", text))
        weights["invoice_net_kg"] = to_float(extract("Invoice weight", text))
        # The kg pattern only matches a figure with a leading minus sign.
        weights["gain_loss_kg"] = to_float(
            safe_search(r"(-[0-9.,]+)\s*KGS", text)
        )
        weights["gain_loss_percent"] = to_float(
            safe_search(r"\(\s*([0-9.,]+)\s*o/o\s*\)", text)
        )
        return result
# Configure root logger explicitly # Configure root logger explicitly
root = logging.getLogger() root = logging.getLogger()
@@ -494,19 +489,27 @@ PARSERS = {
"PICL": PICLParser() "PICL": PICLParser()
} }
def empty_weight_report(lab: str) -> dict:
    """Return a fresh weight-report skeleton with every field unset.

    Args:
        lab: Identifier stored under the top-level ``"lab"`` key.

    Returns:
        A new dict with the sections ``report``, ``contract``, ``parties``,
        ``shipment`` and ``weights``; every field inside them is ``None``.
        Each call builds new nested dicts, so callers can fill the result in
        place without affecting other reports.
    """
    return {
        "lab": lab,
        "report": {
            "reference": None,
            "file_no": None,
            "date": None,
        },
        "contract": {
            "contract_no": None,
            "invoice_no": None,
            "lc_no": None,
            "origin": None,
            "commodity": None,
        },
        "parties": {
            "seller": None,
            "buyer": None,
            "carrier": None,
        },
        "shipment": {
            "vessel": None,
            "bl_no": None,
            "port_loading": None,
            "port_destination": None,
            "arrival_date": None,
            "weighing_place": None,
            "weighing_method": None,
            "bales": None,
        },
        "weights": {
            "gross_landed_kg": None,
            "tare_kg": None,
            "net_landed_kg": None,
            "invoice_net_kg": None,
            "gain_loss_kg": None,
            "gain_loss_percent": None,
        },
    }
def parse_report(text):
    """Detect the report template and run the matching parser.

    Args:
        text: Plain text of the report.

    Returns:
        The dict returned by the matching parser's ``parse`` method, or
        ``{"template": "UNKNOWN"}`` when the detected template has no entry
        in ``PARSERS``.
    """
    template = detect_template(text)
    # Guard clause: an unrecognised template yields a sentinel dict, not an
    # exception.
    if template not in PARSERS:
        return {"template": "UNKNOWN"}
    return PARSERS[template].parse(text)