10.01.26
This commit is contained in:
323
app.py
323
app.py
@@ -24,174 +24,169 @@ file_handler.setFormatter(logging.Formatter(
|
||||
))
|
||||
|
||||
class AHKParser:
    """Parser for weight reports issued by the AHK lab.

    Fills the common report skeleton returned by ``empty_weight_report``
    with values pulled out of the report text.  The helpers ``section``,
    ``extract``, ``safe_search`` and ``to_float`` are defined elsewhere in
    this module; their exact matching rules are not visible from here.
    """

    lab = "AHK"

    def parse(self, text):
        """Return the weight-report dict for an AHK report.

        Args:
            text: Full text of the report.

        Returns:
            The dict produced by ``empty_weight_report`` with the AHK
            fields filled in.  Keys this parser does not assign keep the
            skeleton's default.
        """
        r = empty_weight_report(self.lab)

        # The "Net"/"Gross"/"Tare" labels are looked up inside a specific
        # slice of the report rather than the whole text, so the invoice and
        # landed figures are read from their own sections.
        inv = section(text, "INVOICE WEIGHTS", "Bales Weighed")
        land = section(text, "Bales Weighed", "Outturn")
        loss = section(text, "LOSS", "Invoice average")

        r["report"]["reference"] = safe_search(r"(AHK\s*/\S+)", text)
        r["report"]["date"] = extract("Produced On", text)

        r["contract"]["invoice_no"] = extract("Client Reference", text)
        r["contract"]["origin"] = extract("Growth", text)
        # Commodity is hard-coded for this lab, not read from the text.
        r["contract"]["commodity"] = "Raw Cotton"

        r["parties"]["seller"] = extract("Client", text)
        r["parties"]["buyer"] = extract("Buyer", text)

        r["shipment"]["vessel"] = extract("Vessel", text)
        r["shipment"]["bl_no"] = extract("B/L No", text)
        r["shipment"]["port_destination"] = extract("Destination", text)
        r["shipment"]["arrival_date"] = extract("Arrival Date", text)
        r["shipment"]["weighing_method"] = extract("Weighing method", text)
        r["shipment"]["bales"] = to_float(extract("Total Bales", text))

        r["weights"]["gross_landed_kg"] = to_float(extract("Gross", land))
        r["weights"]["tare_kg"] = to_float(extract("Tare", land))
        r["weights"]["net_landed_kg"] = to_float(extract("Net", land))
        r["weights"]["invoice_net_kg"] = to_float(extract("Net", inv))
        r["weights"]["gain_loss_kg"] = to_float(extract("kg", loss))
        r["weights"]["gain_loss_percent"] = to_float(extract("Percentage", loss))
        return r
|
||||
|
||||
class IntertekParser:
    """Parser for weight reports issued by the INTERTEK lab.

    Fills the common report skeleton returned by ``empty_weight_report``
    with values pulled out of the report text.  ``extract``,
    ``safe_search`` and ``to_float`` are defined elsewhere in this module.
    """

    lab = "INTERTEK"

    def parse(self, text):
        """Return the weight-report dict for an INTERTEK report.

        Args:
            text: Full text of the report.

        Returns:
            The dict produced by ``empty_weight_report`` with the INTERTEK
            fields filled in.  Keys this parser does not assign keep the
            skeleton's default.
        """
        r = empty_weight_report(self.lab)

        # The gain/loss percentage comes from a "<number> %" pattern searched
        # over the whole text, not from a labelled field.
        pct = safe_search(r"([0-9.]+)\s*%", text)

        r["report"]["reference"] = extract("Global Ref", text)
        r["report"]["file_no"] = extract("Report / File No", text)
        r["report"]["date"] = extract("Dated", text)

        r["contract"]["contract_no"] = extract("Contract No", text)
        r["contract"]["invoice_no"] = extract("Invoice No", text)
        r["contract"]["origin"] = extract("Growth", text)
        # Commodity is hard-coded for this lab, not read from the text.
        r["contract"]["commodity"] = "Raw Cotton"

        r["parties"]["buyer"] = extract("Buyer", text)

        r["shipment"]["vessel"] = extract("Vessel", text)
        r["shipment"]["bl_no"] = extract("B/L No", text)
        r["shipment"]["arrival_date"] = extract("Arrival Date", text)
        r["shipment"]["weighing_place"] = extract("Weighed at", text)
        r["shipment"]["bales"] = to_float(extract("Invoice Quantity", text))

        r["weights"]["gross_landed_kg"] = to_float(extract("Gross", text))
        r["weights"]["tare_kg"] = to_float(extract("Invoice Tare", text))
        r["weights"]["net_landed_kg"] = to_float(extract("Landed Weight", text))
        r["weights"]["invoice_net_kg"] = to_float(extract("Invoice Weight", text))
        r["weights"]["gain_loss_kg"] = to_float(extract("Gain", text))
        r["weights"]["gain_loss_percent"] = to_float(pct)
        return r
|
||||
|
||||
class RobertsonParser:
    """Parser for weight reports issued by the ROBERTSON lab.

    Fills the common report skeleton returned by ``empty_weight_report``
    with values pulled out of the report text.  ``extract``,
    ``safe_search`` and ``to_float`` are defined elsewhere in this module.
    """

    lab = "ROBERTSON"

    def parse(self, text):
        """Return the weight-report dict for a ROBERTSON report.

        Args:
            text: Full text of the report.

        Returns:
            The dict produced by ``empty_weight_report`` with the ROBERTSON
            fields filled in.  Keys this parser does not assign keep the
            skeleton's default.
        """
        r = empty_weight_report(self.lab)

        # The gain/loss percentage comes from a "<number> %" pattern searched
        # over the whole text, not from a labelled field.
        pct = safe_search(r"([0-9.]+)\s*%", text)

        r["report"]["reference"] = extract("OUR REF", text)
        r["report"]["date"] = extract("DATE", text)

        r["contract"]["contract_no"] = extract("CONTRACT NO", text)
        r["contract"]["invoice_no"] = extract("INVOICE NO", text)
        r["contract"]["lc_no"] = extract("LIC NO", text)
        # Commodity is hard-coded for this lab, not read from the text.
        r["contract"]["commodity"] = "Raw Cotton"

        r["parties"]["seller"] = extract("SELLER", text)
        r["parties"]["buyer"] = extract("BUYER", text)

        r["shipment"]["vessel"] = extract("NAME OF VESSEL", text)
        r["shipment"]["port_loading"] = extract("SAILED FROM", text)
        r["shipment"]["port_destination"] = extract("ARRIVED AT", text)
        r["shipment"]["arrival_date"] = extract("DATE OF ARRIVAL", text)
        r["shipment"]["weighing_place"] = extract("PLACE OF CONTROL", text)
        r["shipment"]["bales"] = to_float(extract("CONSIGNMENT", text))

        r["weights"]["gross_landed_kg"] = to_float(extract("GROSS", text))
        r["weights"]["tare_kg"] = to_float(extract("TARE", text))
        r["weights"]["net_landed_kg"] = to_float(extract("LANDED NET", text))
        r["weights"]["invoice_net_kg"] = to_float(extract("INVOICE NET", text))
        r["weights"]["gain_loss_kg"] = to_float(extract("GAIN", text))
        r["weights"]["gain_loss_percent"] = to_float(pct)
        return r
|
||||
|
||||
class SGSParser:
    """Parser for landing reports issued by the SGS lab.

    Fills the common report skeleton returned by ``empty_weight_report``
    with values pulled out of the report text.  ``extract``,
    ``safe_search`` and ``to_float`` are defined elsewhere in this module.
    """

    lab = "SGS"

    def parse(self, text):
        """Return the weight-report dict for an SGS report.

        Args:
            text: Full text of the report.

        Returns:
            The dict produced by ``empty_weight_report`` with the SGS
            fields filled in.
        """
        r = empty_weight_report(self.lab)

        r["report"]["reference"] = extract("LANDING REPORT No", text)
        r["report"]["file_no"] = extract("FILE NO.", text)
        r["report"]["date"] = extract("DATE", text)

        r["contract"]["contract_no"] = extract("CONTRACT NO.", text)
        r["contract"]["invoice_no"] = extract("INVOICE NO.", text)
        r["contract"]["origin"] = extract("ORIGIN", text)
        r["contract"]["commodity"] = extract("PRODUCT", text)

        r["parties"]["seller"] = extract("Seller", text)
        r["parties"]["buyer"] = extract("Buyer", text)
        r["parties"]["carrier"] = extract("Carrier", text)

        r["shipment"]["bl_no"] = extract("B/L no.", text)
        r["shipment"]["port_loading"] = extract("Port of loading", text)
        r["shipment"]["port_destination"] = extract("Port of destination", text)
        r["shipment"]["arrival_date"] = extract("Vessel arrival date", text)
        r["shipment"]["weighing_place"] = extract("Place of weighing", text)
        r["shipment"]["weighing_method"] = extract("Weighing mode", text)
        r["shipment"]["bales"] = to_float(extract("Quantity arrived", text))

        r["weights"]["gross_landed_kg"] = to_float(extract("Gross landed", text))
        r["weights"]["tare_kg"] = to_float(extract("Tare", text))
        r["weights"]["net_landed_kg"] = to_float(extract("Net landed", text))
        r["weights"]["invoice_net_kg"] = to_float(extract("Net invoiced", text))
        # Both gain figures are taken from text following the word "Gain":
        # one pattern ends in "kgs", the other in "%".
        r["weights"]["gain_loss_kg"] = to_float(
            safe_search(r"Gain.*?([0-9.,]+)\s*kgs", text)
        )
        r["weights"]["gain_loss_percent"] = to_float(
            safe_search(r"Gain\s*\+?\s*([0-9.,]+)\s*%", text)
        )
        return r
|
||||
|
||||
class PICLParser:
    """Parser for weight reports issued by the PICL lab.

    Fills the common report skeleton returned by ``empty_weight_report``
    with values pulled out of the report text.  ``extract``,
    ``safe_search`` and ``to_float`` are defined elsewhere in this module.

    NOTE(review): several labels and patterns below ("Invoice ilo & Date",
    "Date of Anival & LDL", "Total;", "o/o") look like they reproduce
    character-recognition errors in the input text -- presumably on
    purpose so they match; confirm against sample reports before
    "correcting" them.
    """

    lab = "PICL"

    def parse(self, text):
        """Return the weight-report dict for a PICL report.

        Args:
            text: Full text of the report.

        Returns:
            The dict produced by ``empty_weight_report`` with the PICL
            fields filled in.  Keys this parser does not assign keep the
            skeleton's default.
        """
        r = empty_weight_report(self.lab)

        r["report"]["reference"] = safe_search(r"No[:\s]+([A-Z0-9\-]+)", text)
        # The weekday alternatives are grouped so the optional comma and the
        # date apply to every weekday; the date itself is capture group 2.
        r["report"]["date"] = safe_search(
            r"(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),?\s*([A-Za-z]+\s+[0-9]{1,2},\s*[0-9]{4})",
            text,
            group_index=2,
        )

        r["contract"]["contract_no"] = extract("Contract/Pl No & Date", text)
        r["contract"]["invoice_no"] = extract("Invoice ilo & Date", text)
        r["contract"]["lc_no"] = extract("L/C No & Date", text)
        r["contract"]["origin"] = extract("Country of Origin", text)
        r["contract"]["commodity"] = extract("Commodity", text)

        r["parties"]["seller"] = extract("FAIRCOT SA", text)
        r["parties"]["buyer"] = extract("M/S.", text)
        r["parties"]["carrier"] = extract("Shipping Agent", text)

        r["shipment"]["vessel"] = extract("Shipped Per Vessel", text)
        r["shipment"]["bl_no"] = extract("B/L No & Date", text)
        r["shipment"]["port_loading"] = extract("Port of Loading", text)
        r["shipment"]["port_destination"] = extract("Port of Discharge", text)
        r["shipment"]["arrival_date"] = extract("Date of Anival & LDL", text)
        r["shipment"]["weighing_place"] = extract("Place & Date of Weighment", text)
        r["shipment"]["weighing_method"] = extract("Method of Weighment", text)
        r["shipment"]["bales"] = to_float(extract("Grand Total", text))

        r["weights"]["gross_landed_kg"] = to_float(extract("Total;", text))
        r["weights"]["tare_kg"] = to_float(extract("Tare Weight", text))
        # NOTE(review): "Grand Total" feeds both shipment.bales above and
        # net_landed_kg here -- confirm that is intended.
        r["weights"]["net_landed_kg"] = to_float(extract("Grand Total", text))
        r["weights"]["invoice_net_kg"] = to_float(extract("Invoice weight", text))
        # This pattern only matches a figure with a leading minus sign.
        r["weights"]["gain_loss_kg"] = to_float(
            safe_search(r"(-[0-9.,]+)\s*KGS", text)
        )
        r["weights"]["gain_loss_percent"] = to_float(
            safe_search(r"\(\s*([0-9.,]+)\s*o/o\s*\)", text)
        )
        return r
|
||||
|
||||
# Configure root logger explicitly
|
||||
root = logging.getLogger()
|
||||
@@ -494,19 +489,27 @@ PARSERS = {
|
||||
"PICL": PICLParser()
|
||||
}
|
||||
|
||||
def empty_weight_report(lab):
    """Return a blank weight-report dict for the given lab.

    Every call builds a new dict, so callers can fill in fields without
    affecting other reports.

    Args:
        lab: Value stored under the top-level ``"lab"`` key.

    Returns:
        A dict with ``"lab"`` plus the sections ``"report"``,
        ``"contract"``, ``"parties"``, ``"shipment"`` and ``"weights"``;
        every field inside a section is initialised to ``None``.
    """
    return {
        "lab": lab,
        "report": {"reference": None, "file_no": None, "date": None},
        "contract": {
            "contract_no": None,
            "invoice_no": None,
            "lc_no": None,
            "origin": None,
            "commodity": None,
        },
        "parties": {"seller": None, "buyer": None, "carrier": None},
        "shipment": {
            "vessel": None,
            "bl_no": None,
            "port_loading": None,
            "port_destination": None,
            "arrival_date": None,
            "weighing_place": None,
            "weighing_method": None,
            "bales": None,
        },
        "weights": {
            "gross_landed_kg": None,
            "tare_kg": None,
            "net_landed_kg": None,
            "invoice_net_kg": None,
            "gain_loss_kg": None,
            "gain_loss_percent": None,
        },
    }
|
||||
|
||||
def parse_report(text):
    """Detect the report template and run the matching parser.

    Args:
        text: Full text of the report.

    Returns:
        ``{"template": "UNKNOWN"}`` when the template returned by
        ``detect_template`` has no entry in ``PARSERS``; otherwise whatever
        the selected parser's ``parse(text)`` returns.
    """
    template = detect_template(text)
    if template not in PARSERS:
        # NOTE(review): this fallback has a "template" key, while the parser
        # result is returned as-is -- callers presumably have to handle both
        # shapes; verify against the call sites.
        return {"template": "UNKNOWN"}
    return PARSERS[template].parse(text)
|
||||
Reference in New Issue
Block a user