class AHKParser:
    """Parser for AHK weight reports.

    Starts from the skeleton returned by ``empty_weight_report`` and fills
    in the fields looked up below; every other field keeps the skeleton's
    ``None``.
    """

    lab = "AHK"

    def parse(self, text):
        """Return the weight-report dict extracted from ``text``."""
        report = empty_weight_report(self.lab)
        # "Net" is looked up in two different blocks, so the invoice and
        # landed figures each come from their own slice of the text.
        # NOTE(review): presumably section() returns the text between the
        # two markers -- confirm against the helper.
        invoice_block = section(text, "INVOICE WEIGHTS", "Bales Weighed")
        landed_block = section(text, "Bales Weighed", "Outturn")
        loss_block = section(text, "LOSS", "Invoice average")

        report["report"].update(
            reference=safe_search(r"(AHK\s*/\S+)", text),
            date=extract("Produced On", text),
        )
        report["contract"].update(
            invoice_no=extract("Client Reference", text),
            origin=extract("Growth", text),
            commodity="Raw Cotton",
        )
        report["parties"].update(
            seller=extract("Client", text),
            buyer=extract("Buyer", text),
        )
        report["shipment"].update(
            vessel=extract("Vessel", text),
            bl_no=extract("B/L No", text),
            port_destination=extract("Destination", text),
            arrival_date=extract("Arrival Date", text),
            weighing_method=extract("Weighing method", text),
            bales=to_float(extract("Total Bales", text)),
        )
        report["weights"].update(
            gross_landed_kg=to_float(extract("Gross", landed_block)),
            tare_kg=to_float(extract("Tare", landed_block)),
            net_landed_kg=to_float(extract("Net", landed_block)),
            invoice_net_kg=to_float(extract("Net", invoice_block)),
            gain_loss_kg=to_float(extract("kg", loss_block)),
            gain_loss_percent=to_float(extract("Percentage", loss_block)),
        )
        return report


class IntertekParser:
    """Parser for Intertek weight reports (same output shape as the others)."""

    lab = "INTERTEK"

    def parse(self, text):
        """Return the weight-report dict extracted from ``text``."""
        report = empty_weight_report(self.lab)
        # The percentage is taken from a "<number> %" pattern searched over
        # the whole text rather than from a labelled field.
        percent = safe_search(r"([0-9.]+)\s*%", text)

        report["report"].update(
            reference=extract("Global Ref", text),
            file_no=extract("Report / File No", text),
            date=extract("Dated", text),
        )
        report["contract"].update(
            contract_no=extract("Contract No", text),
            invoice_no=extract("Invoice No", text),
            origin=extract("Growth", text),
            commodity="Raw Cotton",
        )
        report["parties"].update(
            buyer=extract("Buyer", text),
        )
        report["shipment"].update(
            vessel=extract("Vessel", text),
            bl_no=extract("B/L No", text),
            arrival_date=extract("Arrival Date", text),
            weighing_place=extract("Weighed at", text),
            bales=to_float(extract("Invoice Quantity", text)),
        )
        report["weights"].update(
            gross_landed_kg=to_float(extract("Gross", text)),
            tare_kg=to_float(extract("Invoice Tare", text)),
            net_landed_kg=to_float(extract("Landed Weight", text)),
            invoice_net_kg=to_float(extract("Invoice Weight", text)),
            gain_loss_kg=to_float(extract("Gain", text)),
            gain_loss_percent=to_float(percent),
        )
        return report


class RobertsonParser:
    """Parser for Robertson weight reports (same output shape as the others)."""

    lab = "ROBERTSON"

    def parse(self, text):
        """Return the weight-report dict extracted from ``text``."""
        report = empty_weight_report(self.lab)
        # Same whole-text "<number> %" lookup as the Intertek parser.
        percent = safe_search(r"([0-9.]+)\s*%", text)

        report["report"].update(
            reference=extract("OUR REF", text),
            date=extract("DATE", text),
        )
        report["contract"].update(
            contract_no=extract("CONTRACT NO", text),
            invoice_no=extract("INVOICE NO", text),
            lc_no=extract("LIC NO", text),
            commodity="Raw Cotton",
        )
        report["parties"].update(
            seller=extract("SELLER", text),
            buyer=extract("BUYER", text),
        )
        report["shipment"].update(
            vessel=extract("NAME OF VESSEL", text),
            port_loading=extract("SAILED FROM", text),
            port_destination=extract("ARRIVED AT", text),
            arrival_date=extract("DATE OF ARRIVAL", text),
            weighing_place=extract("PLACE OF CONTROL", text),
            bales=to_float(extract("CONSIGNMENT", text)),
        )
        report["weights"].update(
            gross_landed_kg=to_float(extract("GROSS", text)),
            tare_kg=to_float(extract("TARE", text)),
            net_landed_kg=to_float(extract("LANDED NET", text)),
            invoice_net_kg=to_float(extract("INVOICE NET", text)),
            gain_loss_kg=to_float(extract("GAIN", text)),
            gain_loss_percent=to_float(percent),
        )
        return report


class SGSParser:
    """Parser for SGS landing reports (same output shape as the others)."""

    lab = "SGS"

    def parse(self, text):
        """Return the weight-report dict extracted from ``text``."""
        report = empty_weight_report(self.lab)

        report["report"].update(
            reference=extract("LANDING REPORT No", text),
            file_no=extract("FILE NO.", text),
            date=extract("DATE", text),
        )
        report["contract"].update(
            contract_no=extract("CONTRACT NO.", text),
            invoice_no=extract("INVOICE NO.", text),
            origin=extract("ORIGIN", text),
            commodity=extract("PRODUCT", text),
        )
        report["parties"].update(
            seller=extract("Seller", text),
            buyer=extract("Buyer", text),
            carrier=extract("Carrier", text),
        )
        report["shipment"].update(
            bl_no=extract("B/L no.", text),
            port_loading=extract("Port of loading", text),
            port_destination=extract("Port of destination", text),
            arrival_date=extract("Vessel arrival date", text),
            weighing_place=extract("Place of weighing", text),
            weighing_method=extract("Weighing mode", text),
            bales=to_float(extract("Quantity arrived", text)),
        )
        report["weights"].update(
            gross_landed_kg=to_float(extract("Gross landed", text)),
            tare_kg=to_float(extract("Tare", text)),
            net_landed_kg=to_float(extract("Net landed", text)),
            invoice_net_kg=to_float(extract("Net invoiced", text)),
            # Both figures are anchored on the word "Gain".
            gain_loss_kg=to_float(
                safe_search(r"Gain.*?([0-9.,]+)\s*kgs", text)
            ),
            gain_loss_percent=to_float(
                safe_search(r"Gain\s*\+?\s*([0-9.,]+)\s*%", text)
            ),
        )
        return report


class PICLParser:
    """Parser for PICL weight reports (same output shape as the others).

    NOTE(review): several labels ("Invoice ilo & Date",
    "Date of Anival & LDL", "Total;", "o/o") look like OCR misreadings
    matched on purpose -- confirm before "correcting" any of them.
    """

    lab = "PICL"

    def parse(self, text):
        """Return the weight-report dict extracted from ``text``."""
        report = empty_weight_report(self.lab)

        report["report"].update(
            reference=safe_search(r"No[:\s]+([A-Z0-9\-]+)", text),
            # Group 1 is the weekday name; the date itself is group 2.
            date=safe_search(
                r"(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),?\s*([A-Za-z]+\s+[0-9]{1,2},\s*[0-9]{4})",
                text,
                group_index=2,
            ),
        )
        report["contract"].update(
            contract_no=extract("Contract/Pl No & Date", text),
            invoice_no=extract("Invoice ilo & Date", text),
            lc_no=extract("L/C No & Date", text),
            origin=extract("Country of Origin", text),
            commodity=extract("Commodity", text),
        )
        report["parties"].update(
            seller=extract("FAIRCOT SA", text),
            buyer=extract("M/S.", text),
            carrier=extract("Shipping Agent", text),
        )
        report["shipment"].update(
            vessel=extract("Shipped Per Vessel", text),
            bl_no=extract("B/L No & Date", text),
            port_loading=extract("Port of Loading", text),
            port_destination=extract("Port of Discharge", text),
            arrival_date=extract("Date of Anival & LDL", text),
            weighing_place=extract("Place & Date of Weighment", text),
            weighing_method=extract("Method of Weighment", text),
            bales=to_float(extract("Grand Total", text)),
        )
        report["weights"].update(
            gross_landed_kg=to_float(extract("Total;", text)),
            tare_kg=to_float(extract("Tare Weight", text)),
            net_landed_kg=to_float(extract("Grand Total", text)),
            invoice_net_kg=to_float(extract("Invoice weight", text)),
            # The leading "-" is part of the pattern, so only a negative
            # KGS figure is picked up here.
            gain_loss_kg=to_float(safe_search(r"(-[0-9.,]+)\s*KGS", text)),
            gain_loss_percent=to_float(
                safe_search(r"\(\s*([0-9.,]+)\s*o/o\s*\)", text)
            ),
        )
        return report


def empty_weight_report(lab):
    """Return a fresh report skeleton for ``lab`` with every field ``None``.

    A new dict (including new nested dicts) is built on each call, so
    filling one report never affects another.
    """
    return {
        "lab": lab,
        "report": {"reference": None, "file_no": None, "date": None},
        "contract": {
            "contract_no": None,
            "invoice_no": None,
            "lc_no": None,
            "origin": None,
            "commodity": None,
        },
        "parties": {"seller": None, "buyer": None, "carrier": None},
        "shipment": {
            "vessel": None,
            "bl_no": None,
            "port_loading": None,
            "port_destination": None,
            "arrival_date": None,
            "weighing_place": None,
            "weighing_method": None,
            "bales": None,
        },
        "weights": {
            "gross_landed_kg": None,
            "tare_kg": None,
            "net_landed_kg": None,
            "invoice_net_kg": None,
            "gain_loss_kg": None,
            "gain_loss_percent": None,
        },
    }


def parse_report(text):
    """Detect the report template of ``text`` and parse it.

    Returns ``{"template": "UNKNOWN"}`` when the detected template has no
    entry in ``PARSERS``. Otherwise returns the parser's report dict with
    a ``"template"`` key added, so both outcomes can be told apart through
    the same key.
    """
    template = detect_template(text)
    if template not in PARSERS:
        return {"template": "UNKNOWN"}
    report = PARSERS[template].parse(text)
    return {"template": template, **report}