This commit is contained in:
2026-01-11 16:29:29 +01:00
parent 13c599ac0b
commit ba853074d0

121
app.py
View File

@@ -23,37 +23,108 @@ file_handler.setFormatter(logging.Formatter(
"%(asctime)s - %(levelname)s - %(name)s - %(message)s"
))
# class AHKParser:
# lab="AHK"
# def parse(self,text):
# r=empty_weight_report("AHK")
# inv=section(text,"INVOICE WEIGHTS","Bales Weighed")
# land=section(text,"Bales Weighed","Outturn")
# loss=section(text,"LOSS","Invoice average")
# r["report"]["reference"]=safe_search(r"(AHK\s*/\S+)",text)
# r["report"]["date"]=extract("Produced On",text)
# r["contract"]["invoice_no"]=extract("Client Reference",text)
# r["contract"]["origin"]=extract("Growth",text)
# r["contract"]["commodity"]="Raw Cotton"
# r["parties"]["seller"]=extract("Client",text)
# r["parties"]["buyer"]=extract("Buyer",text)
# r["shipment"]["vessel"]=extract("Vessel",text)
# r["shipment"]["bl_no"]=extract("B/L No",text)
# r["shipment"]["port_destination"]=extract("Destination",text)
# r["shipment"]["arrival_date"]=extract("Arrival Date",text)
# r["shipment"]["weighing_method"]=extract("Weighing method",text)
# r["shipment"]["bales"]=to_float(extract("Total Bales",text))
# r["weights"]["gross_landed_kg"]=to_float(extract("Gross",land))
# r["weights"]["tare_kg"]=to_float(extract("Tare",land))
# r["weights"]["net_landed_kg"]=to_float(extract("Net",land))
# r["weights"]["invoice_net_kg"]=to_float(extract("Net",inv))
# r["weights"]["gain_loss_kg"]=to_float(extract("kg",loss))
# r["weights"]["gain_loss_percent"]=to_float(extract("Percentage",loss))
# return r
class AHKParser:
    """Parser that fills a weight report dict from the text of an AHK report.

    The report skeleton comes from ``empty_weight_report``; ``parse`` fills
    its ``report``, ``contract``, ``parties``, ``shipment`` and ``weights``
    sub-dicts using regex lookups and label/value block scanning.
    """

    lab = "AHK"

    # ---------- helpers ----------
    def clean(self, t):
        """Return *t* with non-breaking spaces replaced and whitespace collapsed.

        All whitespace runs (including newlines) become a single space, so
        this must only be applied to a single line or a single value.
        """
        return " ".join(t.replace("\xa0", " ").split())

    def _normalize(self, text):
        """Clean *text* line by line, dropping empty lines but keeping newlines.

        ``block`` relies on line boundaries, so the document as a whole must
        not be passed through ``clean`` (which would merge it into one line).
        """
        cleaned = (self.clean(raw) for raw in text.splitlines())
        return "\n".join(line for line in cleaned if line)

    def find(self, pattern, text):
        """Return the cleaned first capture group of *pattern* in *text*.

        The search is case-insensitive. Returns ``None`` when *text* is empty
        or ``None``, or when the pattern does not match.
        """
        if not text:
            return None
        m = re.search(pattern, text, re.I)
        return self.clean(m.group(1)) if m else None

    def block(self, text, labels):
        """Map each label found in *text* to the value that follows it.

        A label is a line that, once cleaned, is exactly equal to one of
        *labels*. Its value is the first subsequent line starting with ``:``
        that occurs before the next label line. Labels without such a line
        are omitted from the result. When a label occurs several times, the
        first occurrence that has a value wins.

        Returns a dict ``{label: value}``; empty when *text* is empty/None.
        """
        if not text:
            return {}
        lines = [c for c in (self.clean(raw) for raw in text.splitlines()) if c]
        wanted = set(labels)
        positions = [i for i, line in enumerate(lines) if line in wanted]

        found = {}
        for n, start in enumerate(positions):
            label = lines[start]
            if label in found:
                continue
            end = positions[n + 1] if n + 1 < len(positions) else len(lines)
            # Key the value by the label actually matched, not by position in
            # *labels*: text order may differ and some labels have no value.
            for candidate in lines[start + 1:end]:
                if candidate.startswith(":"):
                    found[label] = candidate.lstrip(":").strip()
                    break
        return found

    # ---------- parser ----------
    def parse(self, text):
        """Parse the report *text* and return the populated weight report."""
        r = empty_weight_report(self.lab)
        # Normalize per line; collapsing the whole text would remove the line
        # structure that block() and the "(.+)" patterns below depend on.
        text = self._normalize(text)

        # ---------- report ----------
        r["report"]["reference"] = self.find(r"(AHK\s*/\S+)", text)
        r["report"]["date"] = self.find(r"Produced On\s*([0-9A-Za-z ]+)", text)

        # ---------- contract ----------
        r["contract"]["invoice_no"] = self.find(r"Client Reference:\s*([A-Z0-9\- /]+)", text)
        r["contract"]["commodity"] = "Raw Cotton"

        # ---------- parties ----------
        r["parties"]["buyer"] = self.find(r"Buyer\s*:\s*(.+)", text)
        r["parties"]["seller"] = self.find(r"Client\s*Ref No\.\s*:\s*(.+)", text)

        # ---------- shipment block ----------
        ship = self.block(text, [
            "Total Bales", "Vessel", "Voy. No.", "B/L No.", "B/L Date", "Destination",
            "Growth", "Arrival Date", "First date of weighing", "Last Date of Weighing",
            "Weighing method", "Tare",
        ])
        r["shipment"]["bales"] = to_float(ship.get("Total Bales"))
        r["shipment"]["vessel"] = ship.get("Vessel")
        r["shipment"]["bl_no"] = ship.get("B/L No.")
        r["shipment"]["port_destination"] = ship.get("Destination")
        r["shipment"]["arrival_date"] = ship.get("Arrival Date")
        r["shipment"]["weighing_method"] = ship.get("Weighing method")
        r["contract"]["origin"] = ship.get("Growth")

        # ---------- weights ----------
        # NOTE(review): inv scans the whole text, so "Net" resolves to the
        # first "Net" label in the document; presumably that is the invoice
        # section - confirm against a sample report.
        inv = self.block(text, ["Bales", "Gross", "Tare", "Net"])
        land = self.block(section(text, "Bales Weighed", "Outturn"), ["Bales", "Gross", "Tare", "Net"])
        loss = section(text, "LOSS", "Invoice average")
        r["weights"]["gross_landed_kg"] = to_float(land.get("Gross"))
        r["weights"]["tare_kg"] = to_float(land.get("Tare"))
        r["weights"]["net_landed_kg"] = to_float(land.get("Net"))
        r["weights"]["invoice_net_kg"] = to_float(inv.get("Net"))
        r["weights"]["gain_loss_kg"] = to_float(self.find(r"(-?\d+\.?\d*)\s*kg", loss))
        r["weights"]["gain_loss_percent"] = to_float(self.find(r"Percentage\s*:\s*(-?\d+\.?\d*)", loss))
        return r
class IntertekParser: