11.01.26
This commit is contained in:
25
app.py
25
app.py
@@ -59,31 +59,33 @@ class AHKParser:
|
|||||||
lab = "AHK"
|
lab = "AHK"
|
||||||
|
|
||||||
# ---------- helpers ----------
|
# ---------- helpers ----------
|
||||||
def clean(self, t):
|
def norm(self, s):
|
||||||
return " ".join(t.replace("\xa0", " ").split())
|
return s.replace("\xa0", " ").strip()
|
||||||
|
|
||||||
def find(self, pattern, text):
|
def find(self, pattern, text):
|
||||||
m = re.search(pattern, text, re.I)
|
m = re.search(pattern, text, re.I)
|
||||||
return self.clean(m.group(1)) if m else None
|
return self.norm(m.group(1)) if m else None
|
||||||
|
|
||||||
|
def lines(self, text):
|
||||||
|
return [self.norm(l) for l in text.splitlines() if self.norm(l)]
|
||||||
|
|
||||||
def block(self, text, labels):
|
def block(self, text, labels):
|
||||||
lines = [self.clean(l) for l in text.splitlines() if self.clean(l)]
|
L = self.lines(text)
|
||||||
idx = [i for i,l in enumerate(lines) if l in labels]
|
idx = [i for i,l in enumerate(L) if l in labels]
|
||||||
values = []
|
values = []
|
||||||
|
|
||||||
for i in range(len(idx)):
|
for i in range(len(idx)):
|
||||||
start = idx[i]
|
start = idx[i]
|
||||||
end = idx[i+1] if i+1 < len(idx) else len(lines)
|
end = idx[i+1] if i+1 < len(idx) else len(L)
|
||||||
for j in range(start, end):
|
for j in range(start, end):
|
||||||
if lines[j].startswith(":"):
|
if L[j].startswith(":"):
|
||||||
values.append(lines[j].lstrip(":").strip())
|
values.append(L[j][1:].strip())
|
||||||
break
|
break
|
||||||
return dict(zip(labels, values))
|
return dict(zip(labels, values))
|
||||||
|
|
||||||
# ---------- parser ----------
|
# ---------- parser ----------
|
||||||
def parse(self, text):
|
def parse(self, text):
|
||||||
r = empty_weight_report("AHK")
|
r = empty_weight_report("AHK")
|
||||||
text = self.clean(text)
|
|
||||||
|
|
||||||
# ---------- report ----------
|
# ---------- report ----------
|
||||||
r["report"]["reference"] = self.find(r"(AHK\s*/\S+)", text)
|
r["report"]["reference"] = self.find(r"(AHK\s*/\S+)", text)
|
||||||
@@ -94,10 +96,9 @@ class AHKParser:
|
|||||||
r["contract"]["commodity"] = "Raw Cotton"
|
r["contract"]["commodity"] = "Raw Cotton"
|
||||||
|
|
||||||
# ---------- parties ----------
|
# ---------- parties ----------
|
||||||
r["parties"]["buyer"] = self.find(r"Buyer\s*:\s*(.+)", text)
|
r["parties"]["buyer"] = self.find(r"Buyer\s*:\s*(.+)", text)
|
||||||
r["parties"]["seller"] = self.find(r"Client\s*Ref No\.\s*:\s*(.+)", text)
|
|
||||||
|
|
||||||
# ---------- shipment block ----------
|
# ---------- shipment ----------
|
||||||
ship = self.block(text, [
|
ship = self.block(text, [
|
||||||
"Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination",
|
"Total Bales","Vessel","Voy. No.","B/L No.","B/L Date","Destination",
|
||||||
"Growth","Arrival Date","First date of weighing","Last Date of Weighing",
|
"Growth","Arrival Date","First date of weighing","Last Date of Weighing",
|
||||||
|
|||||||
Reference in New Issue
Block a user