浏览代码

Georgetown Update

Benjamin Harris 2 个月前
父节点
当前提交
4615b58637
共有 1 个文件被更改，包括 25 次插入和 13 次删除
  1. 25 13
      scrapers/georgetown.rb

+ 25 - 13
scrapers/georgetown.rb

@@ -58,9 +58,11 @@ cards.each do |card|
   }
 
   application_id     = find.call(/^(Application\s*(ID|No|Number)|Ref)/i)
-  address            = find.call(/(Address|Property)/i)
+  address            = find.call(/(Address|Property|Location)/i)
   proposal           = find.call(/(Proposal|Description)/i)
-  app_date_raw       = find.call(/(Application\s*Date|Date\s*Lodged|Date\s*Received)/i)
+  applicant          = find.call(/(Applicant)/i)
+  title_ref          = find.call(/(Title\s*[Rr]ef)/i)
+  app_date_raw       = find.call(/(Application\s*Date|Date\s*Lodged|Date\s*Received|Opening\s*Date)/i)
   closing_date_raw   = find.call(/(On\s*Notice\s*(to|until)|Closing\s*Date|Closes)/i)
 
   # Document link if present in the table or surrounding block
@@ -85,13 +87,20 @@ cards.each do |card|
   address = address.to_s.strip
   next if address.empty? || council_reference.empty?
 
+  on_notice_to     = Util.parse_aus_date(closing_date_raw)
+  on_notice_to_raw = closing_date_raw.to_s.strip
+
   items << {
-    description: proposal.to_s.strip,
-    date_received: date_received,
+    description:       proposal.to_s.strip,
+    date_received:     date_received,
     date_received_raw: date_received_raw,
-    address: address,
+    on_notice_to:      on_notice_to,
+    on_notice_to_raw:  on_notice_to_raw,
+    address:           address,
     council_reference: council_reference,
-    document_url: document_url
+    applicant:         applicant.to_s.strip,
+    title_reference:   title_ref.to_s.strip,
+    document_url:      document_url
   }
 end
 
@@ -101,15 +110,18 @@ items.each do |row|
   upsert_and_enrich!(
     table: TABLE,
     row: {
-      description: row[:description],
-      date_received: row[:date_received],
+      description:       row[:description],
+      date_received:     row[:date_received],
       date_received_raw: row[:date_received_raw],
-      address: row[:address],
+      on_notice_to:      row[:on_notice_to],
+      on_notice_to_raw:  row[:on_notice_to_raw],
+      address:           row[:address],
       council_reference: row[:council_reference],
-      applicant: "",
-      owner: ""
-    },
-    extras: { document_url: row[:document_url] }
+      applicant:         row[:applicant],
+      owner:             "",
+      title_reference:   row[:title_reference],
+      document_url:      row[:document_url]
+    }
   )
 end