Skip to content

Commit e036c96

Browse files
committed
Support longer street names, especially for street address directionals.
* E.g. 60 Ida Lee Dr NW
1 parent 7c9d9e1 commit e036c96

File tree

2 files changed

+21
-15
lines changed

2 files changed

+21
-15
lines changed

lib/address_extractor.rb

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ def first_address(string)
1212
def find_addresses(string)
1313
string.scan(ADDRESS_PATTERN).collect { |a| hashify_results(a) }.compact
1414
end
15-
16-
# Pass it a block that receives 2 parameters:
15+
16+
# Pass it a block that receives 2 parameters:
1717
# address hash
1818
# matched address string ($&)
1919
# Whatever your block returns will be used for the substitution.
@@ -33,9 +33,9 @@ def replace_addresses(string)
3333
useful_address?(hash) ? yield(hash, $&) : match
3434
end
3535
end
36-
36+
3737
private
38-
38+
3939
def hashify_results(matches)
4040
return nil if matches.nil?
4141
result = { }
@@ -46,16 +46,16 @@ def hashify_results(matches)
4646
end
4747
useful_address?(result) ? result : nil
4848
end
49-
49+
5050
def useful_address?(hash)
51-
hash &&
51+
hash &&
5252
hash[:street1] && ( hash[:zip] || hash[:city] && hash[:state] )
5353
end
54-
54+
5555
end
56-
56+
5757
CAPTURE_MAP = [ :street1, :street2, :city, :state, :zip, :zip ]
58-
58+
5959
STATES = <<-EOF
6060
ALABAMA AL
6161
ALASKA AK
@@ -117,9 +117,9 @@ def useful_address?(hash)
117117
WISCONSIN WI
118118
WYOMING WY
119119
EOF
120-
120+
121121
STATE_REGEX = STATES.split(/\n/).collect{ |n| n.scan(/(\w.*\w)\s*([A-Z]{2})\s*$/) }.join("|")
122-
122+
123123
SECONDARY_UNIT_DESIGNATORS = <<-EOF
124124
APARTMENT APT
125125
BASEMENT BSMT
@@ -145,14 +145,14 @@ def useful_address?(hash)
145145
UNIT UNIT
146146
UPPER UPPR
147147
EOF
148-
148+
149149
SECONDARY_UNIT_DESIGNATORS_REGEX = SECONDARY_UNIT_DESIGNATORS.split(/\n/).collect{ |n| n.scan(/(\w+)\s*(\w+)\s*$/) }.join("|")
150150

151151
ADDRESS_PATTERN = /
152152
(
153153
\d+ # A few numbers
154154
\s+
155-
(?:[A-Za-z'.-]+\s?){0,2} (?:[A-Za-z'.-]+) # Followed by a street name
155+
(?:[A-Za-z'.-]+\s?){1,5} # Followed by a street name
156156
)
157157
\s* ,? \s* # a comma, optionally
158158
(
@@ -168,9 +168,9 @@ def useful_address?(hash)
168168
\b(#{STATE_REGEX})\b # state
169169
\s* ,? \s* # a comma, optionally
170170
(\d{5})? # a zip code, optionally
171-
)
171+
)
172172
| # or, instead of city and state
173173
(\d{5})? # a lone zip code will do
174174
)
175175
/xi
176-
end
176+
end

test/test_address_extractor.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,9 @@ def test_no_addresses_found
8181
test_input "Apple Computer, Inc. 1 Infinite Loop, Cupertino, CA 95014",
8282
{ :street1 => "1 Infinite Loop", :street2 => nil, :city => "Cupertino", :state => "CA", :zip => "95014" }
8383

84+
test_input "Ida Lee Park Recreation Center 60 Ida Lee Dr NW, Leesburg, VA",
85+
{ :street1 => "60 Ida Lee Dr NW", :street2 => nil, :city => "Leesburg", :state => "VA", :zip => nil }
86+
87+
test_input "Ida Lee Park Recreation Center 60 Ida Lee Dr N West, Leesburg, VA",
88+
{ :street1 => "60 Ida Lee Dr N West", :street2 => nil, :city => "Leesburg", :state => "VA", :zip => nil }
89+

0 commit comments

Comments
 (0)