Skip to content

Commit 1102c84

Browse files
committed
initial import
0 parents  commit 1102c84

File tree

6 files changed

+318
-0
lines changed

6 files changed

+318
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
pkg
2+
doc
3+
Manifest

LICENSE.textile

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
h4. Copyright and License
2+
3+
The MIT License
4+
5+
Copyright (c) 2008 Jim Garvin
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in
15+
all copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23+
THE SOFTWARE.

README.textile

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
h1. AddressExtractor
2+
3+
Find and/or replace mailing addresses in strings.
4+
5+
h2. Examples
6+
7+
<pre><code>
8+
string = <<EOF
9+
Please send the package to 123 Foo St., Someplace FL
10+
11+
My phone number is 123-1234 and St. Marc of Israel can be reached
12+
via mail at:
13+
123 Goob Avenue
14+
Apt 123
15+
Nice Town CA 123456
16+
EOF
17+
18+
# Find first address
19+
AddressExtractor.first_address(string) # => { :street1 => "123 Foo St.", :city => "Someplace", :state => "FL" }
20+
21+
# Find all addresses
22+
AddressExtractor.find_addresses(string) # =>
23+
# [
24+
# { :street1 => "123 Foo St.", :city => "Someplace", :state => "FL" },
25+
# { :street1 => "123 Goob Avenue", :street2 => "Apt 123", :city => "Nice Town", :state => "CA", :zip => "123456" }
26+
# ]
27+
28+
# Do a gsub on first address
29+
new_string = AddressExtractor.replace_first_address(string) do |address_hash, address_string|
30+
map_link_to(address_string)
31+
end
32+
33+
# Do a gsub on all addresses
34+
new_string = AddressExtractor.replace_addresses(string) do |address_hash, address_string|
35+
map_link_to(address_string)
36+
end
37+
</code></pre>
38+
39+
h3. About
40+
41+
Written by Jim Garvin at RubyConf '08 at the request of Chris Murphy and Ryan McGeary so they could use it in Yarp.com.
42+
43+
You can use it, too.
44+
45+
The address-finding regex may be a bit naive; I'll gladly accept pull requests that add to the test data and tests.

Rakefile

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
require 'rubygems'
2+
require 'rake'
3+
require 'echoe'
4+
5+
# Gem packaging configuration, handed to Echoe as a block of settings.
Echoe.new('address_extractor', '0.1.0') do |gem_config|
  gem_config.author                   = "Jim Garvin"
  gem_config.email                    = "jim at thegarvin dot com"
  gem_config.url                      = "http://github.com/coderifous/address_extractor"
  gem_config.description              = "Give it text. It finds addresses in it."
  gem_config.ignore_pattern           = ["tmp/*", "script/*"]
  gem_config.development_dependencies = []
end

# Load every extra rake file found under tasks/, in sorted (stable) order.
extra_task_files = Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort
extra_task_files.each do |task_file|
  load task_file
end

lib/address_extractor.rb

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
# Finds (and optionally replaces) US-style mailing addresses in free text.
#
# An address is reported as a Hash holding whichever of the keys :street1,
# :street2, :city, :state and :zip were found. A match only counts as an
# address when it has a street line plus either a ZIP or a city and state
# (see useful_address?).
class AddressExtractor

  # Field name for each capture group of ADDRESS_PATTERN, in group order.
  # :zip is listed twice because the pattern has two ZIP groups: one that
  # follows a city and state, and one that stands alone.
  CAPTURE_MAP = [ :street1, :street2, :city, :state, :zip, :zip ].freeze

  # One "FULL NAME  CODE" pair per line.
  STATES = <<-EOF
    ALABAMA AL
    ALASKA AK
    AMERICAN SAMOA AS
    ARIZONA AZ
    ARKANSAS AR
    CALIFORNIA CA
    COLORADO CO
    CONNECTICUT CT
    DELAWARE DE
    DISTRICT OF COLUMBIA DC
    FEDERATED STATES OF MICRONESIA FM
    FLORIDA FL
    GEORGIA GA
    GUAM GU
    HAWAII HI
    IDAHO ID
    ILLINOIS IL
    INDIANA IN
    IOWA IA
    KANSAS KS
    KENTUCKY KY
    LOUISIANA LA
    MAINE ME
    MARSHALL ISLANDS MH
    MARYLAND MD
    MASSACHUSETTS MA
    MICHIGAN MI
    MINNESOTA MN
    MISSISSIPPI MS
    MISSOURI MO
    MONTANA MT
    NEBRASKA NE
    NEVADA NV
    NEW HAMPSHIRE NH
    NEW JERSEY NJ
    NEW MEXICO NM
    NEW YORK NY
    NORTH CAROLINA NC
    NORTH DAKOTA ND
    NORTHERN MARIANA ISLANDS MP
    OHIO OH
    OKLAHOMA OK
    OREGON OR
    PALAU PW
    PENNSYLVANIA PA
    PUERTO RICO PR
    RHODE ISLAND RI
    SOUTH CAROLINA SC
    SOUTH DAKOTA SD
    TENNESSEE TN
    TEXAS TX
    UTAH UT
    VERMONT VT
    VIRGIN ISLANDS VI
    VIRGINIA VA
    WASHINGTON WA
    WEST VIRGINIA WV
    WISCONSIN WI
    WYOMING WY
  EOF

  # Regex source (a String) alternating every state name and code, in table
  # order. Each token is escaped because the string is interpolated into an
  # extended-mode pattern, where a bare space is ignored; without escaping,
  # multi-word names such as "NEW YORK" could never match.
  STATE_REGEX = STATES.split(/\n/).
    collect { |line| line.scan(/(\w.*\w)\s*([A-Z]{2})\s*$/) }.
    flatten.
    collect { |token| Regexp.escape(token) }.
    join("|").freeze

  # One "FULL NAME  ABBREVIATION" pair per line.
  SECONDARY_UNIT_DESIGNATORS = <<-EOF
    APARTMENT APT
    BASEMENT BSMT
    BUILDING BLDG
    DEPARTMENT DEPT
    FLOOR FL
    FRONT FRNT
    HANGAR HNGR
    LOBBY LBBY
    LOT LOT
    LOWER LOWR
    OFFICE OFC
    PENTHOUSE PH
    PIER PIER
    REAR REAR
    ROOM RM
    SIDE SIDE
    SLIP SLIP
    SPACE SPC
    STOP STOP
    SUITE STE
    TRAILER TRLR
    UNIT UNIT
    UPPER UPPR
  EOF

  # Regex source (a String) alternating every designator and abbreviation.
  SECONDARY_UNIT_DESIGNATORS_REGEX = SECONDARY_UNIT_DESIGNATORS.split(/\n/).
    collect { |line| line.scan(/(\w+)\s*(\w+)\s*$/) }.
    flatten.
    collect { |token| Regexp.escape(token) }.
    join("|").freeze

  # Capture groups, in order: street1, street2, city, state, zip, lone zip.
  # The final alternative can match the empty string, so the pattern also
  # matches any "number plus words" run; hashify_results filters those out.
  # The optional separator after each street word is a single whitespace
  # character of any kind, newline included.
  ADDRESS_PATTERN = /
    (
      \d+                           # house number
      \s+
      (?:[A-Za-z'.-]+\s?){1,3}      # one to three street-name words
    )
    \s* ,? \s*
    (
      (?:\d+\s+)?                   # a secondary unit, optionally
      (?:#{SECONDARY_UNIT_DESIGNATORS_REGEX})
      (?:\s+\d+)?
    )?
    \s* ,? \s*                      # a comma, optionally
    (?:
      (?:
        ((?:[A-Za-z]+\s?){1,3})     # city
        \s+
        \b(#{STATE_REGEX})\b        # state
        \s* ,? \s*                  # a comma, optionally
        (\d{6}|\d{5}(?:-\d{4})?)?   # ZIP, optionally: 6 digits as before, or 5 with optional +4
      )
      |                             # or, instead of city and state
      (\d{6}|\d{5}(?:-\d{4})?)?     # a lone ZIP will do
    )
  /xi

  class << self

    # Returns the Hash for the first usable address in +string+, or nil when
    # there is none. Pattern matches that are not usable addresses are
    # skipped instead of ending the search.
    def first_address(string)
      string.scan(ADDRESS_PATTERN) do |captures|
        address = hashify_results(captures)
        return address if address
      end
      nil
    end

    # Returns an Array with one Hash per usable address found in +string+
    # (empty when nothing is found).
    def find_addresses(string)
      string.scan(ADDRESS_PATTERN).collect { |captures| hashify_results(captures) }.compact
    end

    # Returns a copy of +string+ in which the first usable address has been
    # replaced by the block's result. The block receives the address Hash
    # and the matched text. Without a usable address the copy is unchanged
    # and the block is never called.
    def replace_first_address(string)
      replaced = false
      string.gsub(ADDRESS_PATTERN) do |match|
        next match if replaced
        # Read the captures before anything else can run another regex.
        address = hashify_results(Regexp.last_match.captures)
        next match unless address
        replaced = true
        yield(address, match)
      end
    end

    # Returns a copy of +string+ in which every usable address has been
    # replaced by the block's result. The block receives the address Hash
    # and the matched text; other pattern matches are left untouched.
    def replace_addresses(string)
      string.gsub(ADDRESS_PATTERN) do |match|
        address = hashify_results(Regexp.last_match.captures)
        address ? yield(address, match) : match
      end
    end

    # Turns the capture list of one ADDRESS_PATTERN match into a Hash keyed
    # by CAPTURE_MAP, dropping groups that did not participate and trimming
    # surrounding whitespace from the rest. Returns nil when +matches+ is
    # nil or the result is not a usable address.
    def hashify_results(matches)
      return nil unless matches
      result = {}
      CAPTURE_MAP.each_with_index do |field, index|
        value = matches[index]
        result[field] = value.to_s.strip if value
      end
      useful_address?(result) ? result : nil
    end

    # True when +hash+ has a street line and either a ZIP or both a city
    # and a state. A nil +hash+ is not useful.
    def useful_address?(hash)
      return false unless hash
      !!(hash[:street1] && (hash[:zip] || (hash[:city] && hash[:state])))
    end

  end

end

test/test_address_extractor.rb

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
$: << File.dirname(__FILE__)+"/../lib"
2+
3+
require 'test/unit'
4+
require 'address_extractor.rb'
5+
6+
# Exercises the public AddressExtractor API against the DATA1 sample text.
class AddressExtractorTest < Test::Unit::TestCase

  def test_first_address_extraction
    address = AddressExtractor.first_address(DATA1)
    assert_first_address(address)
  end

  def test_find_addresses
    addresses = AddressExtractor.find_addresses(DATA1)
    assert_first_address addresses[0]
    assert_second_address addresses[1]
  end

  def test_replace_first_address
    string = AddressExtractor.replace_first_address(DATA1) do |address_hash, address|
      assert_first_address address_hash
      assert_first_address_string address
      "skidoosh"
    end
    assert_match(/Please send the package to skidoosh/, string)
  end

  def test_replace_addresses
    string = AddressExtractor.replace_addresses(DATA1) do |_address_hash, _address|
      "skidoosh"
    end
    assert_match(/Please send the package to skidoosh/, string)
    # The indentation in front of the street line is not part of the match,
    # so it survives the replacement; accept any amount of it.
    assert_match(/via mail at:\n\s*skidoosh/, string)
  end

  # Shared assertions describing the two addresses contained in DATA1.
  module Helpers
    def assert_first_address(a)
      assert_not_nil a
      assert_equal "123 Foo St.", a[:street1]
      assert_nil a[:street2]
      assert_equal "Someplace", a[:city]
      assert_equal "FL", a[:state]
      assert_nil a[:zip]
    end

    # The matched text may carry trailing whitespace, hence the \s* tail.
    def assert_first_address_string(string)
      assert_match(/^123 Foo St\., Someplace FL\s*$/, string)
    end

    def assert_second_address(a)
      assert_not_nil a
      assert_equal "123 Goob Avenue", a[:street1]
      assert_equal "Apt 123", a[:street2]
      assert_equal "Nice Town", a[:city]
      assert_equal "CA", a[:state]
      assert_equal "123456", a[:zip]
    end
  end
  include Helpers
end

# Sample text with one inline address, one phone number that must not be
# reported, and one multi-line address.
#
# The multi-line address must stay indented: the street group of
# ADDRESS_PATTERN allows a single optional whitespace character after each
# word, so with unindented lines the newline after "Avenue" is consumed and
# "Apt" is read as a third street word instead of a secondary unit.
DATA1 = <<EOF
Please send the package to 123 Foo St., Someplace FL

My phone number is 123-1234 and St. Marc of Israel can be reached
via mail at:
  123 Goob Avenue
  Apt 123
  Nice Town CA 123456
EOF

0 commit comments

Comments
 (0)