|
| 1 | +#coding=gbk |
| 2 | + |
| 3 | +from __future__ import unicode_literals |
| 4 | +import Image |
| 5 | +import urllib |
| 6 | +from Binaryzation import Binaryzation |
| 7 | + |
class VerticalCut(object):
    """Vertical (column-wise) segmentation helper for a binarized image.

    Suited to images whose characters do not touch each other. The image is
    scanned column by column and the black pixels of every column are
    counted; columns without any black pixel are the gaps between characters.

    The image must already be binarized: a pixel is treated as black only
    when it compares equal to 0.
    """

    # Image being segmented; overwritten per instance in __init__.
    img = None

    def __init__(self, img):
        """Remember the image to work on.

        img is used through ``img.load()`` (an object indexable as
        ``pixels[x, y]``) and ``img.size`` (a ``(width, height)`` pair).
        """
        self.img = img

    def _columnHasBlack(self, pixels, x, h):
        """Return True if column x holds at least one black (0) pixel."""
        return any(pixels[x, y] == 0 for y in range(h))

    def _columnBlackCount(self, pixels, x, h):
        """Return the number of black (0) pixels in column x."""
        return sum(1 for y in range(h) if pixels[x, y] == 0)

    def getBorderPoint(self):
        """Return the left and right bounds of the drawn area.

        Returns:
            A ``(left, right)`` tuple of column indexes, both inclusive:
            the first and the last column containing a black pixel.
            ``(-1, -1)`` when the image has no black pixel at all.
        """
        im = self.img
        pixels = im.load()
        w, h = im.size

        # Left bound: first column, scanning left to right, with a black pixel.
        left = -1
        for x in range(w):
            if self._columnHasBlack(pixels, x, h):
                left = x
                break

        if left == -1:
            # Nothing drawn anywhere, so there is no right bound either.
            return -1, -1

        # Right bound: scan right to left. Column ``left`` is already known
        # to contain black, so it is the answer when nothing further right
        # does. This also covers column 0, which the previous
        # ``range(w-1, 0, -1)`` scan never examined.
        right = left
        for x in range(w - 1, left, -1):
            if self._columnHasBlack(pixels, x, h):
                right = x
                break

        return left, right

    def showVerticalProjection(self, graph):
        """Display a bar chart of a vertical projection.

        Args:
            graph: sequence of per-column black pixel counts, as returned
                by ``cut()``. Column x is drawn as a bar ``graph[x]`` pixels
                high.

        Nothing is shown when graph is empty or all zero, because the chart
        would have no area.
        """
        w = len(graph)
        h = max(graph) if graph else 0
        if w == 0 or h <= 0:
            return

        img = Image.new('1', (w, h))
        for x in range(w):
            # Exactly graph[x] pixels per bar (the old ``y <= graph[x]``
            # test painted one pixel too many).
            for y in range(graph[x]):
                img.putpixel((x, y), 255)
        # Bars were painted from row 0 downwards; flip so they rise from the
        # bottom edge.
        img = img.transpose(Image.FLIP_TOP_BOTTOM)
        img.show()

    def cut(self):
        """Compute the vertical projection of the image.

        Returns:
            A list with one entry per column between the left and right
            bounds reported by ``getBorderPoint()``, both inclusive; each
            entry is the number of black pixels in that column. An empty
            list when the image has no black pixel. ``None`` when no image
            was supplied.
        """
        # Truthiness test kept from the original so falsy placeholders are
        # still answered with None.
        if not self.img:
            return None

        pixels = self.img.load()
        h = self.img.size[1]
        start, end = self.getBorderPoint()
        if start == -1:
            return []

        # ``end`` is itself a column containing black pixels, so it has to
        # be part of the projection.
        return [self._columnBlackCount(pixels, x, h)
                for x in range(start, end + 1)]
| 89 | + |
if __name__ == '__main__':
    # Manual demo: download a captcha, binarize it, show it and print its
    # vertical projection.
    #
    # Simple captcha: http://su.100steps.net/2007/vote/verify.php
    # Taobao captcha: http://regcheckcode.taobao.com/auction/checkcode?sessionID=f06c56ea0e0bda9a9d71832422b68f29
    url = 'http://su.100steps.net/2007/vote/verify.php'

    # Close the HTTP response even if reading fails.
    response = urllib.urlopen(url)
    try:
        s = response.read()
    finally:
        response.close()

    # ``with`` guarantees the file is flushed and closed before it is
    # reopened for decoding below, even if the write raises.
    with open('v.jpg', 'wb') as f:
        f.write(s)

    im = Image.open('v.jpg')
    b = Binaryzation(im)
    im = b.ConvertToBinaryzation(160)  # 160 is the value passed to the binarizer
    im.show()
    v = VerticalCut(im)
    # Single-argument call form prints the same on Python 2 and is valid
    # syntax on Python 3.
    print(v.cut())
| 105 | + |
0 commit comments