Day09 学习笔记

线程同步:互斥锁 Lock

  • 多线程访问共享资源时使用互斥锁,保证同一时间只有一个线程进入临界区。
示例代码(Python 2):
import threading
import time

# Module-level mutex shared by every MyThread instance; whichever thread holds
# it owns the critical section inside run().
lock = threading.Lock()


class MyThread(threading.Thread):
    """Worker thread that prints timestamps while holding the shared lock.

    Args:
        name: Thread name shown in every printed line.
        delay: Seconds to sleep before each timestamp is printed.
        count: How many timestamps to print. Defaults to 5, the value that
            used to be hard-coded in run().
    """

    def __init__(self, name, delay, count=5):
        threading.Thread.__init__(self)
        self.name = name
        self.delay = delay
        self.count = count

    def run(self):
        """Print ``count`` timestamps inside the critical section."""
        # Each message is formatted into a single string: Python 2's
        # ``print a, b`` emits its items as separate writes, which lets the
        # lines of concurrently running threads interleave mid-line.
        print("starting %s" % self.name)
        # ``with`` acquires the lock on entry and releases it on exit, even
        # if the body raises (enter / leave the critical section).
        with lock:
            for _ in range(self.count):
                time.sleep(self.delay)
                print("%s : %s" % (self.name, time.ctime()))
        print("exiting %s" % self.name)

# Demo driver: two workers compete for the shared lock. Thread-2 is started
# first, so it will normally enter the critical section before Thread-1.
t1 = MyThread("Thread-1", 1)
t2 = MyThread("Thread-2", 2)
t2.start()
t1.start()
# Wait for BOTH workers. Joining only t2 would print the final message while
# Thread-1 is still running inside its critical section.
t2.join()
t1.join()
print("exiting main!!!")

可重入锁 RLock(递归锁)

  • 一个线程在同一把锁上重复 acquire 时不会被自己阻塞,便于在同一线程内的嵌套调用中复用锁。
示例代码(Python 2):
import threading
import time

class Worker(object):
    """Re-entrant lock demo: every method guards its body with the same RLock."""

    # Defined on the class, so all methods (and all instances) share one lock.
    lock = threading.RLock()

    def step1(self):
        """Print "step1" three times, sleeping one second before each line."""
        with self.lock:
            for _ in range(3):
                time.sleep(1)
                print("step1")

    def step2(self):
        """Print "step2" three times while holding the lock."""
        with self.lock:
            for _ in range(3):
                print("step2")

    def show(self):
        """Run step1 and step2 under the lock, then print the Thread-1 banner."""
        with self.lock:
            # step1/step2 acquire the same lock again from the thread that
            # already holds it; an RLock permits this nested acquisition.
            self.step1()
            self.step2()
        print("Thread-1 over!!!")

    def step3(self, name):
        """Print ``name`` under the lock, then print the Thread-2 banner."""
        with self.lock:
            print("step3 == %s" % name)
        print("Thread-2 over!!!")

# Demo driver: both threads use the same Worker and therefore the same RLock.
w = Worker()
t1 = threading.Thread(target=w.show)
t2 = threading.Thread(target=w.step3, kwargs={"name": "tom"})
# Start order matches creation order: t1 first, then t2.
for worker_thread in (t1, t2):
    worker_thread.start()

简单爬虫流程(示例:带请求头抓取页面)

  • 构造请求时添加 User-Agent,获取 HTML 后用正则表达式提取字段;解析结果可持久化(如写库)。
示例代码(Python 2):
import urllib2
import re

# Send a browser-like User-Agent with the request.
headers = {"User-Agent": "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)"}
url = "http://example.com/list"

req = urllib2.Request(url, headers=headers)
# A timeout keeps the script from hanging forever on an unresponsive server,
# and the response is closed explicitly so the socket is not leaked.
response = urllib2.urlopen(req, timeout=10)
try:
    html = response.read()
finally:
    response.close()

# Example: extract title and price (the pattern is for demonstration only).
# re.S lets ".*?" span line breaks inside an item block.
pattern = re.compile(r'<div class="item">.*?<h2>(.*?)</h2>.*?<span class="price">(.*?)</span>', re.S)
items = pattern.findall(html)

for title, price in items:
    # Collapse any run of whitespace inside the title to a single space.
    title = re.sub(r'\s+', ' ', title).strip()
    price = price.strip()
    print("%s %s" % (title, price))

数据入库(示意:插入 Oracle)

  • 建表与插入的基本流程(示例字段,仅供参考):
示例代码(Python 2):
import cx_Oracle

# Connect, create the demo table, bulk-insert rows, and always release the
# cursor and the connection, even when one of the statements raises.
db = cx_Oracle.connect("user", "password", "127.0.0.1:1521/orcl")
try:
    cursor = db.cursor()
    try:
        cursor.execute(
            """
            CREATE TABLE WEB_DATA(
            title varchar2(200),
            price varchar2(50)
            )
            """
        )
        db.commit()

        rows = [("标题A", "1200"), ("标题B", "800")]
        # Positional binds (:1, :2) are filled from each tuple in ``rows``.
        cursor.executemany("INSERT INTO WEB_DATA(title, price) VALUES (:1, :2)", rows)
        db.commit()
    finally:
        cursor.close()
finally:
    db.close()

Day09 学习笔记
https://blog.pangcy.cn/2018/10/23/后端编程相关/python/python2基础/Day09 学习笔记/
作者
子洋
发布于
2018年10月23日
许可协议